FileDocCategorySizeDatePackage
PPTXMLDump.javaAPI DocApache Poi 3.0.19477Mon Jan 01 18:55:34 GMT 2007org.apache.poi.hslf.dev

PPTXMLDump

public class PPTXMLDump extends Object
Utility class which dumps raw contents of a ppt file into XML format
author
Yegor Kozlov

Fields Summary
public static final int
HEADER_SIZE
public static final int
PICT_HEADER_SIZE
public static final String
PPDOC_ENTRY
public static final String
PICTURES_ENTRY
public static String
CR
protected Writer
out
protected byte[]
docstream
protected byte[]
pictstream
protected boolean
hexHeader
private static final byte[]
hexval
Constructors Summary
public PPTXMLDump(File ppt)


         
        FileInputStream fis = new FileInputStream(ppt);
        POIFSFileSystem fs = new POIFSFileSystem(fis);
        fis.close();

        //read the document entry from OLE file system
        DocumentEntry entry = (DocumentEntry)fs.getRoot().getEntry(PPDOC_ENTRY);
        docstream = new byte[entry.getSize()];
        DocumentInputStream is = fs.createDocumentInputStream(PPDOC_ENTRY);
        is.read(docstream);

        try {
            entry = (DocumentEntry)fs.getRoot().getEntry(PICTURES_ENTRY);
            pictstream = new byte[entry.getSize()];
            is = fs.createDocumentInputStream(PICTURES_ENTRY);
            is.read(pictstream);
        } catch(FileNotFoundException e){
            //silently catch errors if the presentation does not contain pictures
        }
    
Methods Summary
public voiddump(java.io.Writer out)
Dump the structure of the supplied PPT file into XML

param
out Writer to write out
throws
java.io.IOException

        this.out = out;

        int padding = 0;
        write(out, "<Presentation>" + CR, padding);
        padding++;
        if (pictstream != null){
            write(out, "<Pictures>" + CR, padding);
            dumpPictures(pictstream, padding);
            write(out, "</Pictures>" + CR, padding);
        }
        //dump the structure of the powerpoint document
        write(out, "<PowerPointDocument>" + CR, padding);
        padding++;
        dump(docstream, 0, docstream.length, padding);
        padding--;
        write(out, "</PowerPointDocument>" + CR, padding);
        padding--;
        write(out, "</Presentation>", padding);
    
public voiddump(byte[] data, int offset, int length, int padding)
Dump a part of the document stream into XML

param
data PPT binary data
param
offset offset from the beginning of the document
param
length of the document
param
padding used for formatting results
throws
java.io.IOException

        int pos = offset;
        while (pos <= (offset + length - HEADER_SIZE)){
            if (pos < 0) break;

            //read record header
            int info = LittleEndian.getUShort(data, pos);
            pos += LittleEndian.SHORT_SIZE;
            int type = LittleEndian.getUShort(data, pos);
            pos += LittleEndian.SHORT_SIZE;
            int size = (int)LittleEndian.getUInt(data, pos);
            pos += LittleEndian.INT_SIZE;

            //get name of the record by type
            String recname = RecordTypes.recordName(type);
            write(out, "<"+recname + " info=\""+info+"\" type=\""+type+"\" size=\""+size+"\" offset=\""+(pos-8)+"\"", padding);
            if (hexHeader){
                out.write(" header=\"");
                dump(out, data, pos-8, 8, 0, false);
                out.write("\"");
            }
            out.write(">" + CR);
			padding++;
            //this check works both for Escher and PowerPoint records
            boolean isContainer = (info & 0x000F) == 0x000F;
            if (isContainer) {
                //continue to dump child records
                dump(data, pos, size, padding);
            } else {
                //dump first 100 bytes of the atom data
                dump(out, data, pos, Math.min(size, 100), padding, true);
            }
			padding--;
            write(out, "</"+recname + ">" + CR, padding);

            pos += size;
        }
    
private static voiddump(java.io.Writer out, byte[] data, int offset, int length, int padding, boolean nl)
dump binary data to out with the specified padding

        int linesize = 25;
        for (int i = 0; i < padding; i++) out.write("  ");
        int i;
        for (i = offset; i < (offset + length); i++) {
            int c = data[i];
            out.write((char) hexval[(c & 0xF0) >> 4]);
            out.write((char) hexval[(c & 0x0F) >> 0]);
            out.write(' ");
            if((i+1-offset) % linesize == 0 && i != (offset + length-1)) {
                out.write(CR);
                for (int j = 0; j < padding; j++) out.write("  ");
            }
        }
        if(nl && length > 0)out.write(CR);
    
public voiddumpPictures(byte[] data, int padding)
Dumps the Pictures OLE stream into XML.

param
data from the Pictures OLE data stream
param
padding
throws
java.io.IOException

        int pos = 0;
        while (pos < data.length) {
            byte[] header = new byte[PICT_HEADER_SIZE];

            System.arraycopy(data, pos, header, 0, header.length);
            int size = LittleEndian.getInt(header, 4) - 17;
            byte[] pictdata = new byte[size];
            System.arraycopy(data, pos + PICT_HEADER_SIZE, pictdata, 0, pictdata.length);
            pos += PICT_HEADER_SIZE + size;

            padding++;
            write(out, "<picture size=\""+size+"\" type=\""+getPictureType(header)+"\">" + CR, padding);
            padding++;
            write(out, "<header>" + CR, padding);
            dump(out, header, 0, header.length, padding, true);
            write(out, "</header>" + CR, padding);
            write(out, "<imgdata>" + CR, padding);
            dump(out, pictdata, 0, Math.min(pictdata.length, 100), padding, true);
            write(out, "</imgdata>" + CR, padding);
            padding--;
            write(out, "</picture>" + CR, padding);
            padding--;

        }
    
private java.lang.StringgetPictureType(byte[] header)

        String type;
        int meta = LittleEndian.getUShort(header, 0);

        switch(meta){
            case 0x46A0: type = "jpeg"; break;
            case 0x2160: type = "wmf"; break;
            case 0x6E00: type = "png"; break;
            default: type = "unknown"; break;
        }
        return type;
    
public static voidmain(java.lang.String[] args)

        if (args.length == 0){
            System.out.println(
                "Usage: PPTXMLDump (options) pptfile\n" +
                "Where options include:\n" +
                "    -f     write output to <pptfile>.xml file in the current directory"
            );
            return;
        }
        boolean outFile = false;
        for (int i = 0; i < args.length; i++){

            if (args[i].startsWith("-")) {
                if ("-f".equals(args[i])){
                    //write ouput to a file
                    outFile = true;
                }
            } else {
                File ppt = new File(args[i]);
                PPTXMLDump dump = new PPTXMLDump(ppt);
                System.out.println("Dumping " + args[i]);

                if (outFile){
                    FileWriter out = new FileWriter(ppt.getName() + ".xml");
                    dump.dump(out);
                    out.close();
                } else {
                    StringWriter out = new StringWriter();
                    dump.dump(out);
                    System.out.println(out.toString());
                }
            }

        }
    
private static voidwrite(java.io.Writer out, java.lang.String str, int padding)
write a string to out with the specified padding

        for (int i = 0; i < padding; i++) out.write("  ");
        out.write(str);