PPTXMLDump — public class PPTXMLDump extends Object. Utility class which dumps raw contents of a ppt file into XML format. |
Fields Summary |
---|
public static final int | HEADER_SIZE | public static final int | PICT_HEADER_SIZE | public static final String | PPDOC_ENTRY | public static final String | PICTURES_ENTRY | public static String | CR | protected Writer | out | protected byte[] | docstream | protected byte[] | pictstream | protected boolean | hexHeader | private static final byte[] | hexval |
Constructors Summary |
---|
/**
 * Loads the raw streams of a PowerPoint file into memory.
 *
 * <p>The stream named by {@code PPDOC_ENTRY} is required. The stream named
 * by {@code PICTURES_ENTRY} is optional: when it is missing,
 * {@code pictstream} is left {@code null}.
 *
 * @param ppt the file to read
 * @throws IOException if the file cannot be opened or read
 */
public PPTXMLDump(File ppt) throws IOException {
    POIFSFileSystem fs;
    FileInputStream fis = new FileInputStream(ppt);
    try {
        fs = new POIFSFileSystem(fis);
    } finally {
        // release the file handle even when the OLE container cannot be parsed
        fis.close();
    }

    //read the document entry from OLE file system
    docstream = readEntry(fs, PPDOC_ENTRY);

    try {
        pictstream = readEntry(fs, PICTURES_ENTRY);
    } catch (FileNotFoundException ignored) {
        //silently catch errors if the presentation does not contain pictures
    }
}

/**
 * Reads the whole content of a named root-level entry into a new byte array.
 * The array is sized from the entry's declared size; the stream is always closed.
 */
private static byte[] readEntry(POIFSFileSystem fs, String name) throws IOException {
    DocumentEntry entry = (DocumentEntry) fs.getRoot().getEntry(name);
    byte[] content = new byte[entry.getSize()];
    DocumentInputStream is = fs.createDocumentInputStream(name);
    try {
        // a single read() is not guaranteed to fill the buffer, so keep
        // reading until it is full or the stream ends
        int filled = 0;
        while (filled < content.length) {
            int n = is.read(content, filled, content.length - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
    } finally {
        is.close();
    }
    return content;
}
|
Methods Summary |
---|
public void | dump(java.io.Writer out)Dump the structure of the supplied PPT file into XML
this.out = out;
int padding = 0;
write(out, "<Presentation>" + CR, padding);
padding++;
if (pictstream != null){
write(out, "<Pictures>" + CR, padding);
dumpPictures(pictstream, padding);
write(out, "</Pictures>" + CR, padding);
}
//dump the structure of the powerpoint document
write(out, "<PowerPointDocument>" + CR, padding);
padding++;
dump(docstream, 0, docstream.length, padding);
padding--;
write(out, "</PowerPointDocument>" + CR, padding);
padding--;
write(out, "</Presentation>", padding);
| public void | dump(byte[] data, int offset, int length, int padding)Dump a part of the document stream into XML
int pos = offset;
while (pos <= (offset + length - HEADER_SIZE)){
if (pos < 0) break;
//read record header
int info = LittleEndian.getUShort(data, pos);
pos += LittleEndian.SHORT_SIZE;
int type = LittleEndian.getUShort(data, pos);
pos += LittleEndian.SHORT_SIZE;
int size = (int)LittleEndian.getUInt(data, pos);
pos += LittleEndian.INT_SIZE;
//get name of the record by type
String recname = RecordTypes.recordName(type);
write(out, "<"+recname + " info=\""+info+"\" type=\""+type+"\" size=\""+size+"\" offset=\""+(pos-8)+"\"", padding);
if (hexHeader){
out.write(" header=\"");
dump(out, data, pos-8, 8, 0, false);
out.write("\"");
}
out.write(">" + CR);
padding++;
//this check works both for Escher and PowerPoint records
boolean isContainer = (info & 0x000F) == 0x000F;
if (isContainer) {
//continue to dump child records
dump(data, pos, size, padding);
} else {
//dump first 100 bytes of the atom data
dump(out, data, pos, Math.min(size, 100), padding, true);
}
padding--;
write(out, "</"+recname + ">" + CR, padding);
pos += size;
}
| private static void | dump(java.io.Writer out, byte[] data, int offset, int length, int padding, boolean nl)dump binary data to out with the specified padding
int linesize = 25;
for (int i = 0; i < padding; i++) out.write(" ");
int i;
for (i = offset; i < (offset + length); i++) {
int c = data[i];
out.write((char) hexval[(c & 0xF0) >> 4]);
out.write((char) hexval[(c & 0x0F) >> 0]);
out.write(' ");
if((i+1-offset) % linesize == 0 && i != (offset + length-1)) {
out.write(CR);
for (int j = 0; j < padding; j++) out.write(" ");
}
}
if(nl && length > 0)out.write(CR);
| public void | dumpPictures(byte[] data, int padding)Dumps the Pictures OLE stream into XML.
int pos = 0;
while (pos < data.length) {
byte[] header = new byte[PICT_HEADER_SIZE];
System.arraycopy(data, pos, header, 0, header.length);
int size = LittleEndian.getInt(header, 4) - 17;
byte[] pictdata = new byte[size];
System.arraycopy(data, pos + PICT_HEADER_SIZE, pictdata, 0, pictdata.length);
pos += PICT_HEADER_SIZE + size;
padding++;
write(out, "<picture size=\""+size+"\" type=\""+getPictureType(header)+"\">" + CR, padding);
padding++;
write(out, "<header>" + CR, padding);
dump(out, header, 0, header.length, padding, true);
write(out, "</header>" + CR, padding);
write(out, "<imgdata>" + CR, padding);
dump(out, pictdata, 0, Math.min(pictdata.length, 100), padding, true);
write(out, "</imgdata>" + CR, padding);
padding--;
write(out, "</picture>" + CR, padding);
padding--;
}
| private java.lang.String | getPictureType(byte[] header)
String type;
int meta = LittleEndian.getUShort(header, 0);
switch(meta){
case 0x46A0: type = "jpeg"; break;
case 0x2160: type = "wmf"; break;
case 0x6E00: type = "png"; break;
default: type = "unknown"; break;
}
return type;
| public static void | main(java.lang.String[] args)
if (args.length == 0){
System.out.println(
"Usage: PPTXMLDump (options) pptfile\n" +
"Where options include:\n" +
" -f write output to <pptfile>.xml file in the current directory"
);
return;
}
boolean outFile = false;
for (int i = 0; i < args.length; i++){
if (args[i].startsWith("-")) {
if ("-f".equals(args[i])){
//write ouput to a file
outFile = true;
}
} else {
File ppt = new File(args[i]);
PPTXMLDump dump = new PPTXMLDump(ppt);
System.out.println("Dumping " + args[i]);
if (outFile){
FileWriter out = new FileWriter(ppt.getName() + ".xml");
dump.dump(out);
out.close();
} else {
StringWriter out = new StringWriter();
dump.dump(out);
System.out.println(out.toString());
}
}
}
| private static void | write(java.io.Writer out, java.lang.String str, int padding)write a string to out with the specified padding
for (int i = 0; i < padding; i++) out.write(" ");
out.write(str);
|
|