HDFObjectFactory.java | API Doc | Apache Poi 3.0.121600 | Mon Jan 01 18:55:22 GMT 2007 | org.apache.poi.hdf.model


public class HDFObjectFactory extends Object
The Object Factory takes in a stream and creates the low level objects that represent the data.

Fields Summary
private POIFSFileSystem _filesystem
OLE stuff
private FileInformationBlock _fib
private HDFLowLevelParsingListener _listener
Used to set up the object model
private ParsingState _charParsingState
parsing state for characters
private ParsingState _parParsingState
parsing state for paragraphs
byte[] _mainDocument
main document stream buffer
byte[] _tableBuffer
table stream buffer
Constructors Summary
protected HDFObjectFactory(InputStream istream, HDFLowLevelParsingListener l)
Creates a new instance of HDFObjectFactory

istream The InputStream that is the Word document

        // Choose the listener that will receive the low-level objects: a fresh
        // HDFObjectModel when the caller passed none, otherwise the caller's listener.
        // NOTE(review): as listed, `_listener = l` follows the default assignment with no
        // `else`; the keyword and braces look to have been lost in extraction — confirm
        // against the real source.
        if (l == null)
            _listener = new HDFObjectModel();
            _listener = l;

        //do Ole stuff
        _filesystem = new POIFSFileSystem(istream);

        // NOTE(review): the right-hand side of this assignment is missing from the listing.
        DocumentEntry headerProps =

        // Buffer sized from the entry's reported size; it is handed to the FIB below.
        _mainDocument = new byte[headerProps.getSize()];

        _fib = new FileInformationBlock(_mainDocument);


public HDFObjectFactory(InputStream istream)
Creates a new instance of HDFObjectFactory

istream The InputStream that is the Word document

        // Delegate to the two-argument constructor without supplying a listener.
        this(istream, null);
Methods Summary
private voidcreateFontTable()
Initializes this document's FontTable.

        // The FIB supplies where the font table starts in the table stream and how long it is.
        int fontTableIndex = _fib.getFcSttbfffn();
        int fontTableSize = _fib.getLcbSttbfffn();
        byte[] fontTable = new byte[fontTableSize];
        // Copy that slice out of the table buffer and pass the parsed table to the listener.
        System.arraycopy(_tableBuffer, fontTableIndex, fontTable, 0, fontTableSize);
        _listener.fonts(new FontTable(fontTable));
private voidcreateListTables()
Initializes the list tables for this document

        // Copy the LFO section (offset and length taken from the FIB) out of the table buffer.
        int lfoOffset = _fib.getFcPlfLfo();
        int lfoSize = _fib.getLcbPlfLfo();
        byte[] plflfo = new byte[lfoSize];

        System.arraycopy(_tableBuffer, lfoOffset, plflfo, 0, lfoSize);

        // The LST section is only processed when the FIB reports a positive offset and size.
        int lstOffset = _fib.getFcPlcfLst();
        int lstSize = _fib.getLcbPlcfLst();
        if (lstOffset > 0 && lstSize > 0)
          //  The lstSize returned by _fib.getLcbPlcfLst() doesn't appear
          //  to take into account any LVLs.  Therefore, we recalculate
          //  lstSize based on where the LFO section begins (because the
          //  LFO section immediately follows the LST section).
          lstSize = lfoOffset - lstOffset;
          byte[] plcflst = new byte[lstSize];
          System.arraycopy(_tableBuffer, lstOffset, plcflst, 0, lstSize);
          // Both raw sections are passed to ListTables, and the result goes to the listener.
          // NOTE(review): the braces delimiting this `if` body are missing from the listing.
          _listener.lists(new ListTables(plcflst, plflfo));
private voidcreateStyleSheet()
Uncompresses the StyleSheet from file into memory.

      // Locate the stylesheet bytes in the table stream using the FIB's offset and length.
      int stshIndex = _fib.getFcStshf();
      int stshSize = _fib.getLcbStshf();
      byte[] stsh = new byte[stshSize];
      System.arraycopy(_tableBuffer, stshIndex, stsh, 0, stshSize);

      // Any further decoding happens inside StyleSheet's constructor (not visible here).
      _listener.styleSheet(new StyleSheet(stsh));
public static java.util.ListgetTypes( istream)

        // Result list, created with an initial capacity of 1.
        List results = new ArrayList(1);

        //do Ole stuff
        POIFSFileSystem filesystem = new POIFSFileSystem(istream);

        // NOTE(review): the right-hand side of this assignment is missing from the listing.
        DocumentEntry headerProps =

        byte[] mainDocument = new byte[headerProps.getSize()];

        FileInformationBlock fib = new FileInformationBlock(mainDocument);

        // NOTE(review): no visible statement adds to `results` or reads `fib`; the code
        // that populates the list was presumably dropped from the listing — confirm.
        return results;
private voidinitCharacterProperties(int charOffset, org.apache.poi.hdf.model.hdftypes.PlexOfCps charPlcf, int start, int end)

        // Emits character runs for the range [start, end] to the listener, resuming from
        // the position saved in _charParsingState.
        //Initialize character property stuff
        //int currentCharPage = _charParsingState.getCurrentPage();
        int charPlcfLen = charPlcf.length();
        int currentPageIndex = _charParsingState.getCurrentPageIndex();
        FormattedDiskPage fkp = _charParsingState.getFkp();
        int currentChpxIndex = _charParsingState.getCurrentPropIndex();
        int currentArraySize = fkp.size();

        //get the character runs for this paragraph
        int charStart = 0;
        int charEnd = 0;
        //add the character runs
        // NOTE(review): the trailing `while(...)` implies a do/while loop, but the `do`,
        // the braces, and probably an `else` plus index increment/exit statements are
        // missing from this listing; the nesting below cannot be determined from here.
          if (currentChpxIndex < currentArraySize)
            charStart = fkp.getStart(currentChpxIndex);
            charEnd = fkp.getEnd(currentChpxIndex);
            byte[] chpx = fkp.getGrpprl(currentChpxIndex);
            // The run reported to the listener is clipped to the requested [start, end] range.
            _listener.characterRun(new ChpxNode(Math.max(charStart, start),  Math.min(charEnd, end), chpx));

            if (charEnd < end)
              // Record the current page index, page and run index in the parsing state.
              _charParsingState.setState(currentPageIndex, fkp, currentChpxIndex);
            // Move to the next page: read its page number from the table buffer, then
            // copy the 512-byte page at (page number * 512) out of the main document.
            int currentCharPage = LittleEndian.getInt(_tableBuffer, charOffset + charPlcf.getStructOffset(++currentPageIndex));
            byte[] byteFkp = new byte[512];
            System.arraycopy(_mainDocument, (currentCharPage * 512), byteFkp, 0, 512);
            fkp = new CHPFormattedDiskPage(byteFkp);
            currentChpxIndex = 0;
            currentArraySize = fkp.size();
        while(currentPageIndex < charPlcfLen);
private voidinitDocumentProperties()
Initializes the DocumentProperties object unique to this document.

        // Offset and length of the document-properties bytes, as reported by the FIB.
        int pos = _fib.getFcDop();
        int size = _fib.getLcbDop();
        byte[] dopArray = new byte[size];

        // Copy them out of the table buffer and hand the parsed object to the listener.
        System.arraycopy(_tableBuffer, pos, dopArray, 0, size);
        _listener.document(new DocumentProperties(dopArray));
private voidinitFormattingProperties()
Initializes all of the formatting properties for a Word document.


private voidinitParagraphProperties()
Initializes the Paragraph Properties BTree.

        // Walks every paragraph page listed in the paragraph bin table, reporting each
        // paragraph and character runs from the character pages to the listener.
        int parOffset = _fib.getFcPlcfbtePapx();
        int parPlcSize = _fib.getLcbPlcfbtePapx();

        int charOffset = _fib.getFcPlcfbteChpx();
        int charPlcSize = _fib.getLcbPlcfbteChpx();

        PlexOfCps charPlcf = new PlexOfCps(charPlcSize, 4);
        PlexOfCps parPlcf = new PlexOfCps(parPlcSize, 4);

        //Initialize character property stuff
        // Load the first character page: its page number comes from the table buffer,
        // and the page is the 512 bytes at (page number * 512) in the main document.
        int currentCharPage = LittleEndian.getInt(_tableBuffer, charOffset + charPlcf.getStructOffset(0));
        int charPlcfLen = charPlcf.length();
        int currentPageIndex = 0;
        byte[] fkp = new byte[512];
        System.arraycopy(_mainDocument, (currentCharPage * 512), fkp, 0, 512);
        CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(fkp);
        int currentChpxIndex = 0;
        int currentArraySize = cfkp.size();

        int arraySize = parPlcf.length();

        //first we must go through the bin table and find the fkps
        // NOTE(review): the braces for both `for` loops are missing from this listing.
        for(int x = 0; x < arraySize; x++)
            int PN = LittleEndian.getInt(_tableBuffer, parOffset + parPlcf.getStructOffset(x));

            fkp = new byte[512];
            System.arraycopy(_mainDocument, (PN * 512), fkp, 0, 512);

            PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(fkp);
            //take each fkp and get the paps
            int crun = pfkp.size();
            for(int y = 0; y < crun; y++)
                //get the beginning fc of each paragraph text run
                int fcStart = pfkp.getStart(y);
                int fcEnd = pfkp.getEnd(y);

                //get the papx for this paragraph
                byte[] papx = pfkp.getGrpprl(y);

                _listener.paragraph(new PapxNode(fcStart, fcEnd, papx));

                //get the character runs for this paragraph
                int charStart = 0;
                int charEnd = 0;
                //add the character runs
                // NOTE(review): the trailing `while(...)` implies a do/while loop, but the
                // `do`, the braces, and probably `else`/increment/exit statements are
                // missing here; the nesting below cannot be determined from this listing.
                  if (currentChpxIndex < currentArraySize)
                    charStart = cfkp.getStart(currentChpxIndex);
                    charEnd = cfkp.getEnd(currentChpxIndex);
                    byte[] chpx = cfkp.getGrpprl(currentChpxIndex);
                    _listener.characterRun(new ChpxNode(charStart, charEnd, chpx));
                    if (charEnd < fcEnd)
                    // Load the next character page, using the same 512-byte page layout as above.
                    currentCharPage = LittleEndian.getInt(_tableBuffer, charOffset + charPlcf.getStructOffset(++currentPageIndex));
                    fkp = new byte[512];
                    System.arraycopy(_mainDocument, (currentCharPage * 512), fkp, 0, 512);
                    cfkp = new CHPFormattedDiskPage(fkp);
                    currentChpxIndex = 0;
                    currentArraySize = cfkp.size();
                // NOTE(review): this compares a page number read from the table buffer with
                // the plex length; the overload taking explicit offsets compares the page
                // *index* with the plex length instead — confirm which is intended.
                while(currentCharPage <= charPlcfLen + 1);



private voidinitParagraphProperties(int parOffset, org.apache.poi.hdf.model.hdftypes.PlexOfCps parPlcf, int charOffset, org.apache.poi.hdf.model.hdftypes.PlexOfCps charPlcf, int start, int end)

        // Emits paragraphs (and, through initCharacterProperties, their character runs)
        // for the range [start, end], resuming from the position in _parParsingState.
        //Initialize paragraph property stuff
        //int currentParPage = _parParsingState.getCurrentPage();
        int parPlcfLen = parPlcf.length();
        int currentPageIndex = _parParsingState.getCurrentPageIndex();
        FormattedDiskPage fkp = _parParsingState.getFkp();
        int currentPapxIndex = _parParsingState.getCurrentPropIndex();
        int currentArraySize = fkp.size();

        // NOTE(review): the trailing `while(...)` implies a do/while loop, but the `do`,
        // the braces, and probably `else`/increment/exit statements are missing from this
        // listing; the nesting below cannot be determined from here.
          if (currentPapxIndex < currentArraySize)
            int parStart = fkp.getStart(currentPapxIndex);
            int parEnd = fkp.getEnd(currentPapxIndex);
            byte[] papx = fkp.getGrpprl(currentPapxIndex);
            // The paragraph and its character runs are both clipped to [start, end].
            _listener.paragraph(new PapxNode(Math.max(parStart, start), Math.min(parEnd, end), papx));
            initCharacterProperties(charOffset, charPlcf, Math.max(start, parStart), Math.min(parEnd, end));
            if (parEnd < end)
              //save the state
              _parParsingState.setState(currentPageIndex, fkp, currentPapxIndex);
            // Move to the next paragraph page: read its page number from the table buffer,
            // then copy the 512-byte page at (page number * 512) out of the main document.
            int currentParPage = LittleEndian.getInt(_tableBuffer, parOffset + parPlcf.getStructOffset(++currentPageIndex));
            byte byteFkp[] = new byte[512];
            System.arraycopy(_mainDocument, (currentParPage * 512), byteFkp, 0, 512);
            fkp = new PAPFormattedDiskPage(byteFkp);
            currentPapxIndex = 0;
            currentArraySize = fkp.size();
        while(currentPageIndex < parPlcfLen);
private voidinitParsingStates(int parOffset, org.apache.poi.hdf.model.hdftypes.PlexOfCps parPlcf, int charOffset, org.apache.poi.hdf.model.hdftypes.PlexOfCps charPlcf)

        // Builds the initial character parsing state from the first character page
        // (struct offset 0 of charPlcf), read as 512 bytes at (page number * 512).
        int currentCharPage = LittleEndian.getInt(_tableBuffer, charOffset + charPlcf.getStructOffset(0));
        byte[] fkp = new byte[512];
        System.arraycopy(_mainDocument, (currentCharPage * 512), fkp, 0, 512);
        CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(fkp);
        // NOTE(review): the first argument here is a page number, while other code in this
        // class reads the state back with getCurrentPageIndex() and uses it as a plex
        // index — confirm what ParsingState's constructor expects.
        _charParsingState = new ParsingState(currentCharPage, cfkp);

        // Same again for the first paragraph page.
        int currentParPage = LittleEndian.getInt(_tableBuffer, parOffset + parPlcf.getStructOffset(0));
        fkp = new byte[512];
        System.arraycopy(_mainDocument, (currentParPage * 512), fkp, 0, 512);
        PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(fkp);
        _parParsingState = new ParsingState(currentParPage, pfkp);
private voidinitSectionProperties()
Initializes the SectionProperties BTree.

      // Walks the section table in two passes (main body, then header sections) and
      // drives paragraph/character parsing for each section's range.
      int ccpText = _fib.getCcpText();
      // NOTE(review): ccpFtn is not used by any statement visible in this listing.
      int ccpFtn = _fib.getCcpFtn();

      int fcMin = _fib.getFcMin();
      int plcfsedFC = _fib.getFcPlcfsed();
      int plcfsedSize = _fib.getLcbPlcfsed();

      int parOffset = _fib.getFcPlcfbtePapx();
      int parPlcSize = _fib.getLcbPlcfbtePapx();

      int charOffset = _fib.getFcPlcfbteChpx();
      int charPlcSize = _fib.getLcbPlcfbteChpx();

      PlexOfCps charPlcf = new PlexOfCps(charPlcSize, 4);
      PlexOfCps parPlcf = new PlexOfCps(parPlcSize, 4);

      // Set up _charParsingState/_parParsingState before any section is processed.
      initParsingStates(parOffset, parPlcf, charOffset, charPlcf);

      //byte[] plcfsed = new byte[plcfsedSize];
      //System.arraycopy(_tableBuffer, plcfsedFC, plcfsed, 0, plcfsedSize);

      PlexOfCps plcfsed = new PlexOfCps(plcfsedSize, 12);
      int arraySize = plcfsed.length();

      // The main body text spans [fcMin, fcMin + ccpText].
      int start = fcMin;
      int end = fcMin + ccpText;
      int x = 0;
      int sectionEnd = 0;

      //do the main body sections
      // NOTE(review): no increment of `x`, no loop exit, and no use of `node` is visible in
      // this loop; those statements and the braces appear to have been dropped from the
      // listing — confirm against the real source.
      while (x < arraySize)
          // Section bounds are read from the table buffer and offset by fcMin.
          int sectionStart = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getIntOffset(x)) + fcMin;
          sectionEnd = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getIntOffset(x + 1)) + fcMin;
          int sepxStart = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getStructOffset(x) + 2);
          // The section data in the main document starts with a 2-byte length.
          int sepxSize = LittleEndian.getShort(_mainDocument, sepxStart);

          byte[] sepx = new byte[sepxSize];
          System.arraycopy(_mainDocument, sepxStart + 2, sepx, 0, sepxSize);
          SepxNode node = new SepxNode(x + 1, sectionStart, sectionEnd, sepx);
          // Paragraph parsing for this section is capped at the end of the main body text.
          initParagraphProperties(parOffset, parPlcf, charOffset, charPlcf, sectionStart, Math.min(end, sectionEnd));

          // NOTE(review): the body of this `if` is missing from the listing.
          if (sectionEnd > end)
      //do the header sections
      // Continues from the value of `x` left by the loop above.
      for (; x < arraySize; x++)// && sectionEnd <= end; x++)
          int sectionStart = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getIntOffset(x)) + fcMin;
          sectionEnd = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getIntOffset(x + 1)) + fcMin;
          int sepxStart = LittleEndian.getInt(_tableBuffer, plcfsedFC + plcfsed.getStructOffset(x) + 2);
          int sepxSize = LittleEndian.getShort(_mainDocument, sepxStart);

          byte[] sepx = new byte[sepxSize];
          System.arraycopy(_mainDocument, sepxStart + 2, sepx, 0, sepxSize);
          SepxNode node = new SepxNode(x + 1, sectionStart, sectionEnd, sepx);
          // Here the range starts no earlier than the end of the main body text.
          initParagraphProperties(parOffset, parPlcf, charOffset, charPlcf, Math.max(sectionStart, end), sectionEnd);

private voidinitTableStream()
Initializes the table stream


        // NOTE(review): as listed, `tablename` is still null when passed to getEntry();
        // the statements that choose the table stream name appear to be missing from this
        // listing — confirm against the real source.
        String tablename = null;

        DocumentEntry tableEntry = (DocumentEntry)_filesystem.getRoot().getEntry(tablename);

        //load the table stream into a buffer
        // NOTE(review): only the allocation is visible; the read that fills _tableBuffer
        // is not shown in this listing.
        int size = tableEntry.getSize();
        _tableBuffer = new byte[size];
private voidinitTextPieces()
Initializes the text pieces. Text is divided into pieces because some "pieces" may only contain unicode characters.


        // Start position in the table buffer, as reported by the FIB.
        int pos = _fib.getFcClx();

        //skips through the prms before we reach the piece table. These contain data
        //for actual fast saved files
        // NOTE(review): braces are missing throughout this listing, and other statements
        // may have been dropped with them; verify the loop body against the real source.
        while (_tableBuffer[pos] == 1)
            int skip = LittleEndian.getShort(_tableBuffer, pos);
            pos += 2 + skip;
        // A marker byte of 2 is required at this point; anything else is treated as corruption.
        // NOTE(review): an `else` introducing the parsing branch below appears to be missing.
        if(_tableBuffer[pos] != 2)
            throw new IOException("The text piece table is corrupted");
            //parse out the text pieces
            int pieceTableSize = LittleEndian.getInt(_tableBuffer, ++pos);
            pos += 4;
            // Piece count as computed here: (table size - 4) / 12.
            int pieces = (pieceTableSize - 4) / 12;
            for (int x = 0; x < pieces; x++)
                // Descriptors follow the (pieces + 1) 4-byte entries and are 8 bytes apart;
                // the file position is read 2 bytes into each descriptor.
                int filePos = LittleEndian.getInt(_tableBuffer, pos + ((pieces + 1) * 4) + (x * 8) + 2);
                boolean unicode = false;
                // The 0x40000000 bit being clear marks the piece as unicode.
                // NOTE(review): an `else` (with braces) around the next three statements
                // appears to be missing; as listed, `unicode = true` would always be overwritten.
                if ((filePos & 0x40000000) == 0)
                    unicode = true;
                    unicode = false;
                    filePos &= ~(0x40000000);//gives me FC in doc stream
                    filePos /= 2;
                // Piece length is the difference between consecutive 4-byte entries.
                int totLength = LittleEndian.getInt(_tableBuffer, pos + (x + 1) * 4) -
                                LittleEndian.getInt(_tableBuffer, pos + (x * 4));

                // NOTE(review): `piece` is not used by any visible statement; the call that
                // hands it to the listener was presumably dropped — confirm.
                TextPiece piece = new TextPiece(filePos, totLength, unicode);



public static voidmain(java.lang.String[] args)

        // Ad-hoc driver: builds a factory from a hard-coded file path.
        HDFObjectFactory f = new HDFObjectFactory(new FileInputStream("c:\\test.doc"));
        int k = 0;
      // NOTE(review): the matching `try`, the braces and the handler body are missing
      // from this listing.
      catch(Throwable t)