File | Doc | Category | Size | Date | Package
Word2Forrest.java | API Doc | Apache Poi 3.0.1 | 5198 | Mon Jan 01 18:55:34 GMT 2007 | org.apache.poi.hwpf

Word2Forrest

public class Word2Forrest extends Object

Fields Summary
Writer
_out
HWPFDocument
_doc
Constructors Summary
/**
 * Converts a Word document into a Forrest "document-v11" XML document and
 * writes it, encoded as UTF-8, to the given stream.
 *
 * Paragraphs whose style name is "Heading N" start a new section with the
 * paragraph text as its title. Paragraphs whose first character run uses a
 * font whose name starts with "Courier" are collected into a source/CDATA
 * block. Every other non-blank paragraph becomes a p element. Blank
 * paragraphs are skipped.
 *
 * The stream is flushed but not closed; closing it is left to the caller.
 *
 * @param doc    the Word document to convert
 * @param stream the stream that receives the generated XML
 */
public Word2Forrest(HWPFDocument doc, OutputStream stream)

    OutputStreamWriter out = new OutputStreamWriter (stream, "UTF-8");
    _out = out;
    _doc = doc;

    init ();
    openDocument ();
    openBody ();

    Range r = doc.getRange ();
    StyleSheet styleSheet = doc.getStyleSheet ();

    int lenParagraph = r.numParagraphs ();
    // Stack of the heading levels whose <section> is still open. Tracking the
    // real nesting (instead of only the last heading level) keeps the
    // open/close tags balanced even when a document skips a heading level,
    // e.g. goes from "Heading 1" straight to "Heading 3". At most one level
    // is pushed per paragraph, so lenParagraph entries always suffice.
    int[] openLevels = new int[lenParagraph];
    int depth = 0;
    boolean inCode = false;
    for (int x = 0; x < lenParagraph; x++)
    {
      Paragraph p = r.getParagraph (x);
      String text = p.text ();
      if (text.trim ().length () == 0)
      {
        continue;
      }
      StyleDescription paragraphStyle = styleSheet.getStyleDescription (p.
        getStyleIndex ());
      String styleName = paragraphStyle.getName();

      // Only style names of the form "Heading <number>" count as headings.
      // Anything else starting with "Heading" (no number, trailing words)
      // is handled as body text instead of aborting the whole conversion.
      int headerLevel = -1;
      if (styleName.startsWith ("Heading") && styleName.length () > 8)
      {
        try
        {
          headerLevel = Integer.parseInt (styleName.substring (8).trim ());
        }
        catch (NumberFormatException e)
        {
          headerLevel = -1;
        }
      }

      if (headerLevel > 0)
      {
        if (inCode)
        {
          closeSource();
          inCode = false;
        }

        // Close every open section at the same or a deeper level, then
        // open the new one nested inside whatever remains open.
        while (depth > 0 && openLevels[depth - 1] >= headerLevel)
        {
          closeSection ();
          depth--;
        }
        openSection ();
        openLevels[depth++] = headerLevel;

        openTitle ();
        // Escape markup characters so the title cannot break the XML.
        writePlainText (text.replaceAll ("&", "&amp;")
                            .replaceAll ("<", "&lt;")
                            .replaceAll (">", "&gt;"));
        closeTitle ();
      }
      else
      {
        // Guard against a paragraph without character runs or a run
        // without a font name; such paragraphs are treated as body text.
        int cruns = p.numCharacterRuns ();
        String fontName = null;
        if (cruns > 0)
        {
          fontName = p.getCharacterRun (0).getFontName();
        }
        boolean isCode = fontName != null && fontName.startsWith ("Courier");

        if (isCode)
        {
          if (!inCode)
          {
            openSource ();
            inCode = true;
          }
          // Inside CDATA only the terminator "]]>" is special: split it
          // across two CDATA sections so it cannot end the block early.
          writePlainText (text.replaceAll ("\\]\\]>", "]]]]><![CDATA[>"));
        }
        else
        {
          if (inCode)
          {
            inCode = false;
            closeSource();
          }
          openParagraph();
          // Escape markup characters so the text cannot break the XML.
          writePlainText (text.replaceAll ("&", "&amp;")
                              .replaceAll ("<", "&lt;")
                              .replaceAll (">", "&gt;"));
          closeParagraph();
        }
      }
    }
    // A document that ends inside a code run still has its source block open.
    if (inCode)
    {
      closeSource ();
    }
    for (; depth > 0; depth--)
    {
      closeSection();
    }
    closeBody();
    closeDocument();
    _out.flush();

  
Methods Summary
public voidcloseBody()

      _out.write ("</body>\r\n");
    
public voidcloseDocument()

      _out.write ("</document>\r\n");
    
public voidcloseParagraph()

      _out.write ("</p>");
    
public voidcloseSection()

      _out.write ("</section>");

    
public voidcloseSource()

      _out.write ("]]></source>");
    
public voidcloseTitle()

      _out.write ("</title>");
    
public voidinit()

      _out.write ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n");
      _out.write ("<!DOCTYPE document PUBLIC \"-//APACHE//DTD Documentation V1.1//EN\" \"./dtd/document-v11.dtd\">\r\n");
    
public static voidmain(java.lang.String[] args)

    try
    {
      OutputStream out = new FileOutputStream("c:\\test.xml");

      new Word2Forrest(new HWPFDocument(new FileInputStream(args[0])), out);
      out.close();
    }
    catch (Throwable t)
    {
      t.printStackTrace();
    }

  
public voidopenBody()

      _out.write ("<body>\r\n");
    
public voidopenDocument()

      _out.write ("<document>\r\n");
    
public voidopenParagraph()

      _out.write ("<p>");
    
public voidopenSection()

      _out.write ("<section>");

    
public voidopenSource()

      _out.write ("<source><![CDATA[");
    
public voidopenTitle()

      _out.write ("<title>");
    
public voidwritePlainText(java.lang.String text)

      _out.write (text);