File | Doc | Category | Size | Date | Package
TestWordExtractor.java | API Doc | Apache Poi 3.0.1 | 3267 | Sun Mar 11 12:59:30 GMT 2007 | org.apache.poi.hwpf.extractor

TestWordExtractor

public class TestWordExtractor extends TestCase
Test the different routes to extracting text
author
Nick Burch (nick at torchbox dot com)

Fields Summary
private String[]
p_text1
private String
p_text1_block
private WordExtractor
extractor
private WordExtractor
extractor2
Constructors Summary
Methods Summary
protected voidsetUp()

		// Fixture setup: opens the two sample documents and builds the
		// expected flat text used by the tests below.

		// Directory containing the sample documents, read from a system property.
		String dirname = System.getProperty("HWPF.testdata.path");

		String filename = dirname + "/test2.doc";
		String filename2 = dirname + "/test.doc";

		// Close each stream as soon as its extractor has been constructed so
		// the fixture does not leak a file handle for every test method.
		// NOTE(review): relies on WordExtractor consuming the whole stream in
		// its constructor - confirm against the WordExtractor implementation.
		FileInputStream stream = new FileInputStream(filename);
		try {
			extractor = new WordExtractor(stream);
		} finally {
			stream.close();
		}

		FileInputStream stream2 = new FileInputStream(filename2);
		try {
			extractor2 = new WordExtractor(stream2);
		} finally {
			stream2.close();
		}

		// Build splat'd out text version: append every expected paragraph,
		// in order, onto p_text1_block.
		for(int i=0; i<p_text1.length; i++) {
			p_text1_block += p_text1[i];
		}
    
public voidtestExtractFromParagraphs()
Test paragraph based extraction

    	String[] text = extractor.getParagraphText();
    	
    	assertEquals(p_text1.length, text.length);
    	for(int i=0; i<p_text1.length; i++) {
    		assertEquals(p_text1[i], text[i]);
    	}
    	
    	// On second one, should fall back
    	assertEquals(1, extractor2.getParagraphText().length);
    
public voidtestExtractFromTextPieces()
Test textPieces based extraction

    	// Text taken from the text pieces of the first document must equal
    	// the expected block assembled in setUp().
    	String fromPieces = extractor.getTextFromPieces();
    	assertEquals(p_text1_block, fromPieces);
    
public voidtestGetText()
Test the paragraph -> flat extraction

    	assertEquals(p_text1_block, extractor.getText());
    	
    	// On second one, should fall back to text piece
    	assertEquals(extractor2.getTextFromPieces(), extractor2.getText());