TestSegmentTermEnum
public class TestSegmentTermEnum extends TestCase
Fields Summary |
---|
Directory | dir |
Methods Summary |
---|
private void | addDoc(org.apache.lucene.index.IndexWriter writer, java.lang.String value)
Document doc = new Document();
doc.add(new Field("content", value, Field.Store.NO, Field.Index.TOKENIZED));
writer.addDocument(doc);
| public void | testTermEnum()
IndexWriter writer = null;
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
// add 100 documents with term : aaa
// add 100 documents with terms: aaa bbb
// Therefore, term 'aaa' has document frequency of 200 and term 'bbb' 100
for (int i = 0; i < 100; i++) {
addDoc(writer, "aaa");
addDoc(writer, "aaa bbb");
}
writer.close();
// verify document frequency of terms in an unoptimized index
verifyDocFreq();
// merge segments by optimizing the index
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
writer.optimize();
writer.close();
// verify document frequency of terms in an optimized index
verifyDocFreq();
| private void | verifyDocFreq()
// Opens a reader on 'dir' and asserts that the term enumeration yields
// 'aaa' (document frequency 200) followed by 'bbb' (document frequency 100),
// first when enumerating all terms and then when seeking directly to 'aaa'.
IndexReader reader = IndexReader.open(dir);
try {
    // create enumeration of all terms
    TermEnum termEnum = reader.terms();
    // go to the first term (aaa); fail clearly if the enumeration is empty
    assertTrue(termEnum.next());
    // assert that term is 'aaa'
    assertEquals("aaa", termEnum.term().text());
    assertEquals(200, termEnum.docFreq());
    // go to the second term (bbb)
    assertTrue(termEnum.next());
    // assert that term is 'bbb'
    assertEquals("bbb", termEnum.term().text());
    assertEquals(100, termEnum.docFreq());
    termEnum.close();
    // create enumeration of terms after term 'aaa', including 'aaa'
    termEnum = reader.terms(new Term("content", "aaa"));
    // assert that term is 'aaa'
    assertEquals("aaa", termEnum.term().text());
    assertEquals(200, termEnum.docFreq());
    // go to term 'bbb'
    assertTrue(termEnum.next());
    // assert that term is 'bbb'
    assertEquals("bbb", termEnum.term().text());
    assertEquals(100, termEnum.docFreq());
    termEnum.close();
} finally {
    // close the reader even when an assertion fails
    reader.close();
}
|
|