TestIndexReaderpublic class TestIndexReader extends TestCase Copyright 2004 The Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. |
Constructors Summary |
---|
public TestIndexReader(String name)
super(name);
|
Methods Summary |
---|
private void | addDoc(org.apache.lucene.index.IndexWriter writer, java.lang.String value)
Document doc = new Document();
doc.add(new Field("content", value, Field.Store.NO, Field.Index.TOKENIZED));
writer.addDocument(doc);
| private void | addDocumentWithDifferentFields(org.apache.lucene.index.IndexWriter writer)
Document doc = new Document();
doc.add(new Field("keyword2","test1", Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field("text2","test1", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("unindexed2","test1", Field.Store.YES, Field.Index.NO));
doc.add(new Field("unstored2","test1", Field.Store.NO, Field.Index.TOKENIZED));
writer.addDocument(doc);
| private void | addDocumentWithFields(org.apache.lucene.index.IndexWriter writer)
Document doc = new Document();
doc.add(new Field("keyword","test1", Field.Store.YES, Field.Index.UN_TOKENIZED));
doc.add(new Field("text","test1", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("unindexed","test1", Field.Store.YES, Field.Index.NO));
doc.add(new Field("unstored","test1", Field.Store.NO, Field.Index.TOKENIZED));
writer.addDocument(doc);
| private void | addDocumentWithTermVectorFields(org.apache.lucene.index.IndexWriter writer)
Document doc = new Document();
doc.add(new Field("tvnot","tvnot", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO));
doc.add(new Field("termvector","termvector", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
doc.add(new Field("tvoffset","tvoffset", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_OFFSETS));
doc.add(new Field("tvposition","tvposition", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS));
doc.add(new Field("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
writer.addDocument(doc);
| private void | assertTermDocsCount(java.lang.String msg, org.apache.lucene.index.IndexReader reader, org.apache.lucene.index.Term term, int expected)
TermDocs tdocs = null;
try {
tdocs = reader.termDocs(term);
assertNotNull(msg + ", null TermDocs", tdocs);
int count = 0;
while(tdocs.next()) {
count++;
}
assertEquals(msg + ", count mismatch", expected, count);
} finally {
if (tdocs != null)
tdocs.close();
}
| private void | deleteReaderReaderConflict(boolean optimize)
Directory dir = getDirectory(true);
Term searchTerm1 = new Term("content", "aaa");
Term searchTerm2 = new Term("content", "bbb");
Term searchTerm3 = new Term("content", "ccc");
// add 100 documents with term : aaa
// add 100 documents with term : bbb
// add 100 documents with term : ccc
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
for (int i = 0; i < 100; i++)
{
addDoc(writer, searchTerm1.text());
addDoc(writer, searchTerm2.text());
addDoc(writer, searchTerm3.text());
}
if(optimize)
writer.optimize();
writer.close();
// OPEN TWO READERS
// Both readers get segment info as exists at this time
IndexReader reader1 = IndexReader.open(dir);
assertEquals("first opened", 100, reader1.docFreq(searchTerm1));
assertEquals("first opened", 100, reader1.docFreq(searchTerm2));
assertEquals("first opened", 100, reader1.docFreq(searchTerm3));
assertTermDocsCount("first opened", reader1, searchTerm1, 100);
assertTermDocsCount("first opened", reader1, searchTerm2, 100);
assertTermDocsCount("first opened", reader1, searchTerm3, 100);
IndexReader reader2 = IndexReader.open(dir);
assertEquals("first opened", 100, reader2.docFreq(searchTerm1));
assertEquals("first opened", 100, reader2.docFreq(searchTerm2));
assertEquals("first opened", 100, reader2.docFreq(searchTerm3));
assertTermDocsCount("first opened", reader2, searchTerm1, 100);
assertTermDocsCount("first opened", reader2, searchTerm2, 100);
assertTermDocsCount("first opened", reader2, searchTerm3, 100);
// DELETE DOCS FROM READER 2 and CLOSE IT
// delete documents containing term: aaa
// when the reader is closed, the segment info is updated and
// the first reader is now stale
reader2.delete(searchTerm1);
assertEquals("after delete 1", 100, reader2.docFreq(searchTerm1));
assertEquals("after delete 1", 100, reader2.docFreq(searchTerm2));
assertEquals("after delete 1", 100, reader2.docFreq(searchTerm3));
assertTermDocsCount("after delete 1", reader2, searchTerm1, 0);
assertTermDocsCount("after delete 1", reader2, searchTerm2, 100);
assertTermDocsCount("after delete 1", reader2, searchTerm3, 100);
reader2.close();
// Make sure reader 1 is unchanged since it was open earlier
assertEquals("after delete 1", 100, reader1.docFreq(searchTerm1));
assertEquals("after delete 1", 100, reader1.docFreq(searchTerm2));
assertEquals("after delete 1", 100, reader1.docFreq(searchTerm3));
assertTermDocsCount("after delete 1", reader1, searchTerm1, 100);
assertTermDocsCount("after delete 1", reader1, searchTerm2, 100);
assertTermDocsCount("after delete 1", reader1, searchTerm3, 100);
// ATTEMPT TO DELETE FROM STALE READER
// delete documents containing term: bbb
try {
reader1.delete(searchTerm2);
fail("Delete allowed from a stale index reader");
} catch (IOException e) {
/* success */
}
// RECREATE READER AND TRY AGAIN
reader1.close();
reader1 = IndexReader.open(dir);
assertEquals("reopened", 100, reader1.docFreq(searchTerm1));
assertEquals("reopened", 100, reader1.docFreq(searchTerm2));
assertEquals("reopened", 100, reader1.docFreq(searchTerm3));
assertTermDocsCount("reopened", reader1, searchTerm1, 0);
assertTermDocsCount("reopened", reader1, searchTerm2, 100);
assertTermDocsCount("reopened", reader1, searchTerm3, 100);
reader1.delete(searchTerm2);
assertEquals("deleted 2", 100, reader1.docFreq(searchTerm1));
assertEquals("deleted 2", 100, reader1.docFreq(searchTerm2));
assertEquals("deleted 2", 100, reader1.docFreq(searchTerm3));
assertTermDocsCount("deleted 2", reader1, searchTerm1, 0);
assertTermDocsCount("deleted 2", reader1, searchTerm2, 0);
assertTermDocsCount("deleted 2", reader1, searchTerm3, 100);
reader1.close();
// Open another reader to confirm that everything is deleted
reader2 = IndexReader.open(dir);
assertEquals("reopened 2", 100, reader2.docFreq(searchTerm1));
assertEquals("reopened 2", 100, reader2.docFreq(searchTerm2));
assertEquals("reopened 2", 100, reader2.docFreq(searchTerm3));
assertTermDocsCount("reopened 2", reader2, searchTerm1, 0);
assertTermDocsCount("reopened 2", reader2, searchTerm2, 0);
assertTermDocsCount("reopened 2", reader2, searchTerm3, 100);
reader2.close();
dir.close();
| private void | deleteReaderWriterConflict(boolean optimize)
//Directory dir = new RAMDirectory();
Directory dir = getDirectory(true);
Term searchTerm = new Term("content", "aaa");
Term searchTerm2 = new Term("content", "bbb");
// add 100 documents with term : aaa
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
for (int i = 0; i < 100; i++)
{
addDoc(writer, searchTerm.text());
}
writer.close();
// OPEN READER AT THIS POINT - this should fix the view of the
// index at the point of having 100 "aaa" documents and 0 "bbb"
IndexReader reader = IndexReader.open(dir);
assertEquals("first docFreq", 100, reader.docFreq(searchTerm));
assertEquals("first docFreq", 0, reader.docFreq(searchTerm2));
assertTermDocsCount("first reader", reader, searchTerm, 100);
assertTermDocsCount("first reader", reader, searchTerm2, 0);
// add 100 documents with term : bbb
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
for (int i = 0; i < 100; i++)
{
addDoc(writer, searchTerm2.text());
}
// REQUEST OPTIMIZATION
// This causes a new segment to become current for all subsequent
// searchers. Because of this, deletions made via a previously open
// reader, which would be applied to that reader's segment, are lost
// for subsequent searchers/readers
if(optimize)
writer.optimize();
writer.close();
// The reader should not see the new data
assertEquals("first docFreq", 100, reader.docFreq(searchTerm));
assertEquals("first docFreq", 0, reader.docFreq(searchTerm2));
assertTermDocsCount("first reader", reader, searchTerm, 100);
assertTermDocsCount("first reader", reader, searchTerm2, 0);
// DELETE DOCUMENTS CONTAINING TERM: aaa
// NOTE: the reader was created when only "aaa" documents were in
int deleted = 0;
try {
deleted = reader.delete(searchTerm);
fail("Delete allowed on an index reader with stale segment information");
} catch (IOException e) {
/* success */
}
// Re-open index reader and try again. This time it should see
// the new data.
reader.close();
reader = IndexReader.open(dir);
assertEquals("first docFreq", 100, reader.docFreq(searchTerm));
assertEquals("first docFreq", 100, reader.docFreq(searchTerm2));
assertTermDocsCount("first reader", reader, searchTerm, 100);
assertTermDocsCount("first reader", reader, searchTerm2, 100);
deleted = reader.delete(searchTerm);
assertEquals("deleted count", 100, deleted);
assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm));
assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm2));
assertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
assertTermDocsCount("deleted termDocs", reader, searchTerm2, 100);
reader.close();
// CREATE A NEW READER and re-test
reader = IndexReader.open(dir);
assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm));
assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm2));
assertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
assertTermDocsCount("deleted termDocs", reader, searchTerm2, 100);
reader.close();
| private org.apache.lucene.store.Directory | getDirectory(boolean create)
return FSDirectory.getDirectory(new File(System.getProperty("tempDir"), "testIndex"), create);
| public static void | main(java.lang.String[] args)Main for running test case by itself.
TestRunner.run (new TestSuite(TestIndexReader.class));
// TestRunner.run (new TestIndexReader("testBasicDelete"));
// TestRunner.run (new TestIndexReader("testDeleteReaderWriterConflict"));
// TestRunner.run (new TestIndexReader("testDeleteReaderReaderConflict"));
// TestRunner.run (new TestIndexReader("testFilesOpenClose"));
| public void | testBasicDelete()
Directory dir = new RAMDirectory();
IndexWriter writer = null;
IndexReader reader = null;
Term searchTerm = new Term("content", "aaa");
// add 100 documents with term : aaa
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
for (int i = 0; i < 100; i++)
{
addDoc(writer, searchTerm.text());
}
writer.close();
// OPEN READER AT THIS POINT - this should fix the view of the
// index at the point of having 100 "aaa" documents and 0 "bbb"
reader = IndexReader.open(dir);
assertEquals("first docFreq", 100, reader.docFreq(searchTerm));
assertTermDocsCount("first reader", reader, searchTerm, 100);
// DELETE DOCUMENTS CONTAINING TERM: aaa
int deleted = 0;
reader = IndexReader.open(dir);
deleted = reader.delete(searchTerm);
assertEquals("deleted count", 100, deleted);
assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm));
assertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
reader.close();
// CREATE A NEW READER and re-test
reader = IndexReader.open(dir);
assertEquals("deleted docFreq", 100, reader.docFreq(searchTerm));
assertTermDocsCount("deleted termDocs", reader, searchTerm, 0);
reader.close();
| public void | testDeleteReaderReaderConflictOptimized()
deleteReaderReaderConflict(true);
| public void | testDeleteReaderReaderConflictUnoptimized()
deleteReaderReaderConflict(false);
| public void | testDeleteReaderWriterConflictOptimized()
deleteReaderWriterConflict(true);
| public void | testDeleteReaderWriterConflictUnoptimized()
deleteReaderWriterConflict(false);
| public void | testFilesOpenClose()
// Create initial data set
Directory dir = getDirectory(true);
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
addDoc(writer, "test");
writer.close();
dir.close();
// Try to erase the data - this ensures that the writer closed all files
dir = getDirectory(true);
// Now create the data set again, just as before
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
addDoc(writer, "test");
writer.close();
dir.close();
// Now open existing directory and test that reader closes all files
dir = getDirectory(false);
IndexReader reader1 = IndexReader.open(dir);
reader1.close();
dir.close();
// The following will fail if reader did not close all files
dir = getDirectory(true);
| public void | testGetFieldNames()Tests the IndexReader.getFieldNames implementation
RAMDirectory d = new RAMDirectory();
// set up writer
IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true);
addDocumentWithFields(writer);
writer.close();
// set up reader
IndexReader reader = IndexReader.open(d);
Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
assertTrue(fieldNames.contains("keyword"));
assertTrue(fieldNames.contains("text"));
assertTrue(fieldNames.contains("unindexed"));
assertTrue(fieldNames.contains("unstored"));
// add more documents
writer = new IndexWriter(d, new StandardAnalyzer(), false);
// want to get some more segments here
for (int i = 0; i < 5*writer.getMergeFactor(); i++)
{
addDocumentWithFields(writer);
}
// new fields are in some different segments (we hope)
for (int i = 0; i < 5*writer.getMergeFactor(); i++)
{
addDocumentWithDifferentFields(writer);
}
// new termvector fields
for (int i = 0; i < 5*writer.getMergeFactor(); i++)
{
addDocumentWithTermVectorFields(writer);
}
writer.close();
// verify fields again
reader = IndexReader.open(d);
fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
assertEquals(13, fieldNames.size()); // the following fields
assertTrue(fieldNames.contains("keyword"));
assertTrue(fieldNames.contains("text"));
assertTrue(fieldNames.contains("unindexed"));
assertTrue(fieldNames.contains("unstored"));
assertTrue(fieldNames.contains("keyword2"));
assertTrue(fieldNames.contains("text2"));
assertTrue(fieldNames.contains("unindexed2"));
assertTrue(fieldNames.contains("unstored2"));
assertTrue(fieldNames.contains("tvnot"));
assertTrue(fieldNames.contains("termvector"));
assertTrue(fieldNames.contains("tvposition"));
assertTrue(fieldNames.contains("tvoffset"));
assertTrue(fieldNames.contains("tvpositionoffset"));
// verify that only indexed fields were returned
fieldNames = reader.getFieldNames(IndexReader.FieldOption.INDEXED);
assertEquals(11, fieldNames.size()); // 6 original + the 5 termvector fields
assertTrue(fieldNames.contains("keyword"));
assertTrue(fieldNames.contains("text"));
assertTrue(fieldNames.contains("unstored"));
assertTrue(fieldNames.contains("keyword2"));
assertTrue(fieldNames.contains("text2"));
assertTrue(fieldNames.contains("unstored2"));
assertTrue(fieldNames.contains("tvnot"));
assertTrue(fieldNames.contains("termvector"));
assertTrue(fieldNames.contains("tvposition"));
assertTrue(fieldNames.contains("tvoffset"));
assertTrue(fieldNames.contains("tvpositionoffset"));
// verify that only unindexed fields were returned
fieldNames = reader.getFieldNames(IndexReader.FieldOption.UNINDEXED);
assertEquals(2, fieldNames.size()); // the following fields
assertTrue(fieldNames.contains("unindexed"));
assertTrue(fieldNames.contains("unindexed2"));
// verify index term vector fields
fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR);
assertEquals(1, fieldNames.size()); // 1 field has term vector only
assertTrue(fieldNames.contains("termvector"));
fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION);
assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors
assertTrue(fieldNames.contains("tvposition"));
fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET);
assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors
assertTrue(fieldNames.contains("tvoffset"));
fieldNames = reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET);
assertEquals(1, fieldNames.size()); // 4 fields are indexed with term vectors
assertTrue(fieldNames.contains("tvpositionoffset"));
| public void | testIsCurrent()
RAMDirectory d = new RAMDirectory();
IndexWriter writer = new IndexWriter(d, new StandardAnalyzer(), true);
addDocumentWithFields(writer);
writer.close();
// set up reader:
IndexReader reader = IndexReader.open(d);
assertTrue(reader.isCurrent());
// modify index by adding another document:
writer = new IndexWriter(d, new StandardAnalyzer(), false);
addDocumentWithFields(writer);
writer.close();
assertFalse(reader.isCurrent());
// re-create index:
writer = new IndexWriter(d, new StandardAnalyzer(), true);
addDocumentWithFields(writer);
writer.close();
assertFalse(reader.isCurrent());
reader.close();
| public void | testLastModified()
assertFalse(IndexReader.indexExists("there_is_no_such_index"));
Directory dir = new RAMDirectory();
assertFalse(IndexReader.indexExists(dir));
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
addDocumentWithFields(writer);
assertTrue(IndexReader.isLocked(dir)); // writer open, so dir is locked
writer.close();
assertTrue(IndexReader.indexExists(dir));
IndexReader reader = IndexReader.open(dir);
assertFalse(IndexReader.isLocked(dir)); // reader only, no lock
long version = IndexReader.lastModified(dir);
reader.close();
// modify index and check version has been incremented:
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
addDocumentWithFields(writer);
writer.close();
reader = IndexReader.open(dir);
assertTrue(version < IndexReader.getCurrentVersion(dir));
reader.close();
| public void | testLock()
Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
addDocumentWithFields(writer);
writer.close();
writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
IndexReader reader = IndexReader.open(dir);
try {
reader.delete(0);
fail("expected lock");
} catch(IOException e) {
// expected exception
}
IndexReader.unlock(dir); // this should not be done in the real world!
reader.delete(0);
reader.close();
writer.close();
| public void | testUndeleteAll()
Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
addDocumentWithFields(writer);
addDocumentWithFields(writer);
writer.close();
IndexReader reader = IndexReader.open(dir);
reader.delete(0);
reader.delete(1);
reader.undeleteAll();
reader.close();
reader = IndexReader.open(dir);
assertEquals(2, reader.numDocs()); // nothing has really been deleted thanks to undeleteAll()
reader.close();
|
|