File | Doc | Category | Size | Date | Package
TestSpellChecker.java | API Doc | Apache Lucene 2.2.0 | 4375 | Sat Jun 16 22:21:02 BST 2007 | org.apache.lucene.search.spell

TestSpellChecker

public class TestSpellChecker extends TestCase
Spell checker test case
author
Nicolas Maisonneuve

Fields Summary
private SpellChecker
spellChecker
private Directory
userindex
private Directory
spellindex
Constructors Summary
Methods Summary
private voidaddwords(org.apache.lucene.index.IndexReader r, java.lang.String field)

    long time = System.currentTimeMillis();
    spellChecker.indexDictionary(new LuceneDictionary(r, field));
    time = System.currentTimeMillis() - time;
    //System.out.println("time to build " + field + ": " + time);
  
private intnumdoc()

    IndexReader rs = IndexReader.open(spellindex);
    int num = rs.numDocs();
    assertTrue(num != 0);
    //System.out.println("num docs: " + num);
    rs.close();
    return num;
  
protected voidsetUp()

    super.setUp();

    //create a user index
    userindex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true);

    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      doc.add(new Field("field1", English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("field2", English.intToEnglish(i + 1), Field.Store.YES, Field.Index.TOKENIZED)); // + word thousand
      writer.addDocument(doc);
    }
    writer.close();

    // create the spellChecker
    spellindex = new RAMDirectory();
    spellChecker = new SpellChecker(spellindex);
  
public voidtestBuild()

    IndexReader r = IndexReader.open(userindex);

    spellChecker.clearIndex();

    addwords(r, "field1");
    int num_field1 = this.numdoc();

    addwords(r, "field2");
    int num_field2 = this.numdoc();

    assertEquals(num_field2, num_field1 + 1);

    // test small word
    String[] similar = spellChecker.suggestSimilar("fvie", 2);
    assertEquals(1, similar.length);
    assertEquals(similar[0], "five");

    similar = spellChecker.suggestSimilar("five", 2);
    assertEquals(1, similar.length);
    assertEquals(similar[0], "nine");     // don't suggest a word for itself

    similar = spellChecker.suggestSimilar("fiv", 2);
    assertEquals(1, similar.length);
    assertEquals(similar[0], "five");

    similar = spellChecker.suggestSimilar("ive", 2);
    assertEquals(1, similar.length);
    assertEquals(similar[0], "five");

    similar = spellChecker.suggestSimilar("fives", 2);
    assertEquals(1, similar.length);
    assertEquals(similar[0], "five");

    similar = spellChecker.suggestSimilar("fie", 2);
    assertEquals(1, similar.length);
    assertEquals(similar[0], "five");

    similar = spellChecker.suggestSimilar("fi", 2);
    assertEquals(0, similar.length);

    // test restraint to a field
    similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
    assertEquals(0, similar.length); // there isn't the term thousand in the field field1

    similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false);
    assertEquals(1, similar.length); // there is the term thousand in the field field2