File | Doc | Category | Size | Date | Package
TestSpellChecker.java | API Doc | Apache Lucene 1.9 | 3706 | Mon Feb 20 09:18:28 GMT 2006 | org.apache.lucene.search.spell

TestSpellChecker

public class TestSpellChecker extends TestCase
Test case
author
Nicolas Maisonneuve

Fields Summary
private SpellChecker
spellChecker
private Directory
userindex
private Directory
spellindex
Constructors Summary
Methods Summary
private voidaddwords(org.apache.lucene.index.IndexReader r, java.lang.String field)

    long time = System.currentTimeMillis();
    spellChecker.indexDictionary(new LuceneDictionary(r, field));
    time = System.currentTimeMillis() - time;
    //System.out.println("time to build " + field + ": " + time);
  
private intnumdoc()

    IndexReader rs = IndexReader.open(spellindex);
    int num = rs.numDocs();
    assertTrue(num != 0);
    //System.out.println("num docs: " + num);
    rs.close();
    return num;
  
protected voidsetUp()

    super.setUp();

    //create a user index
    userindex = new RAMDirectory();
    IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true);

    for (int i = 0; i < 1000; i++) {
      Document doc = new Document();
      doc.add(new Field("field1", English.intToEnglish(i), Field.Store.YES, Field.Index.TOKENIZED));
      doc.add(new Field("field2", English.intToEnglish(i + 1), Field.Store.YES, Field.Index.TOKENIZED)); // + word thousand
      writer.addDocument(doc);
    }
    writer.close();

    // create the spellChecker
    spellindex = new RAMDirectory();
    spellChecker = new SpellChecker(spellindex);
  
protected voidtearDown()

    spellChecker = null;
    super.tearDown();
  
public voidtestBuild()

    try {
      IndexReader r = IndexReader.open(userindex);

      spellChecker.clearIndex();

      addwords(r, "field1");
      int num_field1 = this.numdoc();

      addwords(r, "field2");
      int num_field2 = this.numdoc();

      assertEquals(num_field2, num_field1 + 1);

      // test small word
      String[] similar = spellChecker.suggestSimilar("fvie", 2);
      assertEquals(1, similar.length);
      assertEquals(similar[0], "five");

      similar = spellChecker.suggestSimilar("five", 2);
      assertEquals(1, similar.length);
      assertEquals(similar[0], "nine");     // don't suggest a word for itself

      similar = spellChecker.suggestSimilar("fiv", 2);
      assertEquals(1, similar.length);
      assertEquals(similar[0], "five");

      similar = spellChecker.suggestSimilar("ive", 2);
      assertEquals(1, similar.length);
      assertEquals(similar[0], "five");

      similar = spellChecker.suggestSimilar("fives", 2);
      assertEquals(1, similar.length);
      assertEquals(similar[0], "five");

      similar = spellChecker.suggestSimilar("fie", 2);
      assertEquals(1, similar.length);
      assertEquals(similar[0], "five");

      similar = spellChecker.suggestSimilar("fi", 2);
      assertEquals(0, similar.length);

      // test restraint to a field
      similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
      assertEquals(0, similar.length); // there isn't the term thousand in the field field1

      similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false);
      assertEquals(1, similar.length); // there is the term thousand in the field field2
    } catch (IOException e) {
      e.printStackTrace();
      fail();
    }