File | Doc | Category | Size | Date | Package
TestGermanStemFilter.java | API Doc | Apache Lucene 2.1.0 | 2781 | Wed Feb 14 10:46:28 GMT 2007 | org.apache.lucene.analysis.de

TestGermanStemFilter

public class TestGermanStemFilter extends TestCase
Test the German stemmer. The stemming algorithm is known to work less than perfectly, as it doesn't use any word lists with exceptions. We also check some of the cases where the algorithm is wrong.
author
Daniel Naber

Fields Summary
Constructors Summary
Methods Summary
private voidcheck(java.lang.String input, java.lang.String expected)

    StandardTokenizer tokenStream = new StandardTokenizer(new StringReader(input));
    GermanStemFilter filter = new GermanStemFilter(tokenStream);
    Token t = filter.next();
    if (t == null)
      fail();
    assertEquals(expected, t.termText());
    filter.close();
  
public voidtestStemming()

    try {
      // read test cases from external file:
      File dataDir = new File(System.getProperty("dataDir", "./bin"));
      File testFile = new File(dataDir, "org/apache/lucene/analysis/de/data.txt");
      FileInputStream fis = new FileInputStream(testFile);
      InputStreamReader isr = new InputStreamReader(fis, "iso-8859-1");
      BufferedReader breader = new BufferedReader(isr);
      while(true) {
        String line = breader.readLine();
        if (line == null)
          break;
        line = line.trim();
        if (line.startsWith("#") || line.equals(""))
          continue;    // ignore comments and empty lines
        String[] parts = line.split(";");
        //System.out.println(parts[0] + " -- " + parts[1]);
        check(parts[0], parts[1]);
      }
      breader.close();
      isr.close();
      fis.close();
    } catch (IOException e) {
       e.printStackTrace();
       fail();
    }