File | Doc | Category | Size | Date | Package
TestRussianAnalyzer.java | API Doc | Apache Lucene 1.9 | 4814 | Mon Feb 20 09:18:46 GMT 2006 | org.apache.lucene.analysis.ru

TestRussianAnalyzer

public class TestRussianAnalyzer extends TestCase
Test case for RussianAnalyzer.
author
Boris Okner
version
$Id: TestRussianAnalyzer.java 150998 2004-08-16 20:30:46Z dnaber $

(Omit source code)

Fields Summary
private InputStreamReader
inWords
private InputStreamReader
sampleUnicode
private Reader
inWordsKOI8
private Reader
sampleKOI8
private Reader
inWords1251
private Reader
sample1251
private File
dataDir
Constructors Summary
Methods Summary
protected voidsetUp()

      // Locate the root directory of the test fixtures: taken from the
      // "dataDir" system property, defaulting to "./bin" when it is unset.
      String dataRoot = System.getProperty("dataDir", "./bin");
      dataDir = new File(dataRoot);
    
public voidtest1251()

        // Runs RussianAnalyzer (CP1251 table) over test1251.txt and checks each
        // produced term against the next term read from res1251.htm by a
        // RussianLetterTokenizer.
        // Both files are decoded as "iso-8859-1" while the analyzer/tokenizer
        // are given the CP1251 table -- presumably so the raw bytes reach the
        // charset table unchanged; confirm against RussianCharsets.
        // NOTE: the loop stops when the analyzer is exhausted, so surplus
        // tokens remaining in the sample stream are not detected.
        RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.CP1251);

        // 1251
        inWords1251 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/test1251.txt")), "iso-8859-1");
        try
        {
            sample1251 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/res1251.htm")), "iso-8859-1");
            try
            {
                TokenStream in = ra.tokenStream("", inWords1251);
                RussianLetterTokenizer sample =
                    new RussianLetterTokenizer(
                        sample1251,
                        RussianCharsets.CP1251);

                for (;;)
                {
                    Token token = in.next();

                    if (token == null)
                    {
                        break;
                    }

                    Token sampleToken = sample.next();
                    assertEquals(
                        "1251",
                        token.termText(),
                        sampleToken == null
                        ? null
                        : sampleToken.termText());

                }
            }
            finally
            {
                // Close the expected-results reader even if an assertion or
                // a read failed above.
                sample1251.close();
            }
        }
        finally
        {
            // Close the input reader even if opening the sample file failed.
            inWords1251.close();
        }
    
public voidtestKOI8()

        // Runs RussianAnalyzer (KOI8 table) over testKOI8.txt and checks each
        // produced term against the next term read from resKOI8.htm by a
        // RussianLetterTokenizer.
        // Both files are decoded as "iso-8859-1" while the analyzer/tokenizer
        // are given the KOI8 table -- presumably so the raw bytes reach the
        // charset table unchanged; confirm against RussianCharsets.
        // NOTE: the loop stops when the analyzer is exhausted, so surplus
        // tokens remaining in the sample stream are not detected.
        RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.KOI8);

        // KOI8
        inWordsKOI8 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testKOI8.txt")), "iso-8859-1");
        try
        {
            sampleKOI8 = new InputStreamReader(new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resKOI8.htm")), "iso-8859-1");
            try
            {
                TokenStream in = ra.tokenStream("all", inWordsKOI8);
                RussianLetterTokenizer sample =
                    new RussianLetterTokenizer(
                        sampleKOI8,
                        RussianCharsets.KOI8);

                for (;;)
                {
                    Token token = in.next();

                    if (token == null)
                    {
                        break;
                    }

                    Token sampleToken = sample.next();
                    assertEquals(
                        "KOI8",
                        token.termText(),
                        sampleToken == null
                        ? null
                        : sampleToken.termText());

                }
            }
            finally
            {
                // Close the expected-results reader even if an assertion or
                // a read failed above.
                sampleKOI8.close();
            }
        }
        finally
        {
            // Close the input reader even if opening the sample file failed.
            inWordsKOI8.close();
        }
    
public voidtestUnicode()

        // Runs RussianAnalyzer (UnicodeRussian table) over testUnicode.txt and
        // checks each produced term against the next term read from
        // resUnicode.htm by a RussianLetterTokenizer. Both files are decoded
        // with the "Unicode" charset name.
        // NOTE: the loop stops when the analyzer is exhausted, so surplus
        // tokens remaining in the sample stream are not detected.
        RussianAnalyzer ra = new RussianAnalyzer(RussianCharsets.UnicodeRussian);
        inWords =
            new InputStreamReader(
                new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUnicode.txt")),
                "Unicode");
        try
        {
            sampleUnicode =
                new InputStreamReader(
                    new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/resUnicode.htm")),
                    "Unicode");
            try
            {
                TokenStream in = ra.tokenStream("all", inWords);

                RussianLetterTokenizer sample =
                    new RussianLetterTokenizer(
                        sampleUnicode,
                        RussianCharsets.UnicodeRussian);

                for (;;)
                {
                    Token token = in.next();

                    if (token == null)
                    {
                        break;
                    }

                    Token sampleToken = sample.next();
                    assertEquals(
                        "Unicode",
                        token.termText(),
                        sampleToken == null
                        ? null
                        : sampleToken.termText());
                }
            }
            finally
            {
                // Close the expected-results reader even if an assertion or
                // a read failed above.
                sampleUnicode.close();
            }
        }
        finally
        {
            // Close the input reader even if opening the sample file failed.
            inWords.close();
        }