Fields Summary |
---|
private static final char | A |
private static final char | B |
private static final char | V |
private static final char | G |
private static final char | D |
private static final char | E |
private static final char | ZH |
private static final char | Z |
private static final char | I |
private static final char | I_ |
private static final char | K |
private static final char | L |
private static final char | M |
private static final char | N |
private static final char | O |
private static final char | P |
private static final char | R |
private static final char | S |
private static final char | T |
private static final char | U |
private static final char | X |
private static final char | CH |
private static final char | SH |
private static final char | SHCH |
private static final char | Y |
private static final char | SOFT |
private static final char | AE |
private static final char | IU |
private static final char | IA |
private static char[][] | RUSSIAN_STOP_WORDS — List of typical Russian stopwords. |
private Set | stopSet — Contains the stopwords used with the StopFilter. |
private char[] | charset — Charset for Russian letters.
Represents encoding for 32 lowercase Russian letters.
Predefined charsets can be taken from the RussianCharSets class. |
Methods Summary |
---|
private static java.lang.String[] | makeStopWords(char[] charset)
// Produces one String per entry of RUSSIAN_STOP_WORDS, in the same order.
// Each element of a stop word is used as an index into the supplied
// charset array, and the looked-up characters form the resulting word.
final int wordCount = RUSSIAN_STOP_WORDS.length;
String[] translated = new String[wordCount];
for (int w = 0; w < wordCount; w++)
{
    char[] indices = RUSSIAN_STOP_WORDS[w];
    // Look up every index in the charset to obtain the actual letters.
    char[] letters = new char[indices.length];
    for (int k = 0; k < indices.length; k++)
    {
        letters[k] = charset[indices[k]];
    }
    translated[w] = new String(letters);
}
return translated;
|
public org.apache.lucene.analysis.TokenStream | tokenStream(java.lang.String fieldName, java.io.Reader reader) — Creates a TokenStream which tokenizes all the text in the provided Reader.
// Assemble the analysis chain, each stage wrapping the previous one:
// RussianLetterTokenizer -> RussianLowerCaseFilter -> StopFilter -> RussianStemFilter.
// The Russian-specific stages receive this analyzer's charset; the
// StopFilter receives stopSet. fieldName is not used here.
TokenStream tokens = new RussianLetterTokenizer(reader, charset);
TokenStream lowerCased = new RussianLowerCaseFilter(tokens, charset);
TokenStream withoutStopWords = new StopFilter(lowerCased, stopSet);
return new RussianStemFilter(withoutStopWords, charset);
|