Fields Summary |
---|
private char[] | charset |
private int | RV |
private int | R1 |
private int | R2 |
private static final char | A |
private static final char | V |
private static final char | G |
private static final char | E |
private static final char | I |
private static final char | I_ |
private static final char | L |
private static final char | M |
private static final char | N |
private static final char | O |
private static final char | S |
private static final char | T |
private static final char | U |
private static final char | X |
private static final char | SH |
private static final char | SHCH |
private static final char | Y |
private static final char | SOFT |
private static final char | AE |
private static final char | IU |
private static final char | IA |
private static char[] | vowels |
private static char[] | perfectiveGerundEndings1 |
private static char[] | perfectiveGerund1Predessors |
private static char[] | perfectiveGerundEndings2 |
private static char[] | adjectiveEndings |
private static char[] | participleEndings1 |
private static char[] | participleEndings2 |
private static char[] | participle1Predessors |
private static char[] | reflexiveEndings |
private static char[] | verbEndings1 |
private static char[] | verbEndings2 |
private static char[] | verb1Predessors |
private static char[] | nounEndings |
private static char[] | superlativeEndings |
private static char[] | derivationalEndings |
Methods Summary |
---|
private boolean | adjectival(java.lang.StringBuffer stemmingZone)Strips an adjectival ending from the stemming zone: an adjective ending,
optionally preceded by a participle ending. Returns true if an adjective ending was removed.
Creation date: (17/03/2002 12:14:58 AM)
// Without an adjective ending there is nothing adjectival to strip.
boolean adjectiveRemoved = findAndRemoveEnding(stemmingZone, adjectiveEndings);
if (adjectiveRemoved)
{
    // A participle ending may precede the adjective ending. Removing it is
    // optional, so the outcome does not influence the return value. The
    // second participle class is tried only when the first did not match.
    if (!findAndRemoveEnding(stemmingZone, participleEndings1, participle1Predessors))
    {
        findAndRemoveEnding(stemmingZone, participleEndings2);
    }
}
return adjectiveRemoved;
|
private boolean | derivational(java.lang.StringBuffer stemmingZone)Removes a derivational ending from the stemming zone, but only when the whole ending lies inside R2.
Returns true if an ending was removed, false if none was found or it does not lie in R2.
Creation date: (17/03/2002 12:14:58 AM)
int endingLength = findEnding(stemmingZone, derivationalEndings);
if (endingLength == 0)
{
    // no derivational ending found
    return false;
}
// markPositions() leaves R2 at 0 when the R2 zone is empty. In that case no
// ending can lie inside R2, so nothing may be removed. Without this check
// R2 - RV is negative and the comparison below would always succeed.
if (R2 == 0)
{
    return false;
}
// The stemming zone is built from the word starting at RV, so R2 - RV is the
// offset of R2 inside the zone; the ending must start at or after it.
int endingStart = stemmingZone.length() - endingLength;
if (R2 - RV > endingStart)
{
    return false;
}
stemmingZone.setLength(endingStart);
return true;
|
private boolean | findAndRemoveEnding(java.lang.StringBuffer stemmingZone, char[][] theEndingClass)Looks for an ending from the given class at the end of the stemming zone and, if one is found, cuts it off.
Returns true if an ending was removed; otherwise the stemming zone is left unchanged.
Creation date: (17/03/2002 8:18:34 PM)
int foundLength = findEnding(stemmingZone, theEndingClass);
// findEnding() reports 0 when no ending of the class matches
boolean found = foundLength != 0;
if (found)
{
    // cut the ending found
    stemmingZone.setLength(stemmingZone.length() - foundLength);
}
return found;
|
private boolean | findAndRemoveEnding(java.lang.StringBuffer stemmingZone, char[][] theEndingClass, char[][] thePredessors)Looks for an ending from the given class at the end of the stemming zone, then checks that the ending
is immediately preceded by one of the given predecessors, and only then removes the ending (the predecessor is kept).
Returns true if an ending was removed; otherwise the stemming zone is left unchanged.
Creation date: (17/03/2002 8:18:34 PM)
int endingLength = findEnding(stemmingZone, theEndingClass);
if (endingLength == 0)
{
    // no ending of this class
    return false;
}
// Index of the last character in front of the ending; this is -1 when the
// ending fills the whole stemming zone.
int lastBeforeEnding = stemmingZone.length() - endingLength - 1;
if (findEnding(stemmingZone, lastBeforeEnding, thePredessors) == 0)
{
    // the ending is not preceded by a required predecessor
    return false;
}
// cut the ending found
stemmingZone.setLength(stemmingZone.length() - endingLength);
return true;
|
private int | findEnding(java.lang.StringBuffer stemmingZone, int startIndex, char[][] theEndingClass)Finds ending among given ending class and returns the length of ending found(0, if not found).
Creation date: (17/03/2002 8:18:34 PM)
boolean match = false;
for (int i = theEndingClass.length - 1; i >= 0; i--)
{
char[] theEnding = theEndingClass[i];
// check if the ending is bigger than stemming zone
if (startIndex < theEnding.length - 1)
{
match = false;
continue;
}
match = true;
int stemmingIndex = startIndex;
for (int j = theEnding.length - 1; j >= 0; j--)
{
if (stemmingZone.charAt(stemmingIndex--) != charset[theEnding[j]])
{
match = false;
break;
}
}
// check if ending was found
if (match)
{
return theEndingClass[i].length; // cut ending
}
}
return 0;
|
private int | findEnding(java.lang.StringBuffer stemmingZone, char[][] theEndingClass)
return findEnding(stemmingZone, stemmingZone.length() - 1, theEndingClass);
|
private boolean | isVowel(char letter)Checks whether the character is a vowel in the current charset.
Creation date: (16/03/2002 10:47:03 PM)
// vowels holds indexes into charset; charset supplies the actual characters
int vowelIndex = 0;
while (vowelIndex < vowels.length)
{
    if (charset[vowels[vowelIndex]] == letter)
    {
        return true;
    }
    vowelIndex++;
}
return false;
|
private void | markPositions(java.lang.String word)Computes the start positions of the RV, R1 and R2 zones of the given word and stores them in the RV, R1 and R2 fields.
A zone that turns out to be empty keeps the position 0, as do all zones after it.
Creation date: (16/03/2002 3:40:11 PM)
RV = 0;
R1 = 0;
R2 = 0;
final int length = word.length();
int pos = 0;
// RV starts right after the first vowel
while (pos < length && !isVowel(word.charAt(pos)))
{
    pos++;
}
pos++;
if (pos >= length)
{
    return; // RV zone is empty
}
RV = pos;
// R1 starts right after the first non-vowel that follows that vowel
while (pos < length && isVowel(word.charAt(pos)))
{
    pos++;
}
pos++;
if (pos >= length)
{
    return; // R1 zone is empty
}
R1 = pos;
// R2 starts right after the first non-vowel that follows a vowel inside R1
while (pos < length && !isVowel(word.charAt(pos)))
{
    pos++;
}
pos++;
if (pos >= length)
{
    return; // R2 zone is empty
}
while (pos < length && isVowel(word.charAt(pos)))
{
    pos++;
}
pos++;
if (pos >= length)
{
    return; // R2 zone is empty
}
R2 = pos;
|
private boolean | noun(java.lang.StringBuffer stemmingZone)Removes a noun ending from the stemming zone. Returns true if an ending was removed.
Creation date: (17/03/2002 12:14:58 AM)
boolean removed = findAndRemoveEnding(stemmingZone, nounEndings);
return removed;
|
private boolean | perfectiveGerund(java.lang.StringBuffer stemmingZone)Removes a perfective gerund ending from the stemming zone. Returns true if an ending was removed.
Creation date: (17/03/2002 12:14:58 AM)
// Endings of the first group count only when preceded by one of their predecessors.
if (findAndRemoveEnding(stemmingZone, perfectiveGerundEndings1, perfectiveGerund1Predessors))
{
    return true;
}
// The second group is tried only when the first did not match; it needs no predecessor.
return findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2);
|
private boolean | reflexive(java.lang.StringBuffer stemmingZone)Removes a reflexive ending from the stemming zone. Returns true if an ending was removed.
Creation date: (17/03/2002 12:14:58 AM)
boolean removed = findAndRemoveEnding(stemmingZone, reflexiveEndings);
return removed;
|
private boolean | removeI(java.lang.StringBuffer stemmingZone)Removes the last character of the stemming zone if it is the letter charset[I]. Returns true if the letter was removed.
Creation date: (17/03/2002 12:14:58 AM)
int lastIndex = stemmingZone.length() - 1;
// empty zone, or the zone does not end with the letter: nothing to do
if (lastIndex < 0 || stemmingZone.charAt(lastIndex) != charset[I])
{
    return false;
}
// lastIndex equals the current length minus one, so this drops the last character
stemmingZone.setLength(lastIndex);
return true;
|
private boolean | removeSoft(java.lang.StringBuffer stemmingZone)Insert the method's description here.
Creation date: (17/03/2002 12:14:58 AM)
if (stemmingZone.length() > 0
&& stemmingZone.charAt(stemmingZone.length() - 1) == charset[SOFT])
{
stemmingZone.setLength(stemmingZone.length() - 1);
return true;
}
else
{
return false;
}
|
public void | setCharset(char[] newCharset)Sets the character table this stemmer uses to translate its letter indexes into actual characters.
The array reference is stored as is; no copy is made.
Creation date: (16/03/2002 10:58:42 PM)
this.charset = newCharset;
|
public java.lang.String | stem(java.lang.String input)Finds the stem of the given Russian word. The word is returned unchanged when no RV zone is detected.
Creation date: (16/03/2002 3:36:48 PM)
markPositions(input);
if (RV == 0)
{
    return input; // RV wasn't detected, nothing to stem
}
// stemming goes on in RV; everything in front of RV is kept untouched
StringBuffer stemmingZone = new StringBuffer(input.substring(RV));
// Step 1: a perfective gerund ending excludes the remaining step 1 endings
if (!perfectiveGerund(stemmingZone))
{
    reflexive(stemmingZone);
    // apply adjectival(); if that fails, apply verb(); if that fails too,
    // apply noun()
    if (!adjectival(stemmingZone) && !verb(stemmingZone))
    {
        noun(stemmingZone);
    }
}
// Step 2
removeI(stemmingZone);
// Step 3
derivational(stemmingZone);
// Step 4
superlative(stemmingZone);
undoubleN(stemmingZone);
removeSoft(stemmingZone);
// return result: the untouched head of the word followed by the stemmed zone
String head = input.substring(0, RV);
return head + stemmingZone.toString();
|
public static java.lang.String | stem(java.lang.String theWord, char[] charset)Static method for stemming with different charsets
RussianStemmer stemmer = new RussianStemmer();
stemmer.setCharset(charset);
return stemmer.stem(theWord);
|
private boolean | superlative(java.lang.StringBuffer stemmingZone)Removes a superlative ending from the stemming zone. Returns true if an ending was removed.
Creation date: (17/03/2002 12:14:58 AM)
boolean removed = findAndRemoveEnding(stemmingZone, superlativeEndings);
return removed;
|
private boolean | undoubleN(java.lang.StringBuffer stemmingZone)Undoubles N.
Creation date: (17/03/2002 12:14:58 AM)
char[][] doubleN = {
{ N, N }
};
if (findEnding(stemmingZone, doubleN) != 0)
{
stemmingZone.setLength(stemmingZone.length() - 1);
return true;
}
else
{
return false;
}
|
private boolean | verb(java.lang.StringBuffer stemmingZone)Removes a verb ending from the stemming zone. Returns true if an ending was removed.
Creation date: (17/03/2002 12:14:58 AM)
// Endings of the first group count only when preceded by one of their predecessors.
if (findAndRemoveEnding(stemmingZone, verbEndings1, verb1Predessors))
{
    return true;
}
// The second group is tried only when the first did not match; it needs no predecessor.
return findAndRemoveEnding(stemmingZone, verbEndings2);
|