File | Doc | Category | Size | Date | Package
GreekAnalyzer.java | API Doc | Apache Lucene 1.9 | 5962 | Mon Feb 20 09:18:48 GMT 2006 | org.apache.lucene.analysis.el

GreekAnalyzer

public final class GreekAnalyzer extends Analyzer
Analyzer for the Greek language. Supports an external list of stopwords (words that will not be indexed at all). A default set of stopwords is used unless an alternative list is specified.
author
Panagiotis Astithas, past@ebs.gr

Fields Summary
private static char
A
private static char
B
private static char
G
private static char
D
private static char
E
private static char
Z
private static char
H
private static char
TH
private static char
I
private static char
K
private static char
L
private static char
M
private static char
N
private static char
KS
private static char
O
private static char
P
private static char
R
private static char
S
private static char
T
private static char
Y
private static char
F
private static char
X
private static char
PS
private static char
W
private static char[][]
GREEK_STOP_WORDS
List of typical Greek stopwords.
private Set
stopSet
Contains the stopwords used with the StopFilter.
private char[]
charset
Charset for Greek letters. Represents the encoding for 24 lowercase Greek letters. Predefined charsets can be taken from the GreekCharsets class.
Constructors Summary
public GreekAnalyzer()


      
        // Default to the Unicode Greek charset table.
        charset = GreekCharsets.UnicodeGreek;
        // Build the stop set from the built-in stop word list, translated
        // through the same Unicode charset table by makeStopWords.
        stopSet = StopFilter.makeStopSet(
                    makeStopWords(GreekCharsets.UnicodeGreek));
    
public GreekAnalyzer(char[] charset)
Builds an analyzer that uses the given charset and the default stop words.

        this.charset = charset;
        // No stop words were supplied: use the built-in list, translated into
        // the caller's charset by makeStopWords.
        stopSet = StopFilter.makeStopSet(makeStopWords(charset));
    
public GreekAnalyzer(char[] charset, String[] stopwords)
Builds an analyzer with the given stop words.

        this.charset = charset;
        // The caller's stop words are handed to StopFilter as given; unlike
        // the built-in list they are not passed through makeStopWords.
        stopSet = StopFilter.makeStopSet(stopwords);
    
public GreekAnalyzer(char[] charset, Hashtable stopwords)
Builds an analyzer with the given stop words.

        this.charset = charset;
        // Only the Hashtable's keys become stop words; its values are ignored.
        // The keys are copied into a new HashSet.
        stopSet = new HashSet(stopwords.keySet());
    
Methods Summary
private static java.lang.String[] makeStopWords(char[] charset)

        // Produces one output string per entry of GREEK_STOP_WORDS.
        String[] res = new String[GREEK_STOP_WORDS.length];
        for (int i = 0; i < res.length; i++)
        {
            // Each entry is a char[] whose elements are used below as indices
            // into the charset table.
            char[] theStopWord = GREEK_STOP_WORDS[i];
            // Translate the word using the charset: look up each element in
            // the table and append the character found there.
            StringBuffer theWord = new StringBuffer();
            for (int j = 0; j < theStopWord.length; j++)
            {
                // NOTE(review): the index is not bounds-checked; presumably
                // charset has an entry for every letter code used in
                // GREEK_STOP_WORDS - confirm for caller-supplied charsets.
                theWord.append(charset[theStopWord[j]]);
            }
            res[i] = theWord.toString();
        }
        return res;
    
public org.apache.lucene.analysis.TokenStream tokenStream(java.lang.String fieldName, java.io.Reader reader)
Creates a TokenStream which tokenizes all the text in the provided Reader.

return
A TokenStream built from a StandardTokenizer filtered with GreekLowerCaseFilter and StopFilter

    	TokenStream result = new StandardTokenizer(reader);
        // Chain order: the lower-case filter (given this analyzer's charset)
        // wraps the tokenizer, and the stop filter wraps that. Presumably this
        // lets stop-word matching see already lower-cased tokens - confirm
        // against GreekLowerCaseFilter / StopFilter.
        // fieldName is not used here; every field gets the same chain.
        result = new GreekLowerCaseFilter(result, charset);
        result = new StopFilter(result, stopSet);
        return result;