File | Doc | Category | Size | Date | Package
StopFilter.java | API Doc | Apache Lucene 1.4.3 | 3188 | Tue Mar 30 00:48:00 BST 2004 | org.apache.lucene.analysis

StopFilter

public final class StopFilter extends TokenFilter
Removes stop words from a token stream.

Fields Summary
private Set
stopWords
Constructors Summary
public StopFilter(TokenStream in, String[] stopWords)
Constructs a filter which removes words from the input TokenStream that are named in the array of words.

    super(in);
    this.stopWords = makeStopSet(stopWords);
  
public StopFilter(TokenStream in, Hashtable stopTable)
Constructs a filter which removes words from the input TokenStream that are named in the Hashtable.

deprecated
Use {@link #StopFilter(TokenStream, Set)} instead

    super(in);
    stopWords = new HashSet(stopTable.keySet());
  
public StopFilter(TokenStream in, Set stopWords)
Constructs a filter which removes words from the input TokenStream that are named in the Set. It is crucial that an efficient Set implementation is used for maximum performance.

see
#makeStopSet(java.lang.String[])

    super(in);
    this.stopWords = stopWords;
  
Methods Summary
public static final java.util.Set makeStopSet(java.lang.String[] stopWords)
Builds a Set from an array of stop words, appropriate for passing into the StopFilter constructor. This allows the stop-word Set to be built once and cached when an Analyzer is constructed.

    // Size the set from the array length, then insert the words in array order.
    final int wordCount = stopWords.length;
    HashSet words = new HashSet(wordCount);
    int idx = 0;
    while (idx < wordCount) {
      words.add(stopWords[idx]);
      idx++;
    }
    return words;
  
public static final java.util.Hashtable makeStopTable(java.lang.String[] stopWords)
Builds a Hashtable from an array of stop words, appropriate for passing into the StopFilter constructor. This allows the table to be built once and cached when an Analyzer is constructed.

deprecated
Use {@link #makeStopSet(String[])} instead.

    // Size the table from the array length; every word is stored mapped to itself.
    final int wordCount = stopWords.length;
    Hashtable table = new Hashtable(wordCount);
    int idx = 0;
    while (idx < wordCount) {
      final String word = stopWords[idx];
      table.put(word, word);
      idx++;
    }
    return table;
  
public final org.apache.lucene.analysis.Token next()
Returns the next input Token whose termText() is not a stop word.

    // return the first non-stop word found
    for (Token token = input.next(); token != null; token = input.next())
      if (!stopWords.contains(token.termText))
        return token;
    // reached EOS -- return null
    return null;