File | Doc | Category | Size | Date | Package
StandardFilter.java | API Doc | Apache Lucene 1.4.3 | 2110 | Tue Mar 30 00:48:02 BST 2004 | org.apache.lucene.analysis.standard

StandardFilter

public final class StandardFilter extends TokenFilter implements StandardTokenizerConstants
Normalizes tokens extracted with {@link StandardTokenizer}.

Fields Summary
private static final String
APOSTROPHE_TYPE
private static final String
ACRONYM_TYPE
Constructors Summary
public StandardFilter(TokenStream in)
Constructs a filter over the token stream {@code in}.

    // Hand the upstream TokenStream to the TokenFilter superclass constructor.
    super(in);
  
Methods Summary
public final org.apache.lucene.analysis.Token next()
Returns the next token in the stream, or null at EOS.

Removes 's from the end of words.

Removes dots from acronyms.

  
                           
        
    // Pull the next token from the wrapped stream; null means end of stream
    // and is passed straight through to the caller.
    org.apache.lucene.analysis.Token t = input.next();

    if (t == null)
      return null;

    String text = t.termText();
    String type = t.type();

    // Token types are compared by value (equals) rather than by reference,
    // so the filter does not rely on the tokenizer returning the very same
    // String instances as the type constants.
    if (APOSTROPHE_TYPE.equals(type) &&            // remove 's
        (text.endsWith("'s") || text.endsWith("'S"))) {
      // Drop the trailing two characters ('s or 'S). The start/end offsets
      // and the type of the original token are carried over unchanged.
      return new org.apache.lucene.analysis.Token
        (text.substring(0, text.length() - 2),
         t.startOffset(), t.endOffset(), type);

    } else if (ACRONYM_TYPE.equals(type)) {        // remove dots
      // Copy every character except '.'; the result can never be longer
      // than the input, so the buffer is sized up front.
      StringBuffer trimmed = new StringBuffer(text.length());
      for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        if (c != '.')
          trimmed.append(c);
      }
      return new org.apache.lucene.analysis.Token
        (trimmed.toString(), t.startOffset(), t.endOffset(), type);

    } else {
      // Any other token type is returned as-is (same instance).
      return t;
    }