Returns the next token in the stream, or null at end of stream (EOS).
Removes a trailing 's (or 'S) from the end of apostrophe-type words.
Removes dots from acronyms.
// Pull the next token from the wrapped stream; a null token means the
// stream is exhausted and is passed straight through to the caller.
org.apache.lucene.analysis.Token t = input.next();
if (t == null) {
  return null;
}

String text = t.termText();
String type = t.type();

// Compare type strings with equals() rather than ==, so the match does not
// depend on the token carrying the very same String instance as the constant.
if (APOSTROPHE_TYPE.equals(type)
    && (text.endsWith("'s") || text.endsWith("'S"))) {
  // Remove the trailing 's / 'S. The offsets are copied unchanged, so they
  // still span the original (unstripped) text.
  return new org.apache.lucene.analysis.Token(
      text.substring(0, text.length() - 2),
      t.startOffset(), t.endOffset(), type);
} else if (ACRONYM_TYPE.equals(type)) {
  // Remove every '.' from the acronym, keeping all other characters in order.
  StringBuffer trimmed = new StringBuffer(text.length());
  for (int i = 0; i < text.length(); i++) {
    char c = text.charAt(i);
    if (c != '.') {
      trimmed.append(c);
    }
  }
  return new org.apache.lucene.analysis.Token(
      trimmed.toString(), t.startOffset(), t.endOffset(), type);
} else {
  // Any other token type is returned untouched.
  return t;
}