File | Doc | Category | Size | Date | Package
EdgeNGramTokenizer.java | API Doc | Apache Lucene 2.1.0 | 3369 | Wed Feb 14 10:46:28 GMT 2007 | org.apache.lucene.analysis.ngram

EdgeNGramTokenizer

public class EdgeNGramTokenizer extends Tokenizer
Tokenizes the input into a single edge n-gram of the given size, taken from either the front or the back of the input.
author
Otis Gospodnetic

Fields Summary
private int
gramSize
private Side
side
private int
inLen
private String
inStr
private boolean
started
Constructors Summary
/**
 * Creates EdgeNGramTokenizer that can generate an n-gram of the given size.
 *
 * @param input Reader holding the input to be tokenized
 * @param side the {@link Side} from which to chop off an n-gram
 * @param gramSize the size of the n-gram to generate
 * @throws IllegalArgumentException if {@code side} is null or {@code gramSize} is less than 1
 */
public EdgeNGramTokenizer(Reader input, Side side, int gramSize) {
    super(input);
    // Reject a missing side up front; otherwise next() would silently treat it as BACK.
    if (side == null) {
      throw new IllegalArgumentException("side must be either FRONT or BACK");
    }
    if (gramSize < 1) {
      throw new IllegalArgumentException("gramSize must be greater than zero");
    }
    this.gramSize = gramSize;
    this.side = side;
}
public EdgeNGramTokenizer(Reader input, String side, int gramSize)


  
Methods Summary
public final org.apache.lucene.analysis.Tokennext()
Returns the next token in the stream, or null at EOS.

    // if we already returned the edge n-gram, we are done
    if (started)
      return null;
    if (!started) {
      started = true;
      char[] chars = new char[1024];
      input.read(chars);
      inStr = new String(chars).trim();  // remove any trailing empty strings 
      inLen = inStr.length();
    }
    // if the input is too short, we can't generate any n-grams
    if (gramSize > inLen)
      return null;
    if (side == Side.FRONT)
      return new Token(inStr.substring(0, gramSize), 0, gramSize);
    else
      return new Token(inStr.substring(inLen-gramSize), inLen-gramSize, inLen);            
  
static org.apache.lucene.analysis.ngram.EdgeNGramTokenizer$Sideside(java.lang.String label)

    if (label == null || label.trim().length() == 0)
      throw new IllegalArgumentException("Label must be either 'front' or 'back'");
    if (label.equals("front"))
      return Side.FRONT;
    else
      return Side.BACK;