File | Doc | Category | Size | Date | Package
FuzzyQuery.java | API Doc | Apache Lucene 2.1.0 | 6916 | Wed Feb 14 10:46:38 GMT 2007 | org.apache.lucene.search

FuzzyQuery

public class FuzzyQuery extends MultiTermQuery
Implements the fuzzy search query. The similarity measurement is based on the Levenshtein (edit distance) algorithm.

Fields Summary
public static final float
defaultMinSimilarity
public static final int
defaultPrefixLength
private float
minimumSimilarity
private int
prefixLength
Constructors Summary
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength)
Create a new FuzzyQuery that will match terms with a similarity of at least minimumSimilarity to term. If a prefixLength > 0 is specified, a common prefix of that length is also required.

param
term the term to search for
param
minimumSimilarity a value between 0 and 1 to set the required similarity between the query term and the matching terms. For example, for a minimumSimilarity of 0.5 a term of the same length as the query term is considered similar to the query term if the edit distance between both terms is less than length(term)*0.5
param
prefixLength length of common (non-fuzzy) prefix
throws
IllegalArgumentException if minimumSimilarity is >= 1 or < 0 or if prefixLength < 0

  
                                                                                                                                 
           
    super(term);

    // Validate in a fixed order: similarity upper bound, similarity lower
    // bound, then prefix length. The first failing check determines the message.
    if (minimumSimilarity >= 1.0f) {
      throw new IllegalArgumentException("minimumSimilarity >= 1");
    }
    if (minimumSimilarity < 0.0f) {
      throw new IllegalArgumentException("minimumSimilarity < 0");
    }
    if (prefixLength < 0) {
      throw new IllegalArgumentException("prefixLength < 0");
    }

    // All arguments accepted: store them on the query.
    this.prefixLength = prefixLength;
    this.minimumSimilarity = minimumSimilarity;
  
public FuzzyQuery(Term term, float minimumSimilarity)
Calls {@link #FuzzyQuery(Term, float, int) FuzzyQuery(term, minimumSimilarity, 0)}.

      // Delegate to the three-argument constructor, passing defaultPrefixLength as the prefix length.
      this(term, minimumSimilarity, defaultPrefixLength);
  
public FuzzyQuery(Term term)
Calls {@link #FuzzyQuery(Term, float, int) FuzzyQuery(term, 0.5f, 0)}.

    // Delegate to the three-argument constructor, using both class defaults
    // (defaultMinSimilarity and defaultPrefixLength).
    this(term, defaultMinSimilarity, defaultPrefixLength);
  
Methods Summary
public boolean equals(java.lang.Object o)

    // Same instance: trivially equal.
    if (this == o) {
      return true;
    }
    // Must be a FuzzyQuery and must also satisfy the superclass's notion of equality.
    if (!(o instanceof FuzzyQuery) || !super.equals(o)) {
      return false;
    }

    // Finally compare the two fields this class adds.
    final FuzzyQuery other = (FuzzyQuery) o;
    return minimumSimilarity == other.minimumSimilarity
        && prefixLength == other.prefixLength;
  
protected org.apache.lucene.search.FilteredTermEnum getEnum(org.apache.lucene.index.IndexReader reader)

    return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
  
public float getMinSimilarity()
Returns the minimum similarity that is required for this query to match.

return
float value between 0.0 and 1.0

    // Return the stored threshold as-is; the three-argument constructor rejects values >= 1 or < 0.
    return minimumSimilarity;
  
public int getPrefixLength()
Returns the non-fuzzy prefix length. This is the number of characters at the start of a term that must be identical (not fuzzy) to the query term if the query is to match that term.

    // Return the stored prefix length as-is; the three-argument constructor rejects negative values.
    return prefixLength;
  
public int hashCode()

    // Start from the superclass hash so that state compared by super.equals()
    // also contributes to this hash.
    int result = super.hashCode();
    // The conditional must be parenthesized: '+' binds tighter than '!=', which
    // binds tighter than '?:'. Without the parentheses the expression parses as
    // ((29 * result + minimumSimilarity) != 0.0f) ? bits : 0, which throws away
    // the superclass hash. The zero check maps both 0.0f and -0.0f (which
    // compare equal in equals()) to the same hash contribution.
    result = 29 * result
        + (minimumSimilarity != +0.0f ? Float.floatToIntBits(minimumSimilarity) : 0);
    result = 29 * result + prefixLength;
    return result;
  
public org.apache.lucene.search.Query rewrite(org.apache.lucene.index.IndexReader reader)

    // Rewrites this fuzzy query into a BooleanQuery of SHOULD TermQuery clauses,
    // one per term retained from the fuzzy enumeration, each boosted by
    // getBoost() multiplied by that term's score.
    FilteredTermEnum enumerator = getEnum(reader);
    int maxClauseCount = BooleanQuery.getMaxClauseCount();
    ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount);

    // Declared outside the loop so the value assigned after each insert is
    // still visible on the next iteration. Previously it was re-initialised to
    // 0.0f on every pass, so the skip test below never used the maintained value.
    // NOTE(review): assumes stQueue.top() is the lowest-scoring entry and that
    // insert() itself never keeps more than maxClauseCount entries - confirm
    // against ScoreTermQueue / PriorityQueue.
    float minScore = 0.0f;

    try {
      do {
        Term t = enumerator.term();
        if (t != null) {
          float score = enumerator.difference();
          // terms come in alphabetical order, therefore if queue is full and score
          // not bigger than minScore, we can skip
          if (stQueue.size() < maxClauseCount || score > minScore) {
            stQueue.insert(new ScoreTerm(t, score));
            minScore = ((ScoreTerm) stQueue.top()).score; // maintain minScore
          }
        }
      } while (enumerator.next());
    } finally {
      // Always release the enumerator, even if enumeration fails part-way.
      enumerator.close();
    }

    BooleanQuery query = new BooleanQuery(true);
    // Capture the size first: pop() shrinks the queue while we iterate.
    int size = stQueue.size();
    for (int i = 0; i < size; i++) {
      ScoreTerm st = (ScoreTerm) stQueue.pop();
      TermQuery tq = new TermQuery(st.term);      // found a match
      tq.setBoost(getBoost() * st.score);         // set the boost
      query.add(tq, BooleanClause.Occur.SHOULD);  // add to query
    }

    return query;
  
public java.lang.String toString(java.lang.String field)

    // Builds the textual form of this query: "[field:]text~minimumSimilarity"
    // followed by whatever ToStringUtils.boost() returns for the current boost.
    StringBuffer buffer = new StringBuffer();
    Term term = getTerm();
    // The field name is only written when it differs from the field passed in.
    if (!term.field().equals(field)) {
        buffer.append(term.field());
        buffer.append(":");
    }
    buffer.append(term.text());
    // Was written as '~" (unterminated character literal), which does not compile.
    buffer.append('~');
    buffer.append(Float.toString(minimumSimilarity));
    buffer.append(ToStringUtils.boost(getBoost()));
    return buffer.toString();