File Doc Category Size Date Package
FuzzyQuery.java API Doc Apache Lucene 1.9 6729 Mon Feb 20 09:20:04 GMT 2006 org.apache.lucene.search

FuzzyQuery

java.lang.Object
- org.apache.lucene.search.Query
  - org.apache.lucene.search.MultiTermQuery

public final class FuzzyQuery extends MultiTermQuery

Implements the fuzzy search query. The similarity measurement is based on the Levenshtein (edit distance) algorithm.

Fields Summary
public static final float
defaultMinSimilarity
public static final int
defaultPrefixLength
private float
minimumSimilarity
private int
prefixLength
Constructors Summary
/**
 * Creates a new FuzzyQuery that matches terms whose similarity to
 * <code>term</code> is at least <code>minimumSimilarity</code>. When a
 * <code>prefixLength</code> greater than 0 is given, matching terms must
 * additionally share a common prefix of that length with the query term.
 *
 * @param term the term to search for
 * @param minimumSimilarity a value between 0 and 1 that sets the required
 *        similarity between the query term and the matching terms. For
 *        example, with a minimumSimilarity of 0.5, a term of the same length
 *        as the query term is considered similar to it if the edit distance
 *        between both terms is less than length(term)*0.5
 * @param prefixLength length of the common (non-fuzzy) prefix
 * @throws IllegalArgumentException if minimumSimilarity is &gt;= 1 or &lt; 0,
 *         or if prefixLength &lt; 0
 */
public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) {
  super(term);

  // The two similarity range checks are mutually exclusive, so their relative
  // order does not affect which exception a caller sees.
  if (minimumSimilarity < 0.0f) {
    throw new IllegalArgumentException("minimumSimilarity < 0");
  }
  if (minimumSimilarity >= 1.0f) {
    throw new IllegalArgumentException("minimumSimilarity >= 1");
  }
  // The prefix length is validated only after the similarity has passed.
  if (prefixLength < 0) {
    throw new IllegalArgumentException("prefixLength < 0");
  }

  this.prefixLength = prefixLength;
  this.minimumSimilarity = minimumSimilarity;
}
/**
 * Calls {@link #FuzzyQuery(Term, float, int) FuzzyQuery(term, minimumSimilarity, 0)}.
 *
 * @param term the term to search for
 * @param minimumSimilarity the required similarity, passed through unchanged
 *        to the three-argument constructor together with
 *        <code>defaultPrefixLength</code>
 */
public FuzzyQuery(Term term, float minimumSimilarity) {
  this(term, minimumSimilarity, defaultPrefixLength);
}
/**
 * Calls {@link #FuzzyQuery(Term, float, int) FuzzyQuery(term, 0.5f, 0)}.
 *
 * @param term the term to search for; the similarity and prefix length are
 *        taken from <code>defaultMinSimilarity</code> and
 *        <code>defaultPrefixLength</code>
 */
public FuzzyQuery(Term term) {
  this(term, defaultMinSimilarity, defaultPrefixLength);
}
Methods Summary
public boolean equals(java.lang.Object o)
if (this == o) return true; if (!(o instanceof FuzzyQuery)) return false; if (!super.equals(o)) return false; final FuzzyQuery fuzzyQuery = (FuzzyQuery) o; if (minimumSimilarity != fuzzyQuery.minimumSimilarity) return false; if (prefixLength != fuzzyQuery.prefixLength) return false; return true;
protected org.apache.lucene.search.FilteredTermEnum getEnum(org.apache.lucene.index.IndexReader reader)
return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength);
public float getMinSimilarity()
Returns the minimum similarity that is required for this query to match.
return
float value between 0.0 and 1.0
return minimumSimilarity;
public int getPrefixLength()
Returns the non-fuzzy prefix length. This is the number of characters at the start of a term that must be identical (not fuzzy) to the query term if the query is to match that term.
return prefixLength;
public int hashCode()
int result = super.hashCode(); result = 29 * result + minimumSimilarity != +0.0f ? Float.floatToIntBits(minimumSimilarity) : 0; result = 29 * result + prefixLength; return result;
public org.apache.lucene.search.Query rewrite(org.apache.lucene.index.IndexReader reader)
FilteredTermEnum enumerator = getEnum(reader); int maxClauseCount = BooleanQuery.getMaxClauseCount(); ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount); try { do { float minScore = 0.0f; float score = 0.0f; Term t = enumerator.term(); if (t != null) { score = enumerator.difference(); // terms come in alphabetical order, therefore if queue is full and score // not bigger than minScore, we can skip if(stQueue.size() < maxClauseCount || score > minScore){ stQueue.insert(new ScoreTerm(t, score)); minScore = ((ScoreTerm)stQueue.top()).score; // maintain minScore } } } while (enumerator.next()); } finally { enumerator.close(); } BooleanQuery query = new BooleanQuery(true); int size = stQueue.size(); for(int i = 0; i < size; i++){ ScoreTerm st = (ScoreTerm) stQueue.pop(); TermQuery tq = new TermQuery(st.term); // found a match tq.setBoost(getBoost() * st.score); // set the boost query.add(tq, BooleanClause.Occur.SHOULD); // add to query } return query;
public java.lang.String toString(java.lang.String field)
StringBuffer buffer = new StringBuffer(); Term term = getTerm(); if (!term.field().equals(field)) { buffer.append(term.field()); buffer.append(":"); } buffer.append(term.text()); buffer.append('~"); buffer.append(Float.toString(minimumSimilarity)); buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString();