FuzzyTermEnumpublic final class FuzzyTermEnum extends FilteredTermEnum Subclass of FilteredTermEnum for enumerating all terms that are similiar to the specified filter term.
Term enumerations are always ordered by Term.compareTo(). Each term in
the enumeration is greater than all that precede it. |
Fields Summary |
---|
double | distance | boolean | endEnum | Term | searchTerm | String | field | String | text | int | textlen | String | prefix | int | prefixLength | float | minimumSimilarity | double | scale_factor | private int[] | eThis static array saves us from the time required to create a new array
everytime editDistance is called. |
Constructors Summary |
---|
public FuzzyTermEnum(IndexReader reader, Term term)Empty prefix and minSimilarity of 0.5f are used.
this(reader, term, FuzzyQuery.defaultMinSimilarity, 0);
| public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity)This is the standard FuzzyTermEnum with an empty prefix.
this(reader, term, minSimilarity, 0);
| public FuzzyTermEnum(IndexReader reader, Term term, float minSimilarity, int prefixLength)Constructor for enumeration of all terms from specified reader which share a prefix of
length prefixLength with term and which have a fuzzy similarity >
minSimilarity .
super();
minimumSimilarity = minSimilarity;
scale_factor = 1.0f / (1.0f - minimumSimilarity);
searchTerm = term;
field = searchTerm.field();
text = searchTerm.text();
textlen = text.length();
if(prefixLength > 0 && prefixLength < textlen){
this.prefixLength = prefixLength;
prefix = text.substring(0, prefixLength);
text = text.substring(prefixLength);
textlen = text.length();
}
setEnum(reader.terms(new Term(searchTerm.field(), prefix)));
|
Methods Summary |
---|
public void | close()
super.close();
searchTerm = null;
field = null;
text = null;
| protected final float | difference()
return (float)((distance - minimumSimilarity) * scale_factor);
| private final int | editDistance(java.lang.String s, java.lang.String t, int n, int m)Levenshtein distance also known as edit distance is a measure of similiarity
between two strings where the distance is measured as the number of character
deletions, insertions or substitutions required to transform one string to
the other string.
This method takes in four parameters; two strings and their respective
lengths to compute the Levenshtein distance between the two strings.
The result is returned as an integer.
if (e.length <= n || e[0].length <= m) {
e = new int[Math.max(e.length, n+1)][Math.max(e[0].length, m+1)];
}
int d[][] = e; // matrix
int i; // iterates through s
int j; // iterates through t
char s_i; // ith character of s
if (n == 0) return m;
if (m == 0) return n;
// init matrix d
for (i = 0; i <= n; i++) d[i][0] = i;
for (j = 0; j <= m; j++) d[0][j] = j;
// start computing edit distance
for (i = 1; i <= n; i++) {
s_i = s.charAt(i - 1);
for (j = 1; j <= m; j++) {
if (s_i != t.charAt(j-1))
d[i][j] = min(d[i-1][j], d[i][j-1], d[i-1][j-1])+1;
else d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]);
}
}
// we got the result!
return d[n][m];
| public final boolean | endEnum()
return endEnum;
| private static final int | min(int a, int b, int c)Finds and returns the smallest of three integers
int t = (a < b) ? a : b;
return (t < c) ? t : c;
| protected final boolean | termCompare(org.apache.lucene.index.Term term)The termCompare method in FuzzyTermEnum uses Levenshtein distance to
calculate the distance between the given term and the comparing term.
String termText = term.text();
if (field == term.field() && termText.startsWith(prefix)) {
String target = termText.substring(prefixLength);
int targetlen = target.length();
int dist = editDistance(text, target, textlen, targetlen);
distance = 1 - ((double)dist / (double)Math.min(textlen, targetlen));
return (distance > minimumSimilarity);
}
endEnum = true;
return false;
|
|