QueryTermExtractor: public final class QueryTermExtractor extends Object. Utility class used to extract the terms used in a query, plus any weights.
This class will not find terms for MultiTermQuery, RangeQuery and PrefixQuery classes
so the caller must pass a rewritten query (see Query.rewrite) to obtain a list of
expanded terms. |
Methods Summary |
---|
public static final WeightedTerm[] | getIdfWeightedTerms(org.apache.lucene.search.Query query, org.apache.lucene.index.IndexReader reader, java.lang.String fieldName) Extracts all term texts of a given Query into an array of WeightedTerms, scaling each term's weight by an IDF factor computed from the given reader.
// Collect the non-prohibited terms for the field, each carrying its query boost.
WeightedTerm[] weightedTerms = getTerms(query, false, fieldName);
final int numDocs = reader.numDocs();
int index = 0;
while (index < weightedTerms.length)
{
    WeightedTerm current = weightedTerms[index++];
    try
    {
        Term lookup = new Term(fieldName, current.term);
        int docFreq = reader.docFreq(lookup);
        // IDF algorithm taken from DefaultSimilarity class
        double ratio = (float) numDocs / (double) (docFreq + 1);
        float idf = (float) (Math.log(ratio) + 1.0);
        current.weight *= idf;
    }
    catch (IOException e)
    {
        // ignore: the term simply keeps its un-scaled weight
    }
}
return weightedTerms;
| public static final WeightedTerm[] | getTerms(org.apache.lucene.search.Query query) Extracts all term texts of a given Query into an array of WeightedTerms
return getTerms(query,false);
| public static final WeightedTerm[] | getTerms(org.apache.lucene.search.Query query, boolean prohibited, java.lang.String fieldName) Extracts all term texts of a given Query into an array of WeightedTerms
// Intern the field name, when one is given, before handing it to the recursive worker.
String internedField = (fieldName == null) ? null : fieldName.intern();
HashSet collected = new HashSet();
getTerms(query, collected, prohibited, internedField);
// Copy the set into an exactly-sized array for the caller.
WeightedTerm[] result = new WeightedTerm[collected.size()];
collected.toArray(result);
return result;
| public static final WeightedTerm[] | getTerms(org.apache.lucene.search.Query query, boolean prohibited) Extracts all term texts of a given Query into an array of WeightedTerms
return getTerms(query,prohibited,null);
| private static final void | getTerms(org.apache.lucene.search.Query query, java.util.HashSet terms, boolean prohibited, java.lang.String fieldName)
// Recursive worker: adds a WeightedTerm to "terms" for every term of "query"
// that belongs to "fieldName" (or for every term when fieldName is null).
// BooleanQuery and FilteredQuery are unwrapped by their dedicated helpers;
// any other query is asked for its terms via extractTerms and each term is
// weighted with that query's boost.
try
{
    if (query instanceof BooleanQuery)
    {
        getTermsFromBooleanQuery((BooleanQuery) query, terms, prohibited, fieldName);
    }
    else if (query instanceof FilteredQuery)
    {
        getTermsFromFilteredQuery((FilteredQuery) query, terms, prohibited, fieldName);
    }
    else
    {
        HashSet nonWeightedTerms = new HashSet();
        query.extractTerms(nonWeightedTerms);
        for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();)
        {
            Term term = (Term) iter.next();
            // Compare field names by value rather than by reference so the
            // match does not depend on both strings having been interned.
            if ((fieldName == null) || fieldName.equals(term.field()))
            {
                terms.add(new WeightedTerm(query.getBoost(), term.text()));
            }
        }
    }
}
catch (UnsupportedOperationException ignore)
{
    // this is non-fatal for our purposes: the query just contributes no terms
}
| private static final void | getTermsFromBooleanQuery(org.apache.lucene.search.BooleanQuery query, java.util.HashSet terms, boolean prohibited, java.lang.String fieldName) extractTerms is currently the only query-independent means of introspecting queries but it only reveals
a list of terms for that query - not the boosts each individual term in that query may or may not have.
"Container" queries such as BooleanQuery should be unwrapped to get at the boost info held
in each child element.
Some discussion around this topic here:
http://www.gossamer-threads.com/lists/lucene/java-dev/34208?search_string=introspection;#34208
Unfortunately there seemed to be limited interest in requiring all Query objects to implement
something common which would allow access to child queries so what follows here are query-specific
implementations for accessing embedded query elements.
// Visit each clause in order; MUST_NOT clauses are descended into only when
// the caller asked for prohibited terms as well.
BooleanClause[] clauses = query.getClauses();
for (int idx = 0; idx < clauses.length; idx++)
{
    BooleanClause clause = clauses[idx];
    if (!prohibited && clause.getOccur() == BooleanClause.Occur.MUST_NOT)
    {
        continue;
    }
    getTerms(clause.getQuery(), terms, prohibited, fieldName);
}
| private static void | getTermsFromFilteredQuery(org.apache.lucene.search.FilteredQuery query, java.util.HashSet terms, boolean prohibited, java.lang.String fieldName)
// Only the query wrapped by the FilteredQuery is inspected for terms.
getTerms(
    query.getQuery(),
    terms,
    prohibited,
    fieldName);
|
|