File | Doc Category | Size | Date | Package
QueryTermExtractor.java | API Doc | Apache Lucene 2.0.0 | 4448 | Fri May 26 09:53:50 BST 2006 | org.apache.lucene.search.highlight

QueryTermExtractor

public final class QueryTermExtractor extends Object
Utility class used to extract the terms used in a query, plus any weights. This class will not find terms for MultiTermQuery, RangeQuery and PrefixQuery classes so the caller must pass a rewritten query (see Query.rewrite) to obtain a list of expanded terms.

Fields Summary
Constructors Summary
Methods Summary
public static final WeightedTerm[] getIdfWeightedTerms(org.apache.lucene.search.Query query, org.apache.lucene.index.IndexReader reader, java.lang.String fieldName)
Extracts all term texts of a given Query into an array of WeightedTerms, with each weight scaled by the term's Inverse Document Frequency (IDF).

param
query Query to extract term texts from
param
reader used to compute IDF, which can be used to a) score selected fragments better, or b) use graded highlights, e.g. changing the intensity of the font color
param
fieldName the field on which Inverse Document Frequency (IDF) calculations are based
return
an array of the terms used in a query, plus their weights.

	    // Gather the query's terms for the requested field (prohibited flag is false),
	    // then scale each term's weight by its inverse document frequency.
	    WeightedTerm[] weightedTerms = getTerms(query, false, fieldName);
	    // The document count is narrowed to float before the division below.
	    float docCount = (float) reader.numDocs();
	    for (int idx = 0; idx < weightedTerms.length; idx++)
	    {
	        WeightedTerm current = weightedTerms[idx];
	        try
	        {
	            Term lookup = new Term(fieldName, current.term);
	            int docFreq = reader.docFreq(lookup);
	            // IDF algorithm taken from DefaultSimilarity class
	            double ratio = docCount / (double) (docFreq + 1);
	            float idf = (float) (Math.log(ratio) + 1.0);
	            current.weight *= idf;
	        }
	        catch (IOException ignored)
	        {
	            // best effort: if the frequency lookup fails, this term keeps its unscaled weight
	        }
	    }
	    return weightedTerms;
	
public static final WeightedTerm[] getTerms(org.apache.lucene.search.Query query)
Extracts all term texts of a given Query into an array of WeightedTerms.

param
query Query to extract term texts from
return
an array of the terms used in a query, plus their weights.

		return getTerms(query,false);
	
public static final WeightedTerm[] getTerms(org.apache.lucene.search.Query query, boolean prohibited, java.lang.String fieldName)
Extracts all term texts of a given Query into an array of WeightedTerms, optionally restricted to a single field.

param
query Query to extract term texts from
param
prohibited true to extract "prohibited" terms, too
param
fieldName The fieldName used to filter query terms
return
an array of the terms used in a query, plus their weights.

		// Intern a non-null field name before delegating to the private collector;
		// a null field name is passed through unchanged.
		String internedField = (fieldName == null) ? null : fieldName.intern();
		HashSet collected = new HashSet();
		getTerms(query, collected, prohibited, internedField);
		// Copy the collected set out as a typed array.
		WeightedTerm[] typedArray = new WeightedTerm[0];
		return (WeightedTerm[]) collected.toArray(typedArray);
	
public static final WeightedTerm[] getTerms(org.apache.lucene.search.Query query, boolean prohibited)
Extracts all term texts of a given Query into an array of WeightedTerms.

param
query Query to extract term texts from
param
prohibited true to extract "prohibited" terms, too
return
an array of the terms used in a query, plus their weights.

	    return getTerms(query,prohibited,null);
	
private static final void getTerms(org.apache.lucene.search.Query query, java.util.HashSet terms, boolean prohibited, java.lang.String fieldName)

       	// Asks the query for its terms and adds one WeightedTerm (query boost + term text)
       	// to 'terms' for every term whose field matches 'fieldName'; a null 'fieldName'
       	// accepts terms from every field.
       	// NOTE(review): the 'prohibited' argument is not consulted anywhere in this body.
       	try
       	{
       		HashSet nonWeightedTerms = new HashSet();
       		query.extractTerms(nonWeightedTerms);
       		for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();)
       		{
       			Term term = (Term) iter.next();
       			// Compare field names by value rather than by reference, so the filter
       			// does not depend on both strings being the same interned instance.
       			if ((fieldName == null) || fieldName.equals(term.field()))
       			{
       				terms.add(new WeightedTerm(query.getBoost(), term.text()));
       			}
       		}
       	}
       	catch (UnsupportedOperationException ignore)
       	{
       		// this is non-fatal for our purposes: a query that cannot
       		// enumerate its terms simply contributes nothing
       	}