File | Doc | Category | Size | Date | Package
QueryTermExtractor.java | API Doc | Apache Lucene 1.9 | 6409 | Mon Feb 27 11:12:24 GMT 2006 | org.apache.lucene.search.highlight

QueryTermExtractor

public final class QueryTermExtractor extends Object
Utility class used to extract the terms used in a query, plus any weights. This class will not find terms for MultiTermQuery, RangeQuery and PrefixQuery classes so the caller must pass a rewritten query (see Query.rewrite) to obtain a list of expanded terms.

Fields Summary
Constructors Summary
Methods Summary
public static final WeightedTerm[] getIdfWeightedTerms(org.apache.lucene.search.Query query, org.apache.lucene.index.IndexReader reader, java.lang.String fieldName)
Extracts all term texts of a given Query into an array of WeightedTerms

param
query Query to extract term texts from
param
reader used to compute IDF, which can be used to a) score selected fragments better and b) produce graded highlights, e.g. changing the intensity of the font color
param
fieldName the field on which Inverse Document Frequency (IDF) calculations are based
return
an array of the terms used in a query, plus their weights.

	    // Collect the non-prohibited query terms, filtered by fieldName.
	    WeightedTerm[] weighted = getTerms(query, false, fieldName);
	    final int numDocs = reader.numDocs();
	    for (int idx = 0; idx < weighted.length; idx++)
	    {
	        WeightedTerm current = weighted[idx];
	        try
	        {
	            int df = reader.docFreq(new Term(fieldName, current.term));
	            // IDF algorithm taken from DefaultSimilarity class
	            double ratio = (float) numDocs / (double) (df + 1);
	            float idf = (float) (Math.log(ratio) + 1.0);
	            // Scale the weight already assigned to the term by its IDF.
	            current.weight *= idf;
	        }
	        catch (IOException e)
	        {
	            // ignore: if the document frequency cannot be read, the term
	            // simply keeps the weight it already had
	        }
	    }
	    return weighted;
	
public static final WeightedTerm[] getTerms(org.apache.lucene.search.Query query)
Extracts all term texts of a given Query into an array of WeightedTerms

param
query Query to extract term texts from
return
an array of the terms used in a query, plus their weights.

		return getTerms(query,false);
	
public static final WeightedTerm[] getTerms(org.apache.lucene.search.Query query, boolean prohibited, java.lang.String fieldName)
Extracts all term texts of a given Query into an array of WeightedTerms

param
query Query to extract term texts from
param
prohibited true to extract "prohibited" terms, too
param
fieldName The fieldName used to filter query terms
return
an array of the terms used in a query, plus their weights.

		// The helpers below compare field names by identity (==), so the
		// filter name is interned up front; null means "no field filter".
		final String internedField = (fieldName == null) ? null : fieldName.intern();

		// Accumulates the WeightedTerm instances found while walking the query.
		final HashSet collected = new HashSet();
		getTerms(query, collected, prohibited, internedField);

		return (WeightedTerm[]) collected.toArray(new WeightedTerm[collected.size()]);
	
public static final WeightedTerm[] getTerms(org.apache.lucene.search.Query query, boolean prohibited)
Extracts all term texts of a given Query into an array of WeightedTerms

param
query Query to extract term texts from
param
prohibited true to extract "prohibited" terms, too
return
an array of the terms used in a query, plus their weights.

	    return getTerms(query,prohibited,null);
	
private static final void getTerms(org.apache.lucene.search.Query query, java.util.HashSet terms, boolean prohibited, java.lang.String fieldName)

		// Dispatch on the concrete query type. Only the four types below are
		// handled; any other query type contributes no terms.
		if (query instanceof BooleanQuery)
		{
			getTermsFromBooleanQuery((BooleanQuery) query, terms, prohibited, fieldName);
		}
		else if (query instanceof PhraseQuery)
		{
			getTermsFromPhraseQuery((PhraseQuery) query, terms, fieldName);
		}
		else if (query instanceof TermQuery)
		{
			getTermsFromTermQuery((TermQuery) query, terms, fieldName);
		}
		else if (query instanceof SpanNearQuery)
		{
			getTermsFromSpanNearQuery((SpanNearQuery) query, terms, fieldName);
		}
	
private static final void getTermsFromBooleanQuery(org.apache.lucene.search.BooleanQuery query, java.util.HashSet terms, boolean prohibited, java.lang.String fieldName)

		// Recurse into each clause of the boolean query. Lucene 2.0 ready:
		// uses the getOccur()/getQuery() accessors on BooleanClause.
		BooleanClause[] clauses = query.getClauses();

		for (int idx = 0; idx < clauses.length; idx++)
		{
			BooleanClause clause = clauses[idx];

			// Skip MUST_NOT clauses unless the caller asked for prohibited terms too.
			if (!prohibited && clause.getOccur() == BooleanClause.Occur.MUST_NOT)
			{
				continue;
			}
			getTerms(clause.getQuery(), terms, prohibited, fieldName);
		}
	
private static final void getTermsFromPhraseQuery(org.apache.lucene.search.PhraseQuery query, java.util.HashSet terms, java.lang.String fieldName)

		// Add every term of the phrase, weighted by the phrase query's boost.
		Term[] phraseTerms = query.getTerms();

		for (int idx = 0; idx < phraseTerms.length; idx++)
		{
			Term candidate = phraseTerms[idx];

			// Field filter: the names are compared by identity, not equals().
			// NOTE(review): relies on both strings being interned - confirm
			// that Term.field() returns an interned string.
			if ((fieldName != null) && (candidate.field() != fieldName))
			{
				continue;
			}
			terms.add(new WeightedTerm(query.getBoost(), candidate.text()));
		}
	
private static final void getTermsFromSpanNearQuery(org.apache.lucene.search.spans.SpanNearQuery query, java.util.HashSet terms, java.lang.String fieldName)


        // Add every term reported by the span query, weighted by the query's boost.
        Iterator it = query.getTerms().iterator();

        while (it.hasNext())
        {
            Term spanTerm = (Term) it.next();
            String termText = spanTerm.text();

            // Field filter: a null fieldName accepts every field; otherwise the
            // names are compared by identity, not equals().
            // NOTE(review): relies on both strings being interned - confirm
            // that Term.field() returns an interned string.
            boolean fieldMatches = (fieldName == null) || (spanTerm.field() == fieldName);
            if (fieldMatches)
            {
                terms.add(new WeightedTerm(query.getBoost(), termText));
            }
        }

    
private static final void getTermsFromTermQuery(org.apache.lucene.search.TermQuery query, java.util.HashSet terms, java.lang.String fieldName)

	    if((fieldName==null)||(query.getTerm().field()==fieldName))
	    {
	        terms.add(new WeightedTerm(query.getBoost(),query.getTerm().text()));
	    }