File | Doc | Category | Size | Date | Package
TermsFilterBuilder.java | API Doc | Apache Lucene 1.9 | 2006 | Mon Feb 27 11:12:20 GMT 2006 | org.apache.lucene.xmlparser.builders

TermsFilterBuilder.java

package org.apache.lucene.xmlparser.builders;

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.TermsFilter;
import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.FilterBuilder;
import org.apache.lucene.xmlparser.ParserException;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;


/**
 * Builds a {@link TermsFilter} from an XML element: the text of every
 * <code>Field</code> element found beneath it is run through the configured
 * {@link Analyzer}, and each token produced is added to the filter as a
 * {@link Term} on that element's field.
 *
 * @author maharwood
 */
public class TermsFilterBuilder implements FilterBuilder
{
	Analyzer analyzer;
	
	/**
	 * @param analyzer analyzer used to tokenize the text of each <code>Field</code> element
	 */
	public TermsFilterBuilder(Analyzer analyzer)
	{
		this.analyzer = analyzer;
	}
	
	/**
	 * Creates a filter containing one term per token found in the
	 * <code>Field</code> elements below <code>e</code>.
	 *
	 * @param e element whose descendant <code>Field</code> elements supply the terms
	 * @return a {@link TermsFilter} holding every analyzed term; it holds no
	 *         terms when there are no <code>Field</code> elements or no tokens
	 * @throws ParserException if a <code>Field</code> element yields no
	 *         <code>fieldName</code> value
	 * @throws RuntimeException if the analyzer's token stream fails with an
	 *         {@link IOException}; the original exception is attached as the cause
	 */
	public Filter getFilter(Element e) throws ParserException
	{
		TermsFilter tf=new TermsFilter();
		NodeList nl = e.getElementsByTagName("Field");
		for(int i=0;i<nl.getLength();i++)
		{
			Element fieldElem=(Element) nl.item(i);
			// NOTE(review): presumably also consults ancestor elements for the attribute - confirm in DOMUtils
			String fieldName=DOMUtils.getAttributeWithInheritance(fieldElem,"fieldName");
			
			if(fieldName==null)
			{
				throw new ParserException("TermsFilter missing \"fieldName\" element");				
			}
			String text=DOMUtils.getText(fieldElem).trim();
			addTerms(tf,fieldName,text);
		}
		return tf;
	}
	
	/**
	 * Tokenizes <code>text</code> for <code>fieldName</code> and adds one term per
	 * token to <code>tf</code>, always closing the token stream afterwards.
	 */
	private void addTerms(TermsFilter tf, String fieldName, String text)
	{
		TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
		try
		{
			try
			{
				Term term=null;
				for(Token token=ts.next();token!=null;token=ts.next())
				{
					if(term==null)
					{
						term=new Term(fieldName,token.termText());
					}
					else
					{
						term=term.createTerm(token.termText()); //create from previous to save fieldName.intern overhead
					}
					tf.addTerm(term);
				}
			}
			finally
			{
				// release whatever the analyzer holds, on success and on failure alike
				ts.close();
			}
		}
		catch(IOException ioe)
		{
			// keep the original message text but chain the cause so its stack trace is not lost
			throw new RuntimeException("Error constructing terms from index:"+ioe, ioe);
		}
	}
}