File Doc Category Size Date Package
IndexTask.java API Doc Apache Lucene 2.1.0 11772 Wed Feb 14 10:45:44 GMT 2007 org.apache.lucene.ant

IndexTask

java.lang.Object
- org.apache.tools.ant.Task

public class IndexTask extends org.apache.tools.ant.Task

Ant task to index files with Lucene

author: Erik Hatcher

Fields Summary
private ArrayList
filesets
file list
private boolean
overwrite
overwrite index?
private File
indexDir
index path
private String
handlerClassName
document handler classname
private DocumentHandler
handler
document handler instance
private String
analyzerClassName
private Analyzer
analyzer
analyzer instance
private int
mergeFactor
Lucene merge factor
private HandlerConfig
handlerConfig
private boolean
useCompoundIndex
Constructors Summary
public IndexTask()
Creates new instance
Methods Summary
public void addConfig(org.apache.lucene.ant.IndexTask$HandlerConfig config)
Sets custom properties for a configurable document handler.
// Only a single handler configuration may be attached to this task;
// a second call is rejected instead of silently replacing the first.
if (this.handlerConfig != null) {
    throw new BuildException("Only one config element allowed");
}
this.handlerConfig = config;
public void addFileset(org.apache.tools.ant.types.FileSet set)
Adds a set of files (nested fileset element).
param
set FileSet to be added
// Appended to the task's list of filesets; indexDocs() later walks this
// list by index and scans each entry for files to index.
this.filesets.add(set);
public void execute()
Begins the indexing
exception
BuildException If an error occurs indexing the fileset
// construct handler and analyzer dynamically try { Class clazz = Class.forName(handlerClassName); handler = (DocumentHandler) clazz.newInstance(); clazz = Class.forName(analyzerClassName); analyzer = (Analyzer) clazz.newInstance(); } catch (ClassNotFoundException cnfe) { throw new BuildException(cnfe); } catch (InstantiationException ie) { throw new BuildException(ie); } catch (IllegalAccessException iae) { throw new BuildException(iae); } log("Document handler = " + handler.getClass(), Project.MSG_VERBOSE); log("Analyzer = " + analyzer.getClass(), Project.MSG_VERBOSE); if (handler instanceof ConfigurableDocumentHandler) { ((ConfigurableDocumentHandler) handler).configure(handlerConfig.getProperties()); } try { indexDocs(); } catch (IOException e) { throw new BuildException(e); }
private void indexDocs()
Index the fileset.
exception
IOException if Lucene I/O exception
todo
refactor!!!!!
// Indexes every file of the configured filesets. When an existing index can
// be opened, files whose stored "modified" value still equals the file's
// current last-modified time are skipped.
final long startMillis = System.currentTimeMillis();

boolean create = overwrite;
// If the index directory doesn't exist, create it and force create mode
// (File.mkdirs() returns true only when it actually created the directory).
if (indexDir.mkdirs() && !overwrite) {
    create = true;
}

Searcher searcher = null;
boolean checkLastModified = false;
if (!create) {
    try {
        searcher = new IndexSearcher(indexDir.getAbsolutePath());
        checkLastModified = true;
    } catch (IOException ioe) {
        log("IOException: " + ioe.getMessage());
        // Deliberately not rethrown: without a searcher the up-to-date
        // check is disabled and all documents are indexed.
    }
}

log("checkLastModified = " + checkLastModified, Project.MSG_VERBOSE);

int totalFiles = 0;
int totalIndexed = 0;
int totalIgnored = 0;

// The writer is created inside the try block so that the searcher opened
// above is still closed if constructing or configuring the writer fails.
IndexWriter writer = null;
try {
    writer = new IndexWriter(indexDir, analyzer, create);
    writer.setUseCompoundFile(useCompoundIndex);
    writer.setMergeFactor(mergeFactor);

    for (int i = 0; i < filesets.size(); i++) {
        FileSet fs = (FileSet) filesets.get(i);
        if (fs == null) {
            continue;
        }

        DirectoryScanner ds = fs.getDirectoryScanner(getProject());
        String[] dsfiles = ds.getIncludedFiles();
        File baseDir = ds.getBasedir();

        for (int j = 0; j < dsfiles.length; j++) {
            File file = new File(baseDir, dsfiles[j]);
            totalFiles++;

            if (!file.exists() || !file.canRead()) {
                throw new BuildException("File \""
                    + file.getAbsolutePath()
                    + "\" does not exist or is not readable.");
            }

            boolean indexIt = true;

            if (checkLastModified) {
                Term pathTerm = new Term("path", file.getPath());
                TermQuery query = new TermQuery(pathTerm);
                Hits hits = searcher.search(query);

                // If the document is found, compare the indexed last
                // modified time with the current file - don't index if
                // up to date.
                if (hits.length() > 0) {
                    Document indexedDoc = hits.doc(0);
                    // Null-check BEFORE trimming: the previous code called
                    // trim() on the raw value, so a document without a
                    // "modified" field raised a NullPointerException and
                    // the null test could never be false.
                    String indexModified = indexedDoc.get("modified");
                    if (indexModified != null) {
                        long lastModified = 0;
                        try {
                            lastModified = DateTools.stringToTime(indexModified.trim());
                        } catch (ParseException e) {
                            // Unparsable stored time: lastModified stays 0,
                            // so the comparison below decides.
                        }

                        if (lastModified == file.lastModified()) {
                            indexIt = false;
                        }
                    }
                }
            }

            if (indexIt) {
                // NOTE(review): when a previously indexed file has changed,
                // nothing in this method deletes the old document before the
                // new one is added (the original TODO "remove existing
                // document" is still open).
                try {
                    log("Indexing " + file.getPath(), Project.MSG_VERBOSE);
                    Document doc = handler.getDocument(file);

                    if (doc == null) {
                        totalIgnored++;
                    } else {
                        // Add the path of the file as a field named "path".
                        // Stored and indexed untokenized, so the index keeps
                        // the path and the path is searchable.
                        doc.add(new Field("path", file.getPath(),
                            Field.Store.YES, Field.Index.UN_TOKENIZED));

                        // Add the last modified date of the file as a field
                        // named "modified". Indexed untokenized so that it
                        // is searchable but not split into words.
                        doc.add(new Field("modified",
                            DateTools.timeToString(file.lastModified(),
                                DateTools.Resolution.MILLISECOND),
                            Field.Store.YES, Field.Index.UN_TOKENIZED));

                        writer.addDocument(doc);
                        totalIndexed++;
                    }
                } catch (DocumentHandlerException e) {
                    throw new BuildException(e);
                }
            }
        } // for j
    } // for i

    writer.optimize();
} finally {
    // Always make sure everything gets closed, no matter how we exit.
    // The nested finally keeps a failing writer.close() from skipping
    // searcher.close().
    try {
        if (writer != null) {
            writer.close();
        }
    } finally {
        if (searcher != null) {
            searcher.close();
        }
    }
}

log(totalIndexed + " out of " + totalFiles + " indexed (" + totalIgnored
    + " ignored) in " + (System.currentTimeMillis() - startMillis)
    + " milliseconds");
public void setAnalyzer(org.apache.lucene.ant.IndexTask$AnalyzerType type)
Sets the analyzer based on the builtin Lucene analyzer types.
todo
Enforce analyzer and analyzerClassName to be mutually exclusive
analyzerClassName = type.getClassname();
public void setAnalyzerClassName(java.lang.String classname)
// Records the analyzer class name; execute() resolves it with
// Class.forName() and instantiates it.
this.analyzerClassName = classname;
public void setDocumentHandler(java.lang.String classname)
Sets the documentHandler attribute of the IndexTask object
param
classname The new documentHandler value
// Records the document handler class name; execute() resolves it with
// Class.forName() and casts the new instance to DocumentHandler.
this.handlerClassName = classname;
public void setIndex(java.io.File indexDir)
Specifies the directory where the index will be stored
// Stores the index directory. indexDocs() calls mkdirs() on it and opens
// the IndexWriter (and, when not creating, an IndexSearcher) on this path.
this.indexDir = indexDir;
public void setMergeFactor(int mergeFactor)
Sets the mergeFactor attribute of the IndexTask object
param
mergeFactor The new mergeFactor value
// Stored as-is; indexDocs() hands the value to writer.setMergeFactor().
this.mergeFactor = mergeFactor;
public void setOverwrite(boolean overwrite)
Sets the overwrite attribute of the IndexTask object
param
overwrite The new overwrite value
// Stored as-is; indexDocs() uses it as the initial "create" flag passed to
// the IndexWriter constructor.
this.overwrite = overwrite;
public void setUseCompoundIndex(boolean useCompoundIndex)
If creating a new index and this is set to true, the index will be created in compound format.
// Stored as-is; indexDocs() hands the value to writer.setUseCompoundFile().
this.useCompoundIndex = useCompoundIndex;