File | Doc | Category | Size | Date | Package
IndexTask.java | API Doc | Apache Lucene 2.1.0 | 11772 | Wed Feb 14 10:45:44 GMT 2007 | org.apache.lucene.ant

IndexTask

public class IndexTask extends org.apache.tools.ant.Task
Ant task to index files with Lucene
author
Erik Hatcher

Fields Summary
private ArrayList
filesets
file list
private boolean
overwrite
overwrite index?
private File
indexDir
index path
private String
handlerClassName
document handler classname
private DocumentHandler
handler
document handler instance
private String
analyzerClassName
private Analyzer
analyzer
analyzer instance
private int
mergeFactor
Lucene merge factor
private HandlerConfig
handlerConfig
private boolean
useCompoundIndex
Constructors Summary
public IndexTask()
Creates new instance



         
    
  
Methods Summary
public void addConfig(org.apache.lucene.ant.IndexTask$HandlerConfig config)
Sets custom properties for a configurable document handler.

    // Accept the first nested config element; any further one is an error.
    if (handlerConfig == null) {
      handlerConfig = config;
      return;
    }

    throw new BuildException("Only one config element allowed");
  
public void addFileset(org.apache.tools.ant.types.FileSet set)
Adds a set of files (nested fileset attribute).

param
set FileSet to be added

    // Append to the list that indexDocs() later iterates over.
    filesets.add(set);
  
public void execute()
Begins the indexing

exception
BuildException If an error occurs indexing the fileset


    // Instantiate the document handler and the analyzer reflectively from
    // their configured class names. Any reflection failure aborts the build.
    try {
      Class clazz = Class.forName(handlerClassName);
      handler = (DocumentHandler) clazz.newInstance();

      clazz = Class.forName(analyzerClassName);
      analyzer = (Analyzer) clazz.newInstance();
    } catch (ClassNotFoundException cnfe) {
      throw new BuildException(cnfe);
    } catch (InstantiationException ie) {
      throw new BuildException(ie);
    } catch (IllegalAccessException iae) {
      throw new BuildException(iae);
    }

    log("Document handler = " + handler.getClass(), Project.MSG_VERBOSE);
    log("Analyzer = " + analyzer.getClass(), Project.MSG_VERBOSE);

    if (handler instanceof ConfigurableDocumentHandler) {
      // handlerConfig is only assigned by addConfig(), so it is still null
      // when the build file has no nested config element. Guard against that
      // instead of failing with a NullPointerException.
      if (handlerConfig != null) {
        ((ConfigurableDocumentHandler) handler).configure(handlerConfig.getProperties());
      } else {
        log("No config element supplied; document handler left unconfigured",
            Project.MSG_VERBOSE);
      }
    }

    // Run the actual indexing; I/O failures are reported as build failures.
    try {
      indexDocs();
    } catch (IOException e) {
      throw new BuildException(e);
    }
  
private void indexDocs()
Index the fileset.

exception
IOException if Lucene I/O exception
todo
refactor!!!!!

    Date start = new Date();

    boolean create = overwrite;
    // If the index directory doesn't exist,
    // create it and force create mode
    if (indexDir.mkdirs() && !overwrite) {
      create = true;
    }

    Searcher searcher = null;
    IndexWriter writer = null;
    boolean checkLastModified = false;
    int totalFiles = 0;
    int totalIndexed = 0;
    int totalIgnored = 0;

    // Both the searcher and the writer are acquired inside this try so that
    // the finally block releases whichever of them was opened, even when
    // opening the writer itself fails.
    try {
      if (!create) {
        try {
          searcher = new IndexSearcher(indexDir.getAbsolutePath());
          checkLastModified = true;
        } catch (IOException ioe) {
          log("IOException: " + ioe.getMessage());
          // Deliberately ignored: without a searcher the up-to-date check
          // is disabled, so every document gets indexed.
        }
      }

      log("checkLastModified = " + checkLastModified, Project.MSG_VERBOSE);

      writer = new IndexWriter(indexDir, analyzer, create);
      writer.setUseCompoundFile(useCompoundIndex);
      writer.setMergeFactor(mergeFactor);

      for (int i = 0; i < filesets.size(); i++) {
        FileSet fs = (FileSet) filesets.get(i);
        if (fs != null) {
          DirectoryScanner ds =
            fs.getDirectoryScanner(getProject());
          String[] dsfiles = ds.getIncludedFiles();
          File baseDir = ds.getBasedir();

          for (int j = 0; j < dsfiles.length; j++) {
            File file = new File(baseDir, dsfiles[j]);
            totalFiles++;

            if (!file.exists() || !file.canRead()) {
              throw new BuildException("File \"" +
                                       file.getAbsolutePath()
                                       + "\" does not exist or is not readable.");
            }

            boolean indexIt = true;

            if (checkLastModified) {
              Term pathTerm =
                new Term("path", file.getPath());
              TermQuery query =
                new TermQuery(pathTerm);
              Hits hits = searcher.search(query);

              // if document is found, compare the
              // indexed last modified time with the
              // current file
              // - don't index if up to date
              if (hits.length() > 0) {
                Document doc = hits.doc(0);
                // Check for a missing field BEFORE trimming; the stored
                // document may not carry a "modified" field at all.
                String indexModified = doc.get("modified");
                if (indexModified != null) {
                  try {
                    long lastModified =
                      DateTools.stringToTime(indexModified.trim());
                    if (lastModified == file.lastModified()) {
                      indexIt = false;
                    }
                  } catch (ParseException e) {
                    // Unparsable timestamp: treat the indexed copy as
                    // stale and re-index the file.
                  }
                }
                // TODO: a changed file is added again below without its
                // previously indexed document being removed.
              }
            }

            if (indexIt) {
              try {
                log("Indexing " + file.getPath(),
                    Project.MSG_VERBOSE);
                Document doc =
                  handler.getDocument(file);

                if (doc == null) {
                  // The handler returned no document for this file.
                  totalIgnored++;
                } else {
                  // Add the path of the file as a field named "path".  Use a Keyword field, so
                  // that the index stores the path, and so that the path is searchable
                  doc.add(new Field("path", file.getPath(), Field.Store.YES, Field.Index.UN_TOKENIZED));

                  // Add the last modified date of the file as a field named "modified".  Use a
                  // Keyword field, so that it's searchable, but so that no attempt is made
                  // to tokenize the field into words.
                  doc.add(new Field("modified", DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.UN_TOKENIZED));

                  writer.addDocument(doc);
                  totalIndexed++;
                }
              } catch (DocumentHandlerException e) {
                throw new BuildException(e);
              }
            }
          }
          // for j
        }
        // if (fs != null)
      }
      // for i

      writer.optimize();
    }
      //try
    finally {
      // always make sure everything gets closed,
      // no matter how we exit. The nested finally ensures the searcher is
      // closed even if closing the writer throws.
      try {
        if (writer != null) {
          writer.close();
        }
      } finally {
        if (searcher != null) {
          searcher.close();
        }
      }
    }

    Date end = new Date();

    log(totalIndexed + " out of " + totalFiles + " indexed (" +
        totalIgnored + " ignored) in " + (end.getTime() - start.getTime()) +
        " milliseconds");
  
public void setAnalyzer(org.apache.lucene.ant.IndexTask$AnalyzerType type)
Sets the analyzer based on the builtin Lucene analyzer types.

todo
Enforce analyzer and analyzerClassName to be mutually exclusive

    // Store the class name supplied by the analyzer type; execute() loads
    // and instantiates it reflectively.
    analyzerClassName = type.getClassname();
  
public void setAnalyzerClassName(java.lang.String classname)

    // Overwrites any previously set analyzer class name; execute() loads
    // this class via Class.forName.
    analyzerClassName = classname;
  
public void setDocumentHandler(java.lang.String classname)
Sets the documentHandler attribute of the IndexTask object

param
classname The new documentHandler value

    // Class name of the document handler; execute() loads and instantiates
    // it via Class.forName.
    handlerClassName = classname;
  
public void setIndex(java.io.File indexDir)
Specifies the directory where the index will be stored

    // Directory that indexDocs() creates if needed and opens for both
    // searching and writing.
    this.indexDir = indexDir;
  
public void setMergeFactor(int mergeFactor)
Sets the mergeFactor attribute of the IndexTask object

param
mergeFactor The new mergeFactor value

    // Forwarded to IndexWriter.setMergeFactor() in indexDocs().
    this.mergeFactor = mergeFactor;
  
public void setOverwrite(boolean overwrite)
Sets the overwrite attribute of the IndexTask object

param
overwrite The new overwrite value

    // Used by indexDocs() as the initial value of the create flag passed to
    // the IndexWriter constructor.
    this.overwrite = overwrite;
  
public void setUseCompoundIndex(boolean useCompoundIndex)
If creating a new index and this is set to true, the index will be created in compound format.

    // Forwarded to IndexWriter.setUseCompoundFile() in indexDocs().
    this.useCompoundIndex = useCompoundIndex;