FileDocCategorySizeDatePackage
LuceneMethods.javaAPI DocApache Lucene 2.0.013465Fri May 26 09:53:54 BST 2006lucli

LuceneMethods

public class LuceneMethods extends Object
Various methods that interact with Lucene and provide info about the index, search, etc. Parts adapted from the Lucene demo.

Fields Summary
private int
numDocs
private String
indexName
private Iterator
fieldIterator
private Vector
fields
private Vector
indexedFields
private String[]
fieldsArray
private Searcher
searcher
private Query
query
Constructors Summary
public LuceneMethods(String index)

    indexName = index;
    message("Lucene CLI. Using directory '" + indexName + "'. Type 'help' for instructions.");
  
Methods Summary
public voidcount(java.lang.String queryString)

    Hits hits = initSearch(queryString);
    System.out.println(hits.length() + " total documents");
    searcher.close();
  
private org.apache.lucene.search.QueryexplainQuery(java.lang.String queryString)

    // Parses queryString against the *indexed* fields only, stores the result in
    // the 'query' field and returns it. search() hands the returned Query to
    // searcher.explain() for each hit.

    // Reuse a searcher that is already assigned instead of opening another one:
    // overwriting the field with a new IndexSearcher would leave the previous
    // instance unclosed, and the hits would then be explained against a
    // different searcher than the one that produced them.
    if (searcher == null) {
      searcher = new IndexSearcher(indexName);
    }
    Analyzer analyzer = new StandardAnalyzer();
    // refreshes 'indexedFields' from the index
    getFieldInfo();

    // indexedFields only ever receives Strings (see getFieldInfo), so the
    // typed toArray is equivalent to copying element by element
    String[] indexedArray =
        (String[]) indexedFields.toArray(new String[indexedFields.size()]);
    MultiFieldQueryParser parser = new MultiFieldQueryParser(indexedArray, analyzer);
    query = parser.parse(queryString);
    System.out.println("Searching for: " + query.toString());
    return query;

  
private voidgetFieldInfo()

    // Rebuilds the two field-name lists from the index at indexName:
    //   fields        - every non-empty field name (FieldOption.ALL)
    //   indexedFields - every non-empty indexed field name (FieldOption.INDEXED)
    IndexReader indexReader = IndexReader.open(indexName);
    try {
      fields = new Vector();
      indexedFields = new Vector();

      //get the list of all field names
      fieldIterator = indexReader.getFieldNames(FieldOption.ALL).iterator();
      while (fieldIterator.hasNext()) {
        Object field = fieldIterator.next();
        if (field != null && !field.equals("")) {
          fields.add(field.toString());
        }
      }

      //get the list of indexed field names
      fieldIterator = indexReader.getFieldNames(FieldOption.INDEXED).iterator();
      while (fieldIterator.hasNext()) {
        Object field = fieldIterator.next();
        if (field != null && !field.equals("")) {
          indexedFields.add(field.toString());
        }
      }
    } finally {
      // close the reader even when enumerating the field names fails
      indexReader.close();
    }
  
public static java.util.Map.Entry[]getSortedHashtableEntries(java.util.Hashtable h)
Sort Hashtable values

param
h the hashtable we're sorting from http://developer.java.sun.com/developer/qow/archive/170/index.jsp

    // Copy the table's entries into an array so they can be ordered independently.
    Set entrySet = h.entrySet();
    Entry[] sorted = (Entry[]) entrySet.toArray(new Entry[entrySet.size()]);

    // Compare entries by value with the operands swapped, which yields
    // descending order; the values are cast to Comparable.
    Comparator byValueDescending = new Comparator() {
      public int compare(Object left, Object right) {
        Object leftValue = ((Entry) left).getValue();
        Object rightValue = ((Entry) right).getValue();
        Comparable rightComparable = (Comparable) rightValue;
        return rightComparable.compareTo(leftValue);
      }
    };
    Arrays.sort(sorted, byValueDescending);
    return sorted;
  
public voidinfo()

    // Prints a short summary of the index: document count, all field names,
    // indexed field names and whether the index is currently locked.
    IndexReader indexReader = IndexReader.open(indexName);
    try {
      // getFieldInfo() opens its own reader to fill 'fields' and 'indexedFields'
      getFieldInfo();
      numDocs = indexReader.numDocs();
      message("Index has " + numDocs + " documents ");
      message("All Fields:" + fields.toString());
      message("Indexed Fields:" + indexedFields.toString());

      if (IndexReader.isLocked(indexName)) {
        message("Index is locked");
      }
      //IndexReader.getCurrentVersion(indexName);
      //System.out.println("Version:" + version);
    } finally {
      // close the reader even when one of the steps above fails
      indexReader.close();
    }
  
private org.apache.lucene.search.HitsinitSearch(java.lang.String queryString)

todo
Allow user to specify analyzer


    // Parses queryString against every field of the index, opens the shared
    // 'searcher' and runs the query. On success the searcher is left open;
    // count() and search() close it once they are done with the returned Hits.
    Analyzer analyzer = new StandardAnalyzer();
    // refreshes 'fields' from the index
    getFieldInfo();

    // kept in a field because printHit iterates over fieldsArray;
    // 'fields' only ever receives Strings (see getFieldInfo)
    fieldsArray = (String[]) fields.toArray(new String[fields.size()]);
    MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldsArray, analyzer);
    // Parse before opening the searcher, so a failure while parsing the
    // user-supplied query does not leave an open IndexSearcher behind.
    query = parser.parse(queryString);
    System.out.println("Searching for: " + query.toString());

    searcher = new IndexSearcher(indexName);
    boolean searched = false;
    try {
      Hits hits = searcher.search(query);
      searched = true;
      return hits;
    } finally {
      // only close on failure; on success the caller owns the open searcher
      if (!searched) {
        searcher.close();
      }
    }

  
private voidinvertDocument(org.apache.lucene.document.Document doc)


    // Re-analyzes the indexed, tokenized fields of doc with a StandardAnalyzer,
    // counts how often each term text occurs and prints the ten highest counts.
    final int maxFieldLength = 10000;
    Hashtable termCounts = new Hashtable();
    Analyzer analyzer = new StandardAnalyzer();

    for (Enumeration docFields = doc.fields(); docFields.hasMoreElements();) {
      Field field = (Field) docFields.nextElement();
      String fieldName = field.name();

      // only fields that are both indexed and tokenized are analyzed
      if (!field.isIndexed() || !field.isTokenized()) {
        continue;
      }

      // find or make Reader
      Reader reader = field.readerValue();
      if (reader == null) {
        String value = field.stringValue();
        if (value == null) {
          throw new IllegalArgumentException
            ("field must have either String or Reader value");
        }
        reader = new StringReader(value);
      }

      // Tokenize the field and tally every term text
      int position = 0;
      TokenStream stream = analyzer.tokenStream(fieldName, reader);
      try {
        Token token;
        while ((token = stream.next()) != null) {
          // adding the increment equals adding (increment - 1) and then 1
          position += token.getPositionIncrement();
          String termText = token.termText();
          Integer seen = (Integer) termCounts.get(termText);
          int updated = (seen == null) ? 1 : seen.intValue() + 1;
          termCounts.put(termText, new Integer(updated));
          // the token that crosses the cap is still counted, then we stop
          if (position > maxFieldLength) {
            break;
          }
        }
      } finally {
        stream.close();
      }
    }

    // print at most ten entries, highest count first
    Entry[] ranked = getSortedHashtableEntries(termCounts);
    int shown = Math.min(ranked.length, 10);
    for (int rank = 1; rank <= shown; rank++) {
      Entry entry = ranked[rank - 1];
      message(rank + ":" + entry.getKey() + " " + entry.getValue());
    }
  
public static voidmessage(java.lang.String s)

    System.out.println(s);
  
public voidoptimize()

    // Optimizes the existing index at indexName and reports the elapsed time.
    //open the index writer. False: don't create a new one
    IndexWriter indexWriter = new IndexWriter(indexName, new StandardAnalyzer(), false);
    try {
      message("Starting to optimize index.");
      long start = System.currentTimeMillis();
      indexWriter.optimize();
      message("Done optimizing index. Took " + (System.currentTimeMillis() - start) + " msecs");
    } finally {
      // close the writer even when optimize() throws, so it is not left open
      indexWriter.close();
    }
  
private voidprintHit(org.apache.lucene.document.Document doc)

todo
Allow user to specify what field(s) to display

    // For every name in fieldsArray, print one "name:value" line per value that
    // doc.getValues returns, or a placeholder line when it returns null.
    for (int fieldIdx = 0; fieldIdx < fieldsArray.length; fieldIdx++) {
      String fieldName = fieldsArray[fieldIdx];
      String[] values = doc.getValues(fieldName);
      if (values == null) {
        message(fieldName + ": <not available>");
        continue;
      }
      for (int valueIdx = 0; valueIdx < values.length; valueIdx++) {
        message(fieldName + ":" + values[valueIdx]);
      }
    }
    //another option is to just do message(doc);
  
public voidsearch(java.lang.String queryString, boolean explain, boolean showTokens, jline.ConsoleReader cr)

    // Runs queryString and prints the hits ten at a time.
    //   explain    - also print searcher.explain() output for every hit
    //   showTokens - also print the most frequent tokens of every hit (invertDocument)
    //   cr         - console used to ask "more (y/n) ? " between pages
    Hits hits = initSearch(queryString);
    try {
      System.out.println(hits.length() + " total matching documents");
      if (explain) {
        query = explainQuery(queryString);
      }

      final int HITS_PER_PAGE = 10;
      message("--------------------------------------");
      for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
        int end = Math.min(hits.length(), start + HITS_PER_PAGE);
        for (int ii = start; ii < end; ii++) {
          Document doc = hits.doc(ii);
          message("---------------- " + (ii + 1) + " score:" + hits.score(ii) + "---------------------");
          printHit(doc);
          if (showTokens) {
            invertDocument(doc);
          }
          if (explain) {
            Explanation exp = searcher.explain(query, hits.id(ii));
            message("Explanation:" + exp.toString());
          }
        }
        message("#################################################");

        if (hits.length() > end) {
          // TODO: don't let the input end up in the command line history
          String answer = cr.readLine("more (y/n) ? ");
          // stop paging on end of input (null), an empty line, or an answer
          // starting with 'n'; anything else shows the next page
          if (answer == null || answer.length() == 0 || answer.charAt(0) == 'n') {
            break;
          }
        }
      }
    } finally {
      // release the searcher even when printing or reading the answer fails
      searcher.close();
    }
  
public voidterms(java.lang.String field)
Lists up to 100 terms of the index, sorted by their "field:text" key, each with its document frequency.

param
field - the name of the field to restrict the listing to, or null for all fields.

    // TreeMap keeps the "field:text" keys in their natural String order
    TreeMap termMap = new TreeMap();
    IndexReader indexReader = IndexReader.open(indexName);
    try {
      TermEnum terms = indexReader.terms();
      try {
        while (terms.next()) {
          Term term = terms.term();
          //message(term.field() + ":" + term.text() + " freq:" + terms.docFreq());
          //if we're either not looking by field or we're matching the specific field
          if ((field == null) || field.equals(term.field())) {
            termMap.put(term.field() + ":" + term.text(), new Integer((terms.docFreq())));
          }
        }
      } finally {
        // the enumeration is a resource of its own and must be closed as well
        terms.close();
      }

      // print at most the first 100 entries in key order
      Iterator entryIterator = termMap.entrySet().iterator();
      for (int ii = 0; entryIterator.hasNext() && ii < 100; ii++) {
        Entry entry = (Entry) entryIterator.next();
        message(entry.getKey() + ": " + entry.getValue());
      }
    } finally {
      // close the reader even when enumerating or printing fails
      indexReader.close();
    }