FileDocCategorySizeDatePackage
LuceneMethods.javaAPI DocApache Lucene 1.913378Mon Feb 20 09:18:38 GMT 2006lucli

LuceneMethods

public class LuceneMethods extends Object

Fields Summary
private int
numDocs
private String
indexName
Iterator
fieldIterator
Vector
fields
Vector
indexedFields
String[]
fieldsArray
Searcher
searcher
Query
query
Constructors Summary
public LuceneMethods(String index)

    indexName = index;
    message("Lucene CLI. Using directory '" + indexName + "'. Type 'help' for instructions.");
  
Methods Summary
public voidcount(java.lang.String queryString)

    // Prints how many documents match queryString. initSearch() opens the shared
    // 'searcher' field and runs the query across every known field.
    final Hits matches = initSearch(queryString);
    final int total = matches.length();
    System.out.println(total + " total documents");
    // Only the count is needed, so release the searcher right away.
    searcher.close();
  
private org.apache.lucene.search.QueryexplainQuery(java.lang.String queryString)


    // Parses queryString against the indexed fields only, stores the result in the
    // shared 'query' field and returns it.
    //
    // NOTE(review): this replaces the shared 'searcher' field with a new IndexSearcher
    // without closing whatever it held before; the caller has to deal with that.
    searcher = new IndexSearcher(indexName);
    final Analyzer standardAnalyzer = new StandardAnalyzer();
    // Rebuild 'fields' and 'indexedFields' from the index before reading them.
    getFieldInfo();

    final MultiFieldQueryParser multiParser = new MultiFieldQueryParser(queryString, standardAnalyzer);

    // Copy the indexed field names into the String[] the parser expects.
    final String[] searchable = new String[indexedFields.size()];
    for (int slot = 0; slot < searchable.length; slot++) {
      searchable[slot] = (String) indexedFields.get(slot);
    }

    query = multiParser.parse(queryString, searchable, standardAnalyzer);
    System.out.println("Searching for: " + query.toString());
    return query;

  
private voidgetFieldInfo()

    // Rebuilds the 'fields' (all field names) and 'indexedFields' (indexed field names)
    // vectors from the index at 'indexName'. Null and empty names are skipped.
    IndexReader indexReader = IndexReader.open(indexName);
    try {
      fields = new Vector();
      indexedFields = new Vector();

      //get the list of all field names
      fieldIterator = indexReader.getFieldNames().iterator();
      while (fieldIterator.hasNext()) {
        Object field = fieldIterator.next();
        if (field != null && !field.equals("")) {
          fields.add(field.toString());
        }
      }

      //get the list of indexed field names
      fieldIterator = indexReader.getFieldNames(true).iterator();
      while (fieldIterator.hasNext()) {
        Object field = fieldIterator.next();
        if (field != null && !field.equals("")) {
          indexedFields.add(field.toString());
        }
      }
    } finally {
      // Close the reader even when enumeration fails, so the index handle is not leaked.
      indexReader.close();
    }
  
public static java.util.Map.Entry[]getSortedHashtableEntries(java.util.Hashtable h)
Sort Hashtable values

param
h the hashtable whose entries are sorted by value. Adapted from http://developer.java.sun.com/developer/qow/archive/170/index.jsp

    // Returns the table's entries as an array ordered by value, largest first.
    // Values must be mutually Comparable; the table itself is not modified.
    final Set entrySet = h.entrySet();
    final Entry[] sorted = (Entry[]) entrySet.toArray(new Entry[entrySet.size()]);

    final Comparator byValueDescending = new Comparator() {
      public int compare(Object left, Object right) {
        final Object leftValue = ((Entry) left).getValue();
        final Object rightValue = ((Entry) right).getValue();
        // Comparing right-to-left flips natural order into descending order.
        return ((Comparable) rightValue).compareTo(leftValue);
      }
    };

    Arrays.sort(sorted, byValueDescending);
    return sorted;
  
public voidinfo()

    // Prints a short summary of the index: document count, all field names,
    // indexed field names, and whether the index is currently locked.
    IndexReader indexReader = IndexReader.open(indexName);
    try {
      // Refreshes 'fields' and 'indexedFields' (uses its own reader internally).
      getFieldInfo();
      numDocs = indexReader.numDocs();
      message("Index has " + numDocs + " documents ");
      message("All Fields:" + fields.toString());
      message("Indexed Fields:" + indexedFields.toString());

      if (IndexReader.isLocked(indexName)) {
        message("Index is locked");
      }
      //IndexReader.getCurrentVersion(indexName);
      //System.out.println("Version:" + version);
    } finally {
      // Close the reader even if gathering the statistics throws.
      indexReader.close();
    }
  
private org.apache.lucene.search.HitsinitSearch(java.lang.String queryString)

todo
Allow user to specify analyzer


    // Opens a new searcher into the shared 'searcher' field, parses queryString against
    // every field name found in the index, and returns the resulting hits.
    // On success the searcher stays open (the returned Hits are used while it is open)
    // and the caller must close it. On failure it is closed here.
    searcher = new IndexSearcher(indexName);
    boolean succeeded = false;
    try {
      Analyzer analyzer = new StandardAnalyzer();
      // Refreshes 'fields' and 'indexedFields' from the index.
      getFieldInfo();

      MultiFieldQueryParser parser = new MultiFieldQueryParser(queryString, analyzer);

      // printHit() later reads 'fieldsArray', so it is kept as a member.
      fieldsArray = (String[]) fields.toArray(new String[fields.size()]);
      query = parser.parse(queryString, fieldsArray, analyzer);
      System.out.println("Searching for: " + query.toString());
      Hits hits = searcher.search(query);
      succeeded = true;
      return hits;
    } finally {
      // A malformed query (or any other failure) must not leak the open searcher.
      if (!succeeded) {
        searcher.close();
      }
    }

  
private voidinvertDocument(org.apache.lucene.document.Document doc)


    // Re-analyzes the document's indexed, tokenized fields with a StandardAnalyzer,
    // counts how often each term text occurs, and prints the ten most frequent terms.
    final Hashtable termCounts = new Hashtable();
    final int maxFieldLength = 10000;
    final Analyzer analyzer = new StandardAnalyzer();

    for (Enumeration docFields = doc.fields(); docFields.hasMoreElements();) {
      final Field field = (Field) docFields.nextElement();
      final String fieldName = field.name();

      // Only fields that are both indexed and tokenized are analyzed; all others are skipped.
      if (!field.isIndexed() || !field.isTokenized()) {
        continue;
      }

      // Use the field's Reader when it has one, otherwise wrap its String value.
      Reader reader = field.readerValue();
      if (reader == null) {
        final String text = field.stringValue();
        if (text == null) {
          throw new IllegalArgumentException
            ("field must have either String or Reader value");
        }
        reader = new StringReader(text);
      }

      final TokenStream stream = analyzer.tokenStream(fieldName, reader);
      try {
        // 'position' restarts for every field and advances by each token's position increment.
        int position = 0;
        Token token;
        while ((token = stream.next()) != null) {
          position += token.getPositionIncrement();

          final String term = token.termText();
          final Integer seen = (Integer) termCounts.get(term);
          final int updated = (seen == null) ? 1 : seen.intValue() + 1;
          termCounts.put(term, new Integer(updated));

          // Stop once this field has produced more than maxFieldLength positions.
          if (position > maxFieldLength) {
            break;
          }
        }
      } finally {
        stream.close();
      }
    }

    // Entries come back sorted by count, highest first; show at most ten.
    final Entry[] ranked = getSortedHashtableEntries(termCounts);
    final int shown = Math.min(ranked.length, 10);
    for (int rank = 0; rank < shown; rank++) {
      final Entry top = ranked[rank];
      message((rank + 1) + ":" + top.getKey() + " " + top.getValue());
    }
  
public static voidmessage(java.lang.String s)

    System.out.println(s);
  
public voidoptimize()

    // Optimizes the existing index at 'indexName' and reports how long it took.
    //open the index writer. False: don't create a new one
    IndexWriter indexWriter = new IndexWriter(indexName, new StandardAnalyzer(), false);
    try {
      message("Starting to optimize index.");
      long start = System.currentTimeMillis();
      indexWriter.optimize();
      message("Done optimizing index. Took " + (System.currentTimeMillis() - start) + " msecs");
    } finally {
      // Close the writer even when optimize() fails, so it is never left open.
      indexWriter.close();
    }
  
private voidprintHit(org.apache.lucene.document.Document doc)

todo
Allow user to specify what field(s) to display

    // Prints every value of every field listed in 'fieldsArray' for this document,
    // one "name:value" line per value, or a placeholder when the document lacks the field.
    for (int fieldIdx = 0; fieldIdx < fieldsArray.length; fieldIdx++) {
      final String name = fieldsArray[fieldIdx];
      final String[] values = doc.getValues(name);

      if (values == null) {
        message(name + ": <not available>");
        continue;
      }

      for (int valueIdx = 0; valueIdx < values.length; valueIdx++) {
        message(name + ":" + values[valueIdx]);
      }
    }
    //another option is to just do message(doc);
  
public voidsearch(java.lang.String queryString, boolean explain, boolean showTokens, jline.ConsoleReader cr)

    // Runs queryString against the index and prints the hits ten at a time, asking the
    // user via 'cr' whether to continue after each page. With 'explain' set, a scoring
    // explanation is printed per hit. With 'showTokens' set, the hit's top terms are too.
    Hits hits = initSearch(queryString);
    // explainQuery() replaces the shared 'searcher' field with a new instance, so keep
    // a handle on the searcher that backs 'hits' so that it can still be closed.
    Searcher hitsSearcher = searcher;
    try {
      System.out.println(hits.length() + " total matching documents");
      if (explain) {
        query = explainQuery(queryString);
      }

      final int HITS_PER_PAGE = 10;
      message("--------------------------------------");
      for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
        int end = Math.min(hits.length(), start + HITS_PER_PAGE);
        for (int ii = start; ii < end; ii++) {
          Document doc = hits.doc(ii);
          message("---------------- " + (ii + 1) + " score:" + hits.score(ii) + "---------------------");
          printHit(doc);
          if (showTokens) {
            invertDocument(doc);
          }
          if (explain) {
            Explanation exp = searcher.explain(query, hits.id(ii));
            message("Explanation:" + exp.toString());
          }
        }
        message("#################################################");

        if (hits.length() > end) {
          // TODO: don't let the input end up in the command line history
          String answer = cr.readLine("more (y/n) ? ");
          // A null answer means the input ended; treat it like "no", as with an empty line.
          if (answer == null || answer.length() == 0 || answer.charAt(0) == 'n') {
            break;
          }
        }
      }
    } finally {
      // Close whatever is in the shared field, plus the searcher behind 'hits'
      // when explainQuery() swapped in a different one.
      try {
        searcher.close();
      } finally {
        if (hitsSearcher != searcher) {
          hitsSearcher.close();
        }
      }
    }
  
public voidterms(java.lang.String field)
Provides a list of the top terms of the index.

param
field - the name of the field whose terms are listed, or null to list the terms of all fields.

    // Collects "field:text" -> document frequency for every term of the index (or only
    // those of 'field' when it is non-null) and prints up to the first 100 entries.
    // The TreeMap keeps entries in key order, so output is alphabetical, not by frequency.
    TreeMap termMap = new TreeMap();
    IndexReader indexReader = IndexReader.open(indexName);
    try {
      TermEnum terms = indexReader.terms();
      try {
        while (terms.next()) {
          Term term = terms.term();
          //if we're either not looking by field or we're matching the specific field
          if ((field == null) || field.equals(term.field())) {
            termMap.put(term.field() + ":" + term.text(), new Integer(terms.docFreq()));
          }
        }
      } finally {
        // The enumeration is a resource of its own and has to be released as well.
        terms.close();
      }
    } finally {
      // Everything needed is in termMap now; release the reader even on failure.
      indexReader.close();
    }

    // Iterate over entries directly instead of looking each key up again.
    Iterator termIterator = termMap.entrySet().iterator();
    for (int ii = 0; termIterator.hasNext() && ii < 100; ii++) {
      Entry termEntry = (Entry) termIterator.next();
      message(termEntry.getKey() + ": " + termEntry.getValue());
    }