File | Doc | Category | Size | Date | Package
HighFreqTerms.java | API Doc | Apache Lucene 1.9 | 2636 | Mon Feb 20 09:18:28 GMT 2006 | org.apache.lucene.misc

HighFreqTerms

public class HighFreqTerms extends Object
HighFreqTerms class extracts terms and their frequencies out of an existing Lucene index.
version
$Id: HighFreqTerms.java 376393 2006-02-09 19:17:14Z otis $

Fields Summary
public static final int
numTerms
Constructors Summary
Methods Summary
public static voidmain(java.lang.String[] args)


         
    IndexReader reader = null;
    String field = null;
    if (args.length == 1) {
      reader = IndexReader.open(args[0]);
    } else if (args.length == 2) {
      reader = IndexReader.open(args[0]);
      field = args[1];
    } else {
      usage();
      System.exit(1);
    }

    TermInfoQueue tiq = new TermInfoQueue(numTerms);
    TermEnum terms = reader.terms();

    if (field != null) { 
      while (terms.next()) {
        if (terms.term().field().equals(field)) {
          tiq.insert(new TermInfo(terms.term(), terms.docFreq()));
        }
      }
    }
    else {
      while (terms.next()) {
        tiq.insert(new TermInfo(terms.term(), terms.docFreq()));
      }
    }
    while (tiq.size() != 0) {
      TermInfo termInfo = (TermInfo) tiq.pop();
      System.out.println(termInfo.term + " " + termInfo.docFreq);
    }

    reader.close();
  
private static voidusage()

    System.out.println(
         "\n\n"
         + "java org.apache.lucene.misc.HighFreqTerms <index dir> [field]\n\n");