File | Doc | Category | Size | Date | Package
LengthNormModifier.java | API Doc | Apache Lucene 2.2.0 | 4463 | Sat Jun 16 22:21:02 BST 2007 | org.apache.lucene.misc

LengthNormModifier

public class LengthNormModifier extends Object
Given a directory, a Similarity, and a list of fields, updates the fieldNorms in place for every document using the Similarity.lengthNorm.

NOTE: This only works if you do not use field/document boosts in your index.

version
$Id$

Fields Summary
private Directory
dir
private Similarity
sim
Constructors Summary
public LengthNormModifier(Directory d, Similarity s)
Constructor for code that wishes to use this class programmatically.

param
d The Directory to modify
param
s The Similarity to use in reSetNorms

    // Keep the index location and the scoring model for later reSetNorms calls.
    this.dir = d;
    this.sim = s;
  
Methods Summary
public static void main(java.lang.String[] args)
Command Line Execution method.
Usage: LengthNormModifier /path/index package.SimilarityClassName field1 field2 ...

    // Require an index path, a Similarity class name, and at least one field.
    if (args.length < 3) {
      System.err.println("Usage: LengthNormModifier <index> <package.SimilarityClassName> <field1> [field2] ...");
      System.exit(1);
    }
    
    // Load the Similarity reflectively; it must expose a no-argument constructor.
    Similarity s = null;
    try {
      Class simClass = Class.forName(args[1]);
      s = (Similarity)simClass.newInstance();
    } catch (Exception e) {
      System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
      e.printStackTrace(System.err);
      // Continuing with a null Similarity would only fail later with a
      // NullPointerException inside reSetNorms, so stop right here.
      System.exit(1);
    }
    
    File index = new File(args[0]);
    Directory d = FSDirectory.getDirectory(index,false);
    try {
      LengthNormModifier lnm = new LengthNormModifier(d, s);
      
      // Rewrite the norms of each requested field, printing start/end timestamps.
      for (int i = 2; i < args.length; i++) {
        System.out.print("Updating field: " + args[i] + " " + (new Date()).toString() + " ... ");
        lnm.reSetNorms(args[i]);
        System.out.println(new Date().toString());
      }
    } finally {
      // Release the directory even when one of the updates throws.
      d.close();
    }
  
public void reSetNorms(java.lang.String field)
Resets the norms for the specified field.

Opens a new IndexReader on the Directory given to this instance, modifies the norms using the Similarity given to this instance, and closes the IndexReader.

param
field the field whose norms should be reset

    String fieldName = field.intern();
    int[] termCounts;
    
    // Pass 1: sum, per document, the frequencies of every term in the field.
    IndexReader reader = null;
    TermEnum termEnum = null;
    TermDocs termDocs = null;
    try {
      reader = IndexReader.open(dir);
      termCounts = new int[reader.maxDoc()];
      try {
        // Position the enumeration at the first term of the requested field.
        termEnum = reader.terms(new Term(field,""));
        try {
          termDocs = reader.termDocs();
          do {
            Term term = termEnum.term();
            // Terms are ordered by field and then by text, so a null term or a
            // term from another field means this field is exhausted; stop
            // instead of walking the rest of the term dictionary.
            if (term == null || !term.field().equals(fieldName)) {
              break;
            }
            termDocs.seek(term);
            while (termDocs.next()) {
              termCounts[termDocs.doc()] += termDocs.freq();
            }
          } while (termEnum.next());
        } finally {
          if (null != termDocs) termDocs.close();
        }
      } finally {
        if (null != termEnum) termEnum.close();
      }
    } finally {
      if (null != reader) reader.close();
    }
    
    // Pass 2: write the recomputed norm for every non-deleted document.
    // A separate variable is used so that a failed open never leads to
    // closing the already-closed reader from pass 1 a second time.
    IndexReader normReader = null;
    try {
      normReader = IndexReader.open(dir);
      for (int d = 0; d < termCounts.length; d++) {
        if (! normReader.isDeleted(d)) {
          byte norm = sim.encodeNorm(sim.lengthNorm(fieldName, termCounts[d]));
          normReader.setNorm(d, fieldName, norm);
        }
      }
    } finally {
      if (null != normReader) normReader.close();
    }