FileDocCategorySizeDatePackage
TermVectorsWriter.javaAPI DocApache Lucene 1.4.39145Fri Feb 20 20:14:56 GMT 2004org.apache.lucene.index

TermVectorsWriter

public final class TermVectorsWriter extends Object
Writer works by opening a document and then opening the fields within the document and then writing out the vectors for each field. Rough usage: for each document { writer.openDocument(); for each field on the document { writer.openField(field); for all of the terms { writer.addTerm(...) } writer.closeField } writer.closeDocument() }

Fields Summary
public static final int
FORMAT_VERSION
public static final int
FORMAT_SIZE
public static final String
TVX_EXTENSION
public static final String
TVD_EXTENSION
public static final String
TVF_EXTENSION
private org.apache.lucene.store.OutputStream
tvx
private org.apache.lucene.store.OutputStream
tvd
private org.apache.lucene.store.OutputStream
tvf
private Vector
fields
private Vector
terms
private FieldInfos
fieldInfos
private TVField
currentField
private long
currentDocPointer
Constructors Summary
public TermVectorsWriter(Directory directory, String segment, FieldInfos fieldInfos)
Creates a term vectors writer for the specified segment in the specified directory. A new TermVectorsWriter should be created for each segment. The parameter maxFields indicates how many total fields are found in this document. Not all of these fields may require term vectors to be stored, so the number of calls to openField is less than or equal to this number.


                                                                     
      
                            
      
    // Open files for TermVector storage.
    // Three files are created for the segment, one per extension constant
    // (TVX_EXTENSION, TVD_EXTENSION, TVF_EXTENSION). Each file begins with
    // FORMAT_VERSION written as an int, immediately after it is created.
    tvx = directory.createFile(segment + TVX_EXTENSION);
    tvx.writeInt(FORMAT_VERSION);
    tvd = directory.createFile(segment + TVD_EXTENSION);
    tvd.writeInt(FORMAT_VERSION);
    tvf = directory.createFile(segment + TVF_EXTENSION);
    tvf.writeInt(FORMAT_VERSION);

    // Keep the field metadata; openField() uses it to map field names to numbers.
    this.fieldInfos = fieldInfos;
    // Fields closed for the current document; presized to the total number of
    // known fields.
    fields = new Vector(fieldInfos.size());
    // Terms buffered for the currently open field.
    terms = new Vector();
  
Methods Summary
public final voidaddTerm(java.lang.String termText, int freq)
Add term to the field's term vector. Both a document and a field must already be open, or an IllegalStateException is thrown. Terms should be added in increasing order of terms, one call per unique termNum. ProxPointer is a pointer into the TermPosition file (prx). Freq is the number of times this term appears in this field, in this document.

    // Terms can only be accepted while both a document and a field are open.
    if (!isDocumentOpen()) {
      throw new IllegalStateException("Cannot add terms when document is not open");
    }
    if (!isFieldOpen()) {
      throw new IllegalStateException("Cannot add terms when field is not open");
    }

    // State is valid: buffer the term for the currently open field.
    addTermInternal(termText, freq);
  
public final voidaddTermFreqVector(org.apache.lucene.index.TermFreqVector vector)
Add specified vector to the document. Document must be open but no field should be open or exception is thrown. The same document can have addTerm and addVectors calls mixed; however, a given field must either be populated with addTerm or with addVector.

    // A whole vector is written as its own field, so a document must be open
    // and no field may be in progress.
    if (!isDocumentOpen()) {
      throw new IllegalStateException("Cannot add term vector when document is not open");
    }
    if (isFieldOpen()) {
      throw new IllegalStateException("Cannot add term vector when field is open");
    }

    addTermFreqVectorInternal(vector);
  
private final voidaddTermFreqVectorInternal(org.apache.lucene.index.TermFreqVector vector)

    openField(vector.getField());
    for (int i = 0; i < vector.size(); i++) {
      addTermInternal(vector.getTerms()[i], vector.getTermFrequencies()[i]);
    }
    closeField();
  
private final voidaddTermInternal(java.lang.String termText, int freq)

    // Add this term's occurrences to the open field's running total.
    currentField.length += freq;

    // Buffer the (text, frequency) pair; writeField() reads the buffer when
    // the field is closed.
    final TVTerm entry = new TVTerm();
    entry.freq = freq;
    entry.termText = termText;
    terms.add(entry);
  
public final voidaddVectors(org.apache.lucene.index.TermFreqVector[] vectors)
Add specified vectors to the document.

    if (!isDocumentOpen()) throw new IllegalStateException("Cannot add term vectors when document is not open");
    if (isFieldOpen()) throw new IllegalStateException("Cannot add term vectors when field is open");

    for (int i = 0; i < vectors.length; i++) {
      addTermFreqVector(vectors[i]);
    }
  
final voidclose()
Close all streams.

    try {
      // Flush a document (and its open field, if any) that is still pending.
      closeDocument();
    } finally {
      // Make an effort to close all streams we can, but remember and re-throw
      // the first exception encountered in this process.
      IOException keep = null;
      if (tvx != null) {
        try {
          tvx.close();
        } catch (IOException e) {
          if (keep == null) keep = e;
        }
      }
      if (tvd != null) {
        try {
          tvd.close();
        } catch (IOException e) {
          if (keep == null) keep = e;
        }
      }
      if (tvf != null) {
        try {
          tvf.close();
        } catch (IOException e) {
          if (keep == null) keep = e;
        }
      }
      // Re-throw the exception exactly as it was caught. Calling
      // fillInStackTrace() here would overwrite its stack trace with this
      // line, hiding where the close failure actually happened.
      if (keep != null) throw keep;
    }
  
public final voidcloseDocument()

    // Nothing to finish when no document is open.
    if (!isDocumentOpen()) {
      return;
    }

    // Flush a field that is still open, then write the document's records.
    closeField();
    writeDoc();

    // Reset per-document state; -1 marks "no document open".
    fields.clear();
    currentDocPointer = -1;
  
public final voidcloseField()
Finished processing current field. This should be followed by a call to openField before future calls to addTerm.

    // Nothing to flush when no field is open.
    if (!isFieldOpen()) {
      return;
    }

    // Write out the buffered terms, then remember the field so that the
    // document record can reference it.
    writeField();
    fields.add(currentField);

    // Reset per-field state; a null currentField marks "no field open".
    terms.clear();
    currentField = null;
  
public final booleanisDocumentOpen()

    return currentDocPointer != -1;
  
public final booleanisFieldOpen()
Return true if a field is currently open.

    return currentField != null;
  
public final voidopenDocument()

    closeDocument();

    currentDocPointer = tvd.getFilePointer();
  
public final voidopenField(java.lang.String field)
Start processing a field. This can be followed by a number of calls to addTerm, and a final call to closeField to indicate the end of processing of this field. If a field was previously open, it is closed automatically.

    // A field can only be opened inside an open document.
    if (!isDocumentOpen()) {
      throw new IllegalStateException("Cannot open field when no document is open.");
    }

    // Flush a previously open field, if any, before starting the new one.
    closeField();

    // Track the new field by the number fieldInfos assigns to its name.
    currentField = new TVField(fieldInfos.fieldNumber(field));
  
private voidwriteDoc()

    // The document record can only be written once its last field is closed.
    if (isFieldOpen()) {
      throw new IllegalStateException("Field is still open while writing document");
    }

    // Index record: the tvd position captured when the document was opened.
    tvx.writeLong(currentDocPointer);

    // Data record starts with the number of fields closed for this document.
    final int numFields = fields.size();
    tvd.writeVInt(numFields);

    // Field numbers, each written as the difference from the previous one.
    int prevNumber = 0;
    for (int f = 0; f < numFields; f++) {
      final TVField entry = (TVField) fields.get(f);
      tvd.writeVInt(entry.number - prevNumber);
      prevNumber = entry.number;
    }

    // Field pointers into tvf, written after all field numbers and encoded
    // as differences in the same way.
    long prevPointer = 0;
    for (int f = 0; f < numFields; f++) {
      final TVField entry = (TVField) fields.get(f);
      tvd.writeVLong(entry.tvfPointer - prevPointer);
      prevPointer = entry.tvfPointer;
    }
  
private voidwriteField()

    // remember where this field is written
    currentField.tvfPointer = tvf.getFilePointer();
    //System.out.println("Field Pointer: " + currentField.tvfPointer);
    final int size;

    tvf.writeVInt(size = terms.size());
    tvf.writeVInt(currentField.length - size);
    String lastTermText = "";
    // write term ids and positions
    for (int i = 0; i < size; i++) {
      TVTerm term = (TVTerm) terms.elementAt(i);
      //tvf.writeString(term.termText);
      int start = StringHelper.stringDifference(lastTermText, term.termText);
      int length = term.termText.length() - start;
      tvf.writeVInt(start);			  // write shared prefix length
      tvf.writeVInt(length);			  // write delta length
      tvf.writeChars(term.termText, start, length);  // write delta chars
      tvf.writeVInt(term.freq);
      lastTermText = term.termText;
    }