FileDocCategorySizeDatePackage
SegmentReader.javaAPI DocApache Lucene 1.4.312505Sat Jul 10 08:19:02 BST 2004org.apache.lucene.index

SegmentReader

public final class SegmentReader extends IndexReader
FIXME: Describe class SegmentReader here.
version
$Id: SegmentReader.java,v 1.23 2004/07/10 06:19:01 otis Exp $

Fields Summary
private String
segment
FieldInfos
fieldInfos
private FieldsReader
fieldsReader
TermInfosReader
tis
TermVectorsReader
termVectorsReader
BitVector
deletedDocs
private boolean
deletedDocsDirty
private boolean
normsDirty
private boolean
undeleteAll
org.apache.lucene.store.InputStream
freqStream
org.apache.lucene.store.InputStream
proxStream
CompoundFileReader
cfsReader
private Hashtable
norms
Constructors Summary
/**
 * Opens a reader over one segment of an existing index.
 *
 * @param sis      the segment table of the enclosing index
 * @param si       the segment to open
 * @param closeDir if true, closing this reader also closes the directory
 * @throws IOException if the segment files cannot be opened
 */
SegmentReader(SegmentInfos sis, SegmentInfo si, boolean closeDir)
        throws IOException {
  super(si.dir, sis, closeDir);
  initialize(si);
}
/**
 * Opens a reader over a single segment, leaving the directory open
 * when this reader is closed.
 *
 * @param si the segment to open
 * @throws IOException if the segment files cannot be opened
 */
SegmentReader(SegmentInfo si) throws IOException {
  super(si.dir);
  initialize(si);
}
Methods Summary
private final voidcloseNorms()

    synchronized (norms) {
      Enumeration enumerator = norms.elements();
      while (enumerator.hasMoreElements()) {
        Norm norm = (Norm) enumerator.nextElement();
        norm.in.close();
      }
    }
  
protected final voiddoClose()

    fieldsReader.close();
    tis.close();

    if (freqStream != null)
      freqStream.close();
    if (proxStream != null)
      proxStream.close();

    closeNorms();
    if (termVectorsReader != null) termVectorsReader.close();

    if (cfsReader != null)
      cfsReader.close();
  
protected final voiddoCommit()

    if (deletedDocsDirty) {               // re-write deleted 
      deletedDocs.write(directory(), segment + ".tmp");
      directory().renameFile(segment + ".tmp", segment + ".del");
    }
    if(undeleteAll && directory().fileExists(segment + ".del")){
      directory().deleteFile(segment + ".del");
    }
    if (normsDirty) {               // re-write norms 
      Enumeration values = norms.elements();
      while (values.hasMoreElements()) {
        Norm norm = (Norm) values.nextElement();
        if (norm.dirty) {
          norm.reWrite();
        }
      }
    }
    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
  
protected final voiddoDelete(int docNum)

    if (deletedDocs == null)
      deletedDocs = new BitVector(maxDoc());
    deletedDocsDirty = true;
    undeleteAll = false;
    deletedDocs.set(docNum);
  
protected final voiddoSetNorm(int doc, java.lang.String field, byte value)

    Norm norm = (Norm) norms.get(field);
    if (norm == null)                             // not an indexed field
      return;
    norm.dirty = true;                            // mark it dirty
    normsDirty = true;

    norms(field)[doc] = value;                    // set the value
  
protected final voiddoUndeleteAll()

      deletedDocs = null;
      deletedDocsDirty = false;
      undeleteAll = true;
  
public final intdocFreq(org.apache.lucene.index.Term t)

    TermInfo ti = tis.get(t);
    if (ti != null)
      return ti.docFreq;
    else
      return 0;
  
public final synchronized org.apache.lucene.document.Documentdocument(int n)

    if (isDeleted(n))
      throw new IllegalArgumentException
              ("attempt to access a deleted document");
    return fieldsReader.doc(n);
  
final java.util.Vectorfiles()

    Vector files = new Vector(16);
    final String ext[] = new String[]{
      "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del",
      "tvx", "tvd", "tvf", "tvp" };

    for (int i = 0; i < ext.length; i++) {
      String name = segment + "." + ext[i];
      if (directory().fileExists(name))
        files.addElement(name);
    }

    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed)
        files.addElement(segment + ".f" + i);
    }
    return files;
  
public java.util.CollectiongetFieldNames()

see
IndexReader#getFieldNames()

    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      fieldSet.add(fi.name);
    }
    return fieldSet;
  
public java.util.CollectiongetFieldNames(boolean indexed)

see
IndexReader#getFieldNames(boolean)

    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed == indexed)
        fieldSet.add(fi.name);
    }
    return fieldSet;
  
public java.util.CollectiongetIndexedFieldNames(boolean storedTermVector)

param
storedTermVector if true, returns only Indexed fields that have term vector info, else only indexed fields without term vector info
return
Collection of Strings indicating the names of the fields

    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed == true && fi.storeTermVector == storedTermVector){
        fieldSet.add(fi.name);
      }
    }
    return fieldSet;

  
public org.apache.lucene.index.TermFreqVectorgetTermFreqVector(int docNumber, java.lang.String field)
Return a term frequency vector for the specified document and field. The vector returned contains term numbers and frequencies for all terms in the specified field of this document, if the field had storeTermVector flag set. If the flag was not set, the method returns null.

    // Check if this field is invalid or has no stored term vector
    FieldInfo fi = fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector) return null;

    return termVectorsReader.get(docNumber, field);
  
public org.apache.lucene.index.TermFreqVector[]getTermFreqVectors(int docNumber)
Return an array of term frequency vectors for the specified document. The array contains a vector for each vectorized field in the document. Each vector vector contains term numbers and frequencies for all terms in a given vectorized field. If no such fields existed, the method returns null.

    if (termVectorsReader == null)
      return null;

    return termVectorsReader.get(docNumber);
  
static final booleanhasDeletions(org.apache.lucene.index.SegmentInfo si)

    return si.dir.fileExists(si.name + ".del");
  
public booleanhasDeletions()

    return deletedDocs != null;
  
static final booleanhasSeparateNorms(org.apache.lucene.index.SegmentInfo si)

    String[] result = si.dir.list();
    String pattern = si.name + ".f";
    int patternLength = pattern.length();
    for(int i = 0; i < 0; i++){
      if(result[i].startsWith(pattern) && Character.isDigit(result[i].charAt(patternLength)))
        return true;
    }
    return false;
  
private voidinitialize(org.apache.lucene.index.SegmentInfo si)

    segment = si.name;

    // Use compound file directory for some files, if it exists
    Directory cfsDir = directory();
    if (directory().fileExists(segment + ".cfs")) {
      cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
      cfsDir = cfsReader;
    }

    // No compound file exists - use the multi-file format
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (hasDeletions(si))
      deletedDocs = new BitVector(directory(), segment + ".del");

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.openFile(segment + ".frq");
    proxStream = cfsDir.openFile(segment + ".prx");
    openNorms(cfsDir);

    if (fieldInfos.hasVectors()) { // open term vector files only as needed
      termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
  
public final synchronized booleanisDeleted(int n)

    return (deletedDocs != null && deletedDocs.get(n));
  
public final intmaxDoc()

    return fieldsReader.size();
  
public synchronized byte[]norms(java.lang.String field)

    Norm norm = (Norm) norms.get(field);
    if (norm == null)                             // not an indexed field
      return null;
    if (norm.bytes == null) {                     // value not yet read
      byte[] bytes = new byte[maxDoc()];
      norms(field, bytes, 0);
      norm.bytes = bytes;                         // cache it
    }
    return norm.bytes;
  
public synchronized voidnorms(java.lang.String field, byte[] bytes, int offset)
Read norms into a pre-allocated array.


    Norm norm = (Norm) norms.get(field);
    if (norm == null)
      return;					  // use zeros in array

    if (norm.bytes != null) {                     // can copy from cache
      System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
      return;
    }

    InputStream normStream = (InputStream) norm.in.clone();
    try {                                         // read from disk
      normStream.seek(0);
      normStream.readBytes(bytes, offset, maxDoc());
    } finally {
      normStream.close();
    }
  
public final intnumDocs()

    int n = maxDoc();
    if (deletedDocs != null)
      n -= deletedDocs.count();
    return n;
  
private final voidopenNorms(org.apache.lucene.store.Directory cfsDir)

    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed) {
        String fileName = segment + ".f" + fi.number;
        // look first for re-written file, then in compound format
        Directory d = directory().fileExists(fileName) ? directory() : cfsDir;
        norms.put(fi.name, new Norm(d.openFile(fileName), fi.number));
      }
    }
  
public final org.apache.lucene.index.TermDocstermDocs()

    return new SegmentTermDocs(this);
  
public final org.apache.lucene.index.TermPositionstermPositions()

    return new SegmentTermPositions(this);
  
public final org.apache.lucene.index.TermEnumterms()

    return tis.terms();
  
public final org.apache.lucene.index.TermEnumterms(org.apache.lucene.index.Term t)

    return tis.terms(t);
  
static final booleanusesCompoundFile(org.apache.lucene.index.SegmentInfo si)

    return si.dir.fileExists(si.name + ".cfs");