File: SegmentReader.java
API Doc: Apache Lucene 2.2.0
Size: 20838 bytes
Date: Sat Jun 16 22:20:36 BST 2007
Package: org.apache.lucene.index

SegmentReader

public class SegmentReader extends IndexReader
version
$Id: SegmentReader.java 542561 2007-05-29 15:14:07Z mikemccand $

Fields Summary
private String
segment
private SegmentInfo
si
FieldInfos
fieldInfos
private FieldsReader
fieldsReader
TermInfosReader
tis
TermVectorsReader
termVectorsReaderOrig
ThreadLocal
termVectorsLocal
BitVector
deletedDocs
private boolean
deletedDocsDirty
private boolean
normsDirty
private boolean
undeleteAll
private boolean
rollbackDeletedDocsDirty
private boolean
rollbackNormsDirty
private boolean
rollbackUndeleteAll
IndexInput
freqStream
IndexInput
proxStream
private IndexInput
singleNormStream
CompoundFileReader
cfsReader
private Hashtable
norms
private static Class
IMPL
The class which implements SegmentReader.
private byte[]
ones
Constructors Summary
protected SegmentReader()

    // Static initializer: resolve the class that the get(...) factories will
    // instantiate. It can be overridden with the
    // "org.apache.lucene.SegmentReader.class" system property; by default it
    // is SegmentReader itself.
    try {
      String name =
        System.getProperty("org.apache.lucene.SegmentReader.class",
                           SegmentReader.class.getName());
      IMPL = Class.forName(name);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    } catch (SecurityException se) {
      try {
        IMPL = Class.forName(SegmentReader.class.getName());
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("cannot load default SegmentReader class: " + e, e);
      }
    }
    // Constructor body: no directory is passed to IndexReader here; it is
    // supplied later via init() in the get(...) factories.
    super(null);
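Because IMPL is resolved from a system property, the concrete reader class can be swapped without modifying Lucene itself. A minimal sketch; the subclass name is illustrative and must have a public no-arg constructor because the factories call IMPL.newInstance():

    // Must run before the SegmentReader class is first loaded, since IMPL is
    // resolved in a static initializer. "com.example.MySegmentReader" is a
    // hypothetical SegmentReader subclass.
    System.setProperty("org.apache.lucene.SegmentReader.class",
                       "com.example.MySegmentReader");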
Methods Summary
private void closeNorms()

    synchronized (norms) {
      Enumeration enumerator = norms.elements();
      while (enumerator.hasMoreElements()) {
        Norm norm = (Norm) enumerator.nextElement();
        norm.close();
      }
      if (singleNormStream != null) {
        singleNormStream.close();
        singleNormStream = null;
      }
    }
  
static byte[] createFakeNorms(int size)

    byte[] ones = new byte[size];
    Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
    return ones;
  
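The fake norms are what norms(String) falls back to for fields without stored norms: every document gets the encoded value of 1.0f. A tiny round-trip check, assuming the public encode/decode helpers on Similarity:

    byte one = DefaultSimilarity.encodeNorm(1.0f); // same byte createFakeNorms fills in
    float boost = Similarity.decodeNorm(one);      // decodes back to 1.0f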
protected void doClose()

    if (fieldsReader != null) {
      fieldsReader.close();
    }
    if (tis != null) {
      tis.close();
    }

    if (freqStream != null)
      freqStream.close();
    if (proxStream != null)
      proxStream.close();

    closeNorms();

    if (termVectorsReaderOrig != null)
      termVectorsReaderOrig.close();

    if (cfsReader != null)
      cfsReader.close();
  
protected void doCommit()

    if (deletedDocsDirty) {               // re-write deleted
      si.advanceDelGen();

      // We can write directly to the actual name (vs to a
      // .tmp & renaming it) because the file is not live
      // until segments file is written:
      deletedDocs.write(directory(), si.getDelFileName());
    }
    if (undeleteAll && si.hasDeletions()) {
      si.clearDelGen();
    }
    if (normsDirty) {               // re-write norms
      si.setNumFields(fieldInfos.size());
      Enumeration values = norms.elements();
      while (values.hasMoreElements()) {
        Norm norm = (Norm) values.nextElement();
        if (norm.dirty) {
          norm.reWrite(si);
        }
      }
    }
    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
  
protected void doDelete(int docNum)

    if (deletedDocs == null)
      deletedDocs = new BitVector(maxDoc());
    deletedDocsDirty = true;
    undeleteAll = false;
    deletedDocs.set(docNum);
  
protected void doSetNorm(int doc, java.lang.String field, byte value)

    Norm norm = (Norm) norms.get(field);
    if (norm == null)                             // not an indexed field
      return;

    norm.dirty = true;                            // mark it dirty
    normsDirty = true;

    norms(field)[doc] = value;                    // set the value
  
protected void doUndeleteAll()

      deletedDocs = null;
      deletedDocsDirty = false;
      undeleteAll = true;
  
public int docFreq(org.apache.lucene.index.Term t)

    ensureOpen();
    TermInfo ti = tis.get(t);
    if (ti != null)
      return ti.docFreq;
    else
      return 0;
  
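Typical use of docFreq on an open reader; the field and term text are illustrative:

    Term term = new Term("contents", "lucene");
    int df = reader.docFreq(term);   // number of documents containing the term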
public synchronized org.apache.lucene.document.Document document(int n, org.apache.lucene.document.FieldSelector fieldSelector)

throws
CorruptIndexException if the index is corrupt
throws
IOException if there is a low-level IO error

    ensureOpen();
    if (isDeleted(n))
      throw new IllegalArgumentException
              ("attempt to access a deleted document");
    return fieldsReader.doc(n, fieldSelector);
  
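A usage sketch: load a single stored field of a document, checking isDeleted() first as the method requires. The reader variable and the "title" field are illustrative; FieldSelector and FieldSelectorResult come from org.apache.lucene.document:

    FieldSelector onlyTitle = new FieldSelector() {
      public FieldSelectorResult accept(String fieldName) {
        return "title".equals(fieldName) ? FieldSelectorResult.LOAD
                                         : FieldSelectorResult.NO_LOAD;
      }
    };
    if (!reader.isDeleted(n)) {
      Document doc = reader.document(n, onlyTitle);
      String title = doc.get("title");
    }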
private byte[] fakeNorms()

    if (ones==null) ones=createFakeNorms(maxDoc());
    return ones;
  
org.apache.lucene.index.FieldInfos fieldInfos()
Returns the field infos of this segment

    return fieldInfos;
  
java.util.Vector files()

    return new Vector(si.files());
  
public static org.apache.lucene.index.SegmentReader get(org.apache.lucene.index.SegmentInfo si)

throws
CorruptIndexException if the index is corrupt
throws
IOException if there is a low-level IO error

    return get(si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE);
  
public static org.apache.lucene.index.SegmentReader get(org.apache.lucene.index.SegmentInfo si, int readBufferSize)

throws
CorruptIndexException if the index is corrupt
throws
IOException if there is a low-level IO error

    return get(si.dir, si, null, false, false, readBufferSize);
  
public static org.apache.lucene.index.SegmentReader get(org.apache.lucene.index.SegmentInfos sis, org.apache.lucene.index.SegmentInfo si, boolean closeDir)

throws
CorruptIndexException if the index is corrupt
throws
IOException if there is a low-level IO error

    return get(si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE);
  
public static org.apache.lucene.index.SegmentReader get(org.apache.lucene.store.Directory dir, org.apache.lucene.index.SegmentInfo si, org.apache.lucene.index.SegmentInfos sis, boolean closeDir, boolean ownDir, int readBufferSize)

throws
CorruptIndexException if the index is corrupt
throws
IOException if there is a low-level IO error

    SegmentReader instance;
    try {
      instance = (SegmentReader)IMPL.newInstance();
    } catch (Exception e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e, e);
    }
    instance.init(dir, sis, closeDir, ownDir);
    instance.initialize(si, readBufferSize);
    return instance;
  
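These get(...) factories are used inside the index package (SegmentInfo and SegmentInfos are not public API), so application code normally reaches a SegmentReader indirectly through IndexReader.open. A sketch, with an illustrative index path:

    Directory dir = FSDirectory.getDirectory("/path/to/index"); // path is illustrative
    IndexReader reader = IndexReader.open(dir); // a SegmentReader when the index
                                                // consists of a single segment
    try {
      // ... use the reader ...
    } finally {
      reader.close();
    }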
public java.util.Collection getFieldNames(org.apache.lucene.index.IndexReader.FieldOption fieldOption)

see
IndexReader#getFieldNames(IndexReader.FieldOption fldOption)

    ensureOpen();

    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fieldOption == IndexReader.FieldOption.ALL) {
        fieldSet.add(fi.name);
      }
      else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
        fieldSet.add(fi.name);
      }
      else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) {
        fieldSet.add(fi.name);
      }
      else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
        fieldSet.add(fi.name);
      }
      else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
        fieldSet.add(fi.name);
      }
      else if (fi.storeTermVector == true &&
               fi.storePositionWithTermVector == false &&
               fi.storeOffsetWithTermVector == false &&
               fieldOption == IndexReader.FieldOption.TERMVECTOR) {
        fieldSet.add(fi.name);
      }
      else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
        fieldSet.add(fi.name);
      }
      else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
        fieldSet.add(fi.name);
      }
      else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
        fieldSet.add(fi.name);
      }
      else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) &&
                fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
        fieldSet.add(fi.name);
      }
    }
    return fieldSet;
  
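For example, to see which fields of the segment are indexed, unindexed, or carry term vectors (reader is an already opened IndexReader):

    Collection all      = reader.getFieldNames(IndexReader.FieldOption.ALL);
    Collection indexed  = reader.getFieldNames(IndexReader.FieldOption.INDEXED);
    Collection vectored = reader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);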
protected synchronized byte[] getNorms(java.lang.String field)

    Norm norm = (Norm) norms.get(field);
    if (norm == null) return null;  // not indexed, or norms not stored
    if (norm.bytes == null) {                     // value not yet read
      byte[] bytes = new byte[maxDoc()];
      norms(field, bytes, 0);
      norm.bytes = bytes;                         // cache it
      // it's OK to close the underlying IndexInput as we have cached the
      // norms and will never read them again.
      norm.close();
    }
    return norm.bytes;
  
java.lang.String getSegmentName()
Return the name of the segment this reader is reading.

    return segment;
  
public org.apache.lucene.index.TermFreqVector getTermFreqVector(int docNumber, java.lang.String field)
Return a term frequency vector for the specified document and field. The vector returned contains term numbers and frequencies for all terms in the specified field of this document, if the field had the storeTermVector flag set. If the flag was not set, the method returns null.

throws
IOException

    // Check if this field is invalid or has no stored term vector
    ensureOpen();
    FieldInfo fi = fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) 
      return null;
    
    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;
    
    return termVectorsReader.get(docNumber, field);
  
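A usage sketch that dumps the vector of one document, assuming the field (here "contents", illustrative) was indexed with term vectors:

    TermFreqVector tfv = reader.getTermFreqVector(docNumber, "contents");
    if (tfv != null) {                        // null when no vector was stored
      String[] terms = tfv.getTerms();
      int[] freqs = tfv.getTermFrequencies();
      for (int i = 0; i < terms.length; i++) {
        System.out.println(terms[i] + ": " + freqs[i]);
      }
    }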
public org.apache.lucene.index.TermFreqVector[] getTermFreqVectors(int docNumber)
Return an array of term frequency vectors for the specified document. The array contains a vector for each vectorized field in the document. Each vector contains term numbers and frequencies for all terms in a given vectorized field. If no such fields existed, the method returns null.

throws
IOException

    ensureOpen();
    if (termVectorsReaderOrig == null)
      return null;
    
    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;
    
    return termVectorsReader.get(docNumber);
  
private org.apache.lucene.index.TermVectorsReader getTermVectorsReader()
Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.

return
TermVectorsReader

    TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get();
    if (tvReader == null) {
      tvReader = (TermVectorsReader)termVectorsReaderOrig.clone();
      termVectorsLocal.set(tvReader);
    }
    return tvReader;
  
public boolean hasDeletions()

    // Don't call ensureOpen() here (it could affect performance)
    return deletedDocs != null;
  
static boolean hasDeletions(org.apache.lucene.index.SegmentInfo si)

    // Don't call ensureOpen() here (it could affect performance)
    return si.hasDeletions();
  
public synchronized boolean hasNorms(java.lang.String field)

    ensureOpen();
    return norms.containsKey(field);
  
static boolean hasSeparateNorms(org.apache.lucene.index.SegmentInfo si)

    return si.hasSeparateNorms();
  
private void initialize(org.apache.lucene.index.SegmentInfo si, int readBufferSize)

    segment = si.name;
    this.si = si;

    boolean success = false;

    try {
      // Use compound file directory for some files, if it exists
      Directory cfsDir = directory();
      if (si.getUseCompoundFile()) {
        cfsReader = new CompoundFileReader(directory(), segment + ".cfs", readBufferSize);
        cfsDir = cfsReader;
      }

      // No compound file exists - use the multi-file format
      fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
      fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos, readBufferSize);

      // Verify two sources of "maxDoc" agree:
      if (fieldsReader.size() != si.docCount) {
        throw new CorruptIndexException("doc counts differ for segment " + si.name + ": fieldsReader shows " + fieldsReader.size() + " but segmentInfo shows " + si.docCount);
      }

      tis = new TermInfosReader(cfsDir, segment, fieldInfos, readBufferSize);
      
      // NOTE: the bitvector is stored using the regular directory, not cfs
      if (hasDeletions(si)) {
        deletedDocs = new BitVector(directory(), si.getDelFileName());

        // Verify # deletes does not exceed maxDoc for this segment:
        if (deletedDocs.count() > maxDoc()) {
          throw new CorruptIndexException("number of deletes (" + deletedDocs.count() + ") exceeds max doc (" + maxDoc() + ") for segment " + si.name);
        }
      }

      // make sure that all index files have been read or are kept open
      // so that if an index update removes them we'll still have them
      freqStream = cfsDir.openInput(segment + ".frq", readBufferSize);
      proxStream = cfsDir.openInput(segment + ".prx", readBufferSize);
      openNorms(cfsDir, readBufferSize);

      if (fieldInfos.hasVectors()) { // open term vector files only as needed
        termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos, readBufferSize);
      }
      success = true;
    } finally {

      // With lock-less commits, it's entirely possible (and
      // fine) to hit a FileNotFound exception above.  In
      // this case, we want to explicitly close any subset
      // of things that were opened so that we don't have to
      // wait for a GC to do so.
      if (!success) {
        doClose();
      }
    }
  
public synchronized boolean isDeleted(int n)

    return (deletedDocs != null && deletedDocs.get(n));
  
public int maxDoc()

    // Don't call ensureOpen() here (it could affect performance)
    return si.docCount;
  
public synchronized byte[] norms(java.lang.String field)

    ensureOpen();
    byte[] bytes = getNorms(field);
    if (bytes==null) bytes=fakeNorms();
    return bytes;
  
public synchronized void norms(java.lang.String field, byte[] bytes, int offset)
Read norms into a pre-allocated array.


    ensureOpen();
    Norm norm = (Norm) norms.get(field);
    if (norm == null) {
      System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc());
      return;
    }

    if (norm.bytes != null) {                     // can copy from cache
      System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
      return;
    }

    // Read from disk.  norm.in may be shared across multiple norms and
    // should only be used in a synchronized context.
    norm.in.seek(norm.normSeek);
    norm.in.readBytes(bytes, offset, maxDoc());
  
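A sketch of reading the norms of one field into a caller-owned array and decoding a value; the field name is illustrative and decodeNorm is the standard Similarity helper:

    byte[] normBytes = new byte[reader.maxDoc()];
    reader.norms("contents", normBytes, 0);
    float boostOfDoc0 = Similarity.decodeNorm(normBytes[0]);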
public int numDocs()

    // Don't call ensureOpen() here (it could affect performance)
    int n = maxDoc();
    if (deletedDocs != null)
      n -= deletedDocs.count();
    return n;
  
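Since numDocs() is maxDoc() minus the deletion count, document numbers are not contiguous once deletions exist; a full scan has to skip deleted slots:

    for (int i = 0; i < reader.maxDoc(); i++) {
      if (reader.isDeleted(i)) continue;   // slot freed by a delete
      Document doc = reader.document(i);
      // ... process doc ...
    }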
private void openNorms(org.apache.lucene.store.Directory cfsDir, int readBufferSize)

    long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now)
    int maxDoc = maxDoc();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed && !fi.omitNorms) {
        Directory d = directory();
        String fileName = si.getNormFileName(fi.number);
        if (!si.hasSeparateNorms(fi.number)) {
          d = cfsDir;
        }
        
        // singleNormFile means multiple norms share this file
        boolean singleNormFile = fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION);
        IndexInput normInput = null;
        long normSeek;

        if (singleNormFile) {
          normSeek = nextNormSeek;
          if (singleNormStream==null) {
            singleNormStream = d.openInput(fileName, readBufferSize);
          }
          // All norms in the .nrm file can share a single IndexInput since
          // they are only used in a synchronized context.
          // If this were to change in the future, a clone could be done here.
          normInput = singleNormStream;
        } else {
          normSeek = 0;
          normInput = d.openInput(fileName);
        }

        norms.put(fi.name, new Norm(normInput, fi.number, normSeek));
        nextNormSeek += maxDoc; // increment also if some norms are separate
      }
    }
  
void rollbackCommit()

    super.rollbackCommit();
    deletedDocsDirty = rollbackDeletedDocsDirty;
    normsDirty = rollbackNormsDirty;
    undeleteAll = rollbackUndeleteAll;
    Enumeration values = norms.elements();
    while (values.hasMoreElements()) {
      Norm norm = (Norm) values.nextElement();
      norm.dirty = norm.rollbackDirty;
    }
  
void setSegmentInfo(org.apache.lucene.index.SegmentInfo info)

    si = info;
  
void startCommit()

    super.startCommit();
    rollbackDeletedDocsDirty = deletedDocsDirty;
    rollbackNormsDirty = normsDirty;
    rollbackUndeleteAll = undeleteAll;
    Enumeration values = norms.elements();
    while (values.hasMoreElements()) {
      Norm norm = (Norm) values.nextElement();
      norm.rollbackDirty = norm.dirty;
    }
  
public org.apache.lucene.index.TermDocs termDocs()

    ensureOpen();
    return new SegmentTermDocs(this);
  
public org.apache.lucene.index.TermPositions termPositions()

    ensureOpen();
    return new SegmentTermPositions(this);
  
public org.apache.lucene.index.TermEnum terms()

    ensureOpen();
    return tis.terms();
  
public org.apache.lucene.index.TermEnum terms(org.apache.lucene.index.Term t)

    ensureOpen();
    return tis.terms(t);
  
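Putting terms(Term) and termDocs together: enumerate all terms of one field and walk each term's postings. A hedged sketch; the field name is illustrative:

    TermEnum termEnum = reader.terms(new Term("contents", "")); // positioned at first term >= t
    try {
      do {
        Term t = termEnum.term();
        if (t == null || !t.field().equals("contents")) break;  // past the field
        TermDocs termDocs = reader.termDocs(t);
        while (termDocs.next()) {
          int doc = termDocs.doc();     // document number within this reader
          int freq = termDocs.freq();   // occurrences of t in that document
        }
        termDocs.close();
      } while (termEnum.next());
    } finally {
      termEnum.close();
    }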
static boolean usesCompoundFile(org.apache.lucene.index.SegmentInfo si)

    return si.getUseCompoundFile();