File: SegmentReader.java — Category: API Doc (Apache Lucene 1.9) — Size: 18645 bytes — Date: Mon Feb 20 09:20:14 GMT 2006 — Package: org.apache.lucene.index

SegmentReader

public class SegmentReader extends IndexReader
version
$Id: SegmentReader.java 329523 2005-10-30 05:37:11Z yonik $

Fields Summary
private String
segment
FieldInfos
fieldInfos
private FieldsReader
fieldsReader
TermInfosReader
tis
TermVectorsReader
termVectorsReaderOrig
ThreadLocal
termVectorsLocal
BitVector
deletedDocs
private boolean
deletedDocsDirty
private boolean
normsDirty
private boolean
undeleteAll
IndexInput
freqStream
IndexInput
proxStream
CompoundFileReader
cfsReader
private Hashtable
norms
private static Class
IMPL
The class which implements SegmentReader.
private byte[]
ones
Constructors Summary
protected SegmentReader()

   
    // NOTE(review): this extraction fuses the class's static initializer with the
    // no-arg constructor.  The try/catch below is the static IMPL lookup; only the
    // trailing super(null) belongs to the constructor itself — confirm against the
    // original source before editing.
    try {
      // Allow the concrete SegmentReader implementation to be swapped via a
      // system property; defaults to this class.
      String name =
        System.getProperty("org.apache.lucene.SegmentReader.class",
                           SegmentReader.class.getName());
      IMPL = Class.forName(name);
    } catch (ClassNotFoundException e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e);
    } catch (SecurityException se) {
      // Reading system properties may be forbidden (e.g. applets); fall back to
      // the default implementation class.
      try {
        IMPL = Class.forName(SegmentReader.class.getName());
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("cannot load default SegmentReader class: " + e);
      }
    }
   // Constructor body: directory is supplied later via init(), so pass null here.
   super(null); 
Methods Summary
private voidcloseNorms()

    synchronized (norms) {
      Enumeration enumerator = norms.elements();
      while (enumerator.hasMoreElements()) {
        Norm norm = (Norm) enumerator.nextElement();
        norm.in.close();
      }
    }
  
static byte[]createFakeNorms(int size)

    byte[] ones = new byte[size];
    Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f));
    return ones;
  
protected voiddoClose()

    fieldsReader.close();
    tis.close();

    if (freqStream != null)
      freqStream.close();
    if (proxStream != null)
      proxStream.close();

    closeNorms();

    if (termVectorsReaderOrig != null)
      termVectorsReaderOrig.close();

    if (cfsReader != null)
      cfsReader.close();
  
protected voiddoCommit()

    // Flush pending deletions: write the bitvector to a temp file first, then
    // atomically rename, so a crash never leaves a truncated .del file.
    if (deletedDocsDirty) {               // re-write deleted
      deletedDocs.write(directory(), segment + ".tmp");
      directory().renameFile(segment + ".tmp", segment + ".del");
    }
    // undeleteAll: any on-disk .del file is now stale — remove it.
    if(undeleteAll && directory().fileExists(segment + ".del")){
      directory().deleteFile(segment + ".del");
    }
    // Flush only the norms that were actually modified via setNorm.
    if (normsDirty) {               // re-write norms
      Enumeration values = norms.elements();
      while (values.hasMoreElements()) {
        Norm norm = (Norm) values.nextElement();
        if (norm.dirty) {
          norm.reWrite();
        }
      }
    }
    // Everything is on disk; clear all dirty flags.
    deletedDocsDirty = false;
    normsDirty = false;
    undeleteAll = false;
protected voiddoDelete(int docNum)

    if (deletedDocs == null)
      deletedDocs = new BitVector(maxDoc());
    deletedDocsDirty = true;
    undeleteAll = false;
    deletedDocs.set(docNum);
  
protected voiddoSetNorm(int doc, java.lang.String field, byte value)

    Norm norm = (Norm) norms.get(field);
    if (norm == null)                             // not an indexed field
      return;
    norm.dirty = true;                            // mark it dirty
    normsDirty = true;

    norms(field)[doc] = value;                    // set the value
  
protected voiddoUndeleteAll()

      deletedDocs = null;
      deletedDocsDirty = false;
      undeleteAll = true;
  
public intdocFreq(org.apache.lucene.index.Term t)

    TermInfo ti = tis.get(t);
    if (ti != null)
      return ti.docFreq;
    else
      return 0;
  
public synchronized org.apache.lucene.document.Documentdocument(int n)

    if (isDeleted(n))
      throw new IllegalArgumentException
              ("attempt to access a deleted document");
    return fieldsReader.doc(n);
  
private byte[]fakeNorms()

    if (ones==null) ones=createFakeNorms(maxDoc());
    return ones;
  
java.util.Vectorfiles()

    Vector files = new Vector(16);

    for (int i = 0; i < IndexFileNames.INDEX_EXTENSIONS.length; i++) {
      String name = segment + "." + IndexFileNames.INDEX_EXTENSIONS[i];
      if (directory().fileExists(name))
        files.addElement(name);
    }

    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed  && !fi.omitNorms){
        String name;
        if(cfsReader == null)
            name = segment + ".f" + i;
        else
            name = segment + ".s" + i;
        if (directory().fileExists(name))
            files.addElement(name);
      }
    }
    return files;
  
protected voidfinalize()

     // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
     termVectorsLocal.set(null);
     super.finalize();
   
public static org.apache.lucene.index.SegmentReaderget(org.apache.lucene.index.SegmentInfo si)

    return get(si.dir, si, null, false, false);
  
public static org.apache.lucene.index.SegmentReaderget(org.apache.lucene.index.SegmentInfos sis, org.apache.lucene.index.SegmentInfo si, boolean closeDir)

    return get(si.dir, si, sis, closeDir, true);
  
public static org.apache.lucene.index.SegmentReaderget(org.apache.lucene.store.Directory dir, org.apache.lucene.index.SegmentInfo si, org.apache.lucene.index.SegmentInfos sis, boolean closeDir, boolean ownDir)

    SegmentReader instance;
    try {
      instance = (SegmentReader)IMPL.newInstance();
    } catch (Exception e) {
      throw new RuntimeException("cannot load SegmentReader class: " + e);
    }
    instance.init(dir, sis, closeDir, ownDir);
    instance.initialize(si);
    return instance;
  
public java.util.CollectiongetFieldNames()

see
IndexReader#getFieldNames()
deprecated
Replaced by {@link #getFieldNames (IndexReader.FieldOption fldOption)}

    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      fieldSet.add(fi.name);
    }
    return fieldSet;
  
public java.util.CollectiongetFieldNames(boolean indexed)

see
IndexReader#getFieldNames(boolean)
deprecated
Replaced by {@link #getFieldNames (IndexReader.FieldOption fldOption)}

    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed == indexed)
        fieldSet.add(fi.name);
    }
    return fieldSet;
  
public java.util.CollectiongetFieldNames(org.apache.lucene.index.IndexReader$FieldOption fieldOption)

see
IndexReader#getFieldNames(IndexReader.FieldOption fldOption)


    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fieldOption == IndexReader.FieldOption.ALL) {
        fieldSet.add(fi.name);
      }
      else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) {
        fieldSet.add(fi.name);
      }
      else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) {
        fieldSet.add(fi.name);
      }
      else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) {
        fieldSet.add(fi.name);
      }
      else if (fi.storeTermVector == true &&
               fi.storePositionWithTermVector == false &&
               fi.storeOffsetWithTermVector == false &&
               fieldOption == IndexReader.FieldOption.TERMVECTOR) {
        fieldSet.add(fi.name);
      }
      else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) {
        fieldSet.add(fi.name);
      }
      else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) {
        fieldSet.add(fi.name);
      }
      else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) {
        fieldSet.add(fi.name);
      }
      else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) &&
                fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
        fieldSet.add(fi.name);
      }
    }
    return fieldSet;
  
public java.util.CollectiongetIndexedFieldNames(org.apache.lucene.document.Field$TermVector tvSpec)

see
IndexReader#getIndexedFieldNames(Field.TermVector tvSpec)
deprecated
Replaced by {@link #getFieldNames (IndexReader.FieldOption fldOption)}

    boolean storedTermVector;
    boolean storePositionWithTermVector;
    boolean storeOffsetWithTermVector;

    if(tvSpec == Field.TermVector.NO){
      storedTermVector = false;
      storePositionWithTermVector = false;
      storeOffsetWithTermVector = false;
    }
    else if(tvSpec == Field.TermVector.YES){
      storedTermVector = true;
      storePositionWithTermVector = false;
      storeOffsetWithTermVector = false;
    }
    else if(tvSpec == Field.TermVector.WITH_POSITIONS){
      storedTermVector = true;
      storePositionWithTermVector = true;
      storeOffsetWithTermVector = false;
    }
    else if(tvSpec == Field.TermVector.WITH_OFFSETS){
      storedTermVector = true;
      storePositionWithTermVector = false;
      storeOffsetWithTermVector = true;
    }
    else if(tvSpec == Field.TermVector.WITH_POSITIONS_OFFSETS){
      storedTermVector = true;
      storePositionWithTermVector = true;
      storeOffsetWithTermVector = true;
    }
    else{
      throw new IllegalArgumentException("unknown termVector parameter " + tvSpec);
    }

    // maintain a unique set of field names
    Set fieldSet = new HashSet();
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed && fi.storeTermVector == storedTermVector &&
          fi.storePositionWithTermVector == storePositionWithTermVector &&
          fi.storeOffsetWithTermVector == storeOffsetWithTermVector){
        fieldSet.add(fi.name);
      }
    }
    return fieldSet;
  
protected synchronized byte[]getNorms(java.lang.String field)

    Norm norm = (Norm) norms.get(field);
    if (norm == null) return null;  // not indexed, or norms not stored

    if (norm.bytes == null) {                     // value not yet read
      byte[] bytes = new byte[maxDoc()];
      norms(field, bytes, 0);
      norm.bytes = bytes;                         // cache it
    }
    return norm.bytes;
  
public org.apache.lucene.index.TermFreqVectorgetTermFreqVector(int docNumber, java.lang.String field)
Return a term frequency vector for the specified document and field. The vector returned contains term numbers and frequencies for all terms in the specified field of this document, if the field had storeTermVector flag set. If the flag was not set, the method returns null.

throws
IOException

    // Check if this field is invalid or has no stored term vector
    FieldInfo fi = fieldInfos.fieldInfo(field);
    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) 
      return null;
    
    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;
    
    return termVectorsReader.get(docNumber, field);
  
public org.apache.lucene.index.TermFreqVector[]getTermFreqVectors(int docNumber)
Return an array of term frequency vectors for the specified document. The array contains a vector for each vectorized field in the document. Each vector vector contains term numbers and frequencies for all terms in a given vectorized field. If no such fields existed, the method returns null.

throws
IOException

    if (termVectorsReaderOrig == null)
      return null;
    
    TermVectorsReader termVectorsReader = getTermVectorsReader();
    if (termVectorsReader == null)
      return null;
    
    return termVectorsReader.get(docNumber);
  
private org.apache.lucene.index.TermVectorsReadergetTermVectorsReader()
Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.

return
TermVectorsReader

    TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get();
    if (tvReader == null) {
      tvReader = (TermVectorsReader)termVectorsReaderOrig.clone();
      termVectorsLocal.set(tvReader);
    }
    return tvReader;
  
public booleanhasDeletions()

    return deletedDocs != null;
  
static booleanhasDeletions(org.apache.lucene.index.SegmentInfo si)

    return si.dir.fileExists(si.name + ".del");
  
public synchronized booleanhasNorms(java.lang.String field)

    return norms.containsKey(field);
  
static booleanhasSeparateNorms(org.apache.lucene.index.SegmentInfo si)

    String[] result = si.dir.list();
    String pattern = si.name + ".s";
    int patternLength = pattern.length();
    for(int i = 0; i < result.length; i++){
      if(result[i].startsWith(pattern) && Character.isDigit(result[i].charAt(patternLength)))
        return true;
    }
    return false;
  
private voidinitialize(org.apache.lucene.index.SegmentInfo si)

    segment = si.name;

    // Use compound file directory for some files, if it exists
    // When a .cfs file is present, most per-segment files live inside it and
    // are read through the CompoundFileReader instead of the raw directory.
    Directory cfsDir = directory();
    if (directory().fileExists(segment + ".cfs")) {
      cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
      cfsDir = cfsReader;
    }

    // No compound file exists - use the multi-file format
    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);

    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

    // NOTE: the bitvector is stored using the regular directory, not cfs
    if (hasDeletions(si))
      deletedDocs = new BitVector(directory(), segment + ".del");

    // make sure that all index files have been read or are kept open
    // so that if an index update removes them we'll still have them
    freqStream = cfsDir.openInput(segment + ".frq");
    proxStream = cfsDir.openInput(segment + ".prx");
    openNorms(cfsDir);

    if (fieldInfos.hasVectors()) { // open term vector files only as needed
      termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
    }
public synchronized booleanisDeleted(int n)

    return (deletedDocs != null && deletedDocs.get(n));
  
public intmaxDoc()

    return fieldsReader.size();
  
public synchronized byte[]norms(java.lang.String field)

    byte[] bytes = getNorms(field);
    if (bytes==null) bytes=fakeNorms();
    return bytes;
  
public synchronized voidnorms(java.lang.String field, byte[] bytes, int offset)
Read norms into a pre-allocated array.


    Norm norm = (Norm) norms.get(field);
    if (norm == null) {
      System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc());
      return;
    }

    if (norm.bytes != null) {                     // can copy from cache
      System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc());
      return;
    }

    IndexInput normStream = (IndexInput) norm.in.clone();
    try {                                         // read from disk
      normStream.seek(0);
      normStream.readBytes(bytes, offset, maxDoc());
    } finally {
      normStream.close();
    }
  
public intnumDocs()

    int n = maxDoc();
    if (deletedDocs != null)
      n -= deletedDocs.count();
    return n;
  
private voidopenNorms(org.apache.lucene.store.Directory cfsDir)

    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed && !fi.omitNorms) {
        // look first if there are separate norms in compound format
        String fileName = segment + ".s" + fi.number;
        Directory d = directory();
        if(!d.fileExists(fileName)){
            fileName = segment + ".f" + fi.number;
            d = cfsDir;
        }
        norms.put(fi.name, new Norm(d.openInput(fileName), fi.number));
      }
    }
  
public org.apache.lucene.index.TermDocstermDocs()

    return new SegmentTermDocs(this);
  
public org.apache.lucene.index.TermPositionstermPositions()

    return new SegmentTermPositions(this);
  
public org.apache.lucene.index.TermEnumterms()

    return tis.terms();
  
public org.apache.lucene.index.TermEnumterms(org.apache.lucene.index.Term t)

    return tis.terms(t);
  
static booleanusesCompoundFile(org.apache.lucene.index.SegmentInfo si)

    return si.dir.fileExists(si.name + ".cfs");