File | Doc | Category | Size | Date | Package
TermInfosReader.java | API Doc | Apache Lucene 1.9 | 6487 | Mon Feb 20 09:20:14 GMT 2006 | org.apache.lucene.index

TermInfosReader

public final class TermInfosReader extends Object
This stores a monotonically increasing set of <Term, TermInfo> pairs in a Directory. Pairs are accessed either by Term or by ordinal position in the set.

Fields Summary
private Directory
directory
private String
segment
private FieldInfos
fieldInfos
private ThreadLocal
enumerators
private SegmentTermEnum
origEnum
private long
size
private Term[]
indexTerms
private TermInfo[]
indexInfos
private long[]
indexPointers
private SegmentTermEnum
indexEnum
Constructors Summary
TermInfosReader(Directory dir, String seg, FieldInfos fis)


       
         
    directory = dir;
    segment = seg;
    fieldInfos = fis;

    origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis"),
                                   fieldInfos, false);
    size = origEnum.size;

    indexEnum =
      new SegmentTermEnum(directory.openInput(segment + ".tii"),
			  fieldInfos, true);
  
Methods Summary
final voidclose()
Closes the term enumeration and, if it is still open, the index enumeration.

    // Close both enums even when the first close() fails; otherwise a
    // failure on origEnum would leave indexEnum's input open.
    try {
      if (origEnum != null)
        origEnum.close();
    } finally {
      // indexEnum is null once ensureIndexIsRead() has consumed and closed it.
      if (indexEnum != null)
        indexEnum.close();
    }
  
private synchronized voidensureIndexIsRead()

    if (indexTerms != null)                       // index already read
      return;                                     // do nothing
    try {
      int indexSize = (int)indexEnum.size;        // otherwise read index

      indexTerms = new Term[indexSize];
      indexInfos = new TermInfo[indexSize];
      indexPointers = new long[indexSize];
        
      for (int i = 0; indexEnum.next(); i++) {
        indexTerms[i] = indexEnum.term();
        indexInfos[i] = indexEnum.termInfo();
        indexPointers[i] = indexEnum.indexPointer;
      }
    } finally {
        indexEnum.close();
        indexEnum = null;
    }
  
protected voidfinalize()
Clears the enumerators thread-local when this reader is finalized.

    // Workaround for JVMs older than 1.4.2, whose ThreadLocals leak:
    // drop the reference held through the enumerators ThreadLocal.
    this.enumerators.set(null);
  
org.apache.lucene.index.TermInfoget(org.apache.lucene.index.Term term)
Returns the TermInfo for a Term in the set, or null.

    if (size == 0) {
      return null;
    }

    ensureIndexIsRead();

    // Fast path for sequential access: if the requested term lies at or after
    // the cached enum's position but still before the next index entry, a
    // forward scan is enough and no seek is needed.
    final SegmentTermEnum cached = getEnum();
    if (cached.term() != null) {
      final boolean afterPrevious =
          cached.prev() != null && term.compareTo(cached.prev()) > 0;
      if (afterPrevious || term.compareTo(cached.term()) >= 0) {
        // Slot of the first index entry after the enum's current block.
        final int nextIndexSlot = (int) (cached.position / cached.indexInterval) + 1;
        final boolean withinCurrentBlock =
            indexTerms.length == nextIndexSlot
            || term.compareTo(indexTerms[nextIndexSlot]) < 0;
        if (withinCurrentBlock) {
          return scanEnum(term);
        }
      }
    }

    // Random access: seek to the index entry at or before the term, then scan.
    seekEnum(getIndexOffset(term));
    return scanEnum(term);
  
final org.apache.lucene.index.Termget(int position)
Returns the nth term in the set.

    if (size == 0) return null;

    // seekEnum() reads indexPointers/indexTerms/indexInfos, which are only
    // populated by ensureIndexIsRead(); load them before a seek can happen.
    ensureIndexIsRead();

    // getEnum() always hands back an enum (creating one if needed), so no
    // null check on the enum itself is required.
    SegmentTermEnum enumerator = getEnum();
    if (enumerator.term() != null &&
        position >= enumerator.position &&
        position < (enumerator.position + enumerator.indexInterval))
      return scanEnum(position);                  // can avoid seek

    seekEnum(position / enumerator.indexInterval); // must seek
    return scanEnum(position);
  
private org.apache.lucene.index.SegmentTermEnumgetEnum()

    SegmentTermEnum termEnum = (SegmentTermEnum)enumerators.get();
    if (termEnum == null) {
      termEnum = terms();
      enumerators.set(termEnum);
    }
    return termEnum;
  
private final intgetIndexOffset(org.apache.lucene.index.Term term)
Returns the offset of the greatest index entry which is less than or equal to term.

    int lo = 0;                                   // binary search indexTerms[]
    int hi = indexTerms.length - 1;

    while (hi >= lo) {
      // Unsigned shift keeps the midpoint non-negative even if lo + hi
      // overflows int; the signed ">> 1" would yield a negative index.
      int mid = (lo + hi) >>> 1;
      int delta = term.compareTo(indexTerms[mid]);
      if (delta < 0)
        hi = mid - 1;
      else if (delta > 0)
        lo = mid + 1;
      else
        return mid;                               // exact match
    }
    // No exact match: hi is the last slot whose term compared below the
    // requested term (it is -1 when there is no such slot).
    return hi;
  
final longgetPosition(org.apache.lucene.index.Term term)
Returns the position of a Term in the set or -1.

    if (size == 0) return -1;

    ensureIndexIsRead();
    int indexOffset = getIndexOffset(term);
    seekEnum(indexOffset);

    // Scan forward from the index entry until reaching a term that is not
    // smaller than the requested one, or until the enum has no more entries.
    SegmentTermEnum enumerator = getEnum();
    while(term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}

    // Guard against a null current term before comparing, as get(Term) and
    // scanEnum(Term) do; presumably term() is null once next() has returned
    // false, in which case the term is simply absent.
    if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0)
      return enumerator.position;
    else
      return -1;
  
public intgetSkipInterval()

    return origEnum.skipInterval;
  
private final org.apache.lucene.index.TermInfoscanEnum(org.apache.lucene.index.Term term)
Scans within block for matching term.

    SegmentTermEnum enumerator = getEnum();
    enumerator.scanTo(term);
    if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0)
      return enumerator.termInfo();
    else
      return null;
  
private final org.apache.lucene.index.TermscanEnum(int position)

    SegmentTermEnum enumerator = getEnum();
    while(enumerator.position < position)
      if (!enumerator.next())
	return null;

    return enumerator.term();
  
private final voidseekEnum(int indexOffset)

    getEnum().seek(indexPointers[indexOffset],
	      (indexOffset * getEnum().indexInterval) - 1,
	      indexTerms[indexOffset], indexInfos[indexOffset]);
  
final longsize()
Returns the number of term/value pairs in the set.

    // Value captured from the term enumeration when this reader was constructed.
    return this.size;
  
public org.apache.lucene.index.SegmentTermEnumterms()
Returns an enumeration of all the Terms and TermInfos in the set.

    return (SegmentTermEnum)origEnum.clone();
  
public org.apache.lucene.index.SegmentTermEnumterms(org.apache.lucene.index.Term term)
Returns an enumeration of terms starting at or after the named term.

    get(term);
    return (SegmentTermEnum)getEnum().clone();