File: FieldsReader.java
Doc: API Doc
Category: Apache Lucene 2.2.0
Size: 17750
Date: Sat Jun 16 22:20:36 BST 2007
Package: org.apache.lucene.index

FieldsReader

public final class FieldsReader extends Object
Class responsible for access to stored document fields.

It uses the <segment>.fdt and <segment>.fdx files.
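A minimal usage sketch (hedged: the Directory, FieldInfos instance, and segment name "_0" are assumptions, and the constructor below is package-level, so in Lucene itself this reader is driven from inside org.apache.lucene.index):

    // Hedged sketch: open the stored fields of a hypothetical segment "_0",
    // read every document's stored fields, then release the underlying streams.
    FieldsReader reader = new FieldsReader(dir, "_0", fieldInfos); // dir, fieldInfos assumed
    try {
      for (int i = 0; i < reader.size(); i++) {
        Document doc = reader.doc(i, null);    // null selector: load all stored fields
        System.out.println(doc.get("title"));  // "title" is an illustrative field name
      }
    } finally {
      reader.close();
    }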

version
$Id: FieldsReader.java 542561 2007-05-29 15:14:07Z mikemccand $

Fields Summary
private final FieldInfos fieldInfos
private final IndexInput cloneableFieldsStream
private final IndexInput fieldsStream
private final IndexInput indexStream
private int size
private boolean closed
private ThreadLocal fieldsStreamTL
Constructors Summary
FieldsReader(Directory d, String segment, FieldInfos fn)


          
    this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE);
  
FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize)

    fieldInfos = fn;

    cloneableFieldsStream = d.openInput(segment + ".fdt", readBufferSize);
    fieldsStream = (IndexInput)cloneableFieldsStream.clone();
    indexStream = d.openInput(segment + ".fdx", readBufferSize);
    size = (int) (indexStream.length() / 8);
  
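The <segment>.fdx file read by the constructor is a flat array of 8-byte pointers, one per document, each pointing at that document's record in <segment>.fdt, which is why the document count is simply the index length divided by eight. A small hedged illustration of that arithmetic (the file length is made up):

    // Hedged illustration: a hypothetical .fdx file of 8,000 bytes describes
    // 1,000 documents; doc n's pointer into the .fdt file sits at offset n * 8.
    long fdxLength = 8000L;
    int docCount = (int) (fdxLength / 8);  // 1000, same arithmetic as the constructor
    long pointerOffset = 42 * 8L;          // where doc(42, ...) seeks in the index below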
Methods Summary
private void addField(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)


    //we have a binary stored field, and it may be compressed
    if (binary) {
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      if (compressed)
        doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
      else
        doc.add(new Field(fi.name, b, Field.Store.YES));

    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        int toRead = fieldsStream.readVInt();

        final byte[] b = new byte[toRead];
        fieldsStream.readBytes(b, 0, b.length);
        f = new Field(fi.name,      // field name
                new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
      } else {
        f = new Field(fi.name,     // name
                fieldsStream.readString(), // read value
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
      }
      doc.add(f);
    }
  
private void addFieldForMerge(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)

    Object data;
      
    if (binary || compressed) {
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      data = b;
    } else {
      data = fieldsStream.readString();
    }
      
    doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
  
private void addFieldLazy(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)

    if (binary == true) {
      int toRead = fieldsStream.readVInt();
      long pointer = fieldsStream.getFilePointer();
      if (compressed) {
        //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
        doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
      } else {
        //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
        doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
      }
      //Need to move the pointer ahead by toRead positions
      fieldsStream.seek(pointer + toRead);
    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        int toRead = fieldsStream.readVInt();
        long pointer = fieldsStream.getFilePointer();
        f = new LazyField(fi.name, store, toRead, pointer);
        //skip over the part that we aren't loading
        fieldsStream.seek(pointer + toRead);
        f.setOmitNorms(fi.omitNorms);
      } else {
        int length = fieldsStream.readVInt();
        long pointer = fieldsStream.getFilePointer();
        //Skip ahead of where we are by the length of what is stored
        fieldsStream.skipChars(length);
        f = new LazyField(fi.name, store, index, termVector, length, pointer);
        f.setOmitNorms(fi.omitNorms);
      }
      doc.add(f);
    }

  
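Fields added by this method are LazyField placeholders that remember only a length and a file pointer; the bytes themselves are read from the fields stream the first time a value is requested. A hedged caller-side sketch (the selector and field name are illustrative):

    // Hedged sketch: with a FieldSelector returning LAZY_LOAD for "body",
    // the returned Document holds a LazyField until its value is first asked for.
    Document doc = reader.doc(7, lazySelector); // lazySelector is an assumed FieldSelector
    Fieldable lazy = doc.getFieldable("body");  // cheap: no field bytes read yet
    String text = lazy.stringValue();           // triggers the deferred read from .fdt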
private int addFieldSize(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed)

    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
    byte[] sizebytes = new byte[4];
    sizebytes[0] = (byte) (bytesize>>>24);
    sizebytes[1] = (byte) (bytesize>>>16);
    sizebytes[2] = (byte) (bytesize>>> 8);
    sizebytes[3] = (byte)  bytesize      ;
    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));
    return size;
  
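Rather than the content itself, this method stores the field's size (bytes for binary or compressed fields, two bytes per character otherwise) as a 4-byte big-endian value. A hedged sketch of decoding it on the caller side:

    // Hedged sketch: turn the 4 bytes written by addFieldSize back into an int;
    // "field" stands for the Fieldable added to the Document above (an assumption).
    byte[] sizebytes = field.binaryValue();
    int bytesize = ((sizebytes[0] & 0xff) << 24)
                 | ((sizebytes[1] & 0xff) << 16)
                 | ((sizebytes[2] & 0xff) <<  8)
                 |  (sizebytes[3] & 0xff);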
final void close()
Closes the underlying {@link org.apache.lucene.store.IndexInput} streams, including any ones associated with a lazy implementation of a Field. This means that the Fields values will not be accessible.

throws
IOException

    if (!closed) {
      fieldsStream.close();
      cloneableFieldsStream.close();
      indexStream.close();
      IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
      if (localFieldsStream != null) {
        localFieldsStream.close();
        fieldsStreamTL.set(null);
      }
      closed = true;
    }
  
final org.apache.lucene.document.Document doc(int n, org.apache.lucene.document.FieldSelector fieldSelector)

    indexStream.seek(n * 8L);
    long position = indexStream.readLong();
    fieldsStream.seek(position);

    Document doc = new Document();
    int numFields = fieldsStream.readVInt();
    for (int i = 0; i < numFields; i++) {
      int fieldNumber = fieldsStream.readVInt();
      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
      FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
      
      byte bits = fieldsStream.readByte();
      boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
      boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
      boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
      //TODO: Find an alternative approach here if this list continues to grow beyond the
      //list of 5 or 6 currently here.  See Lucene 762 for discussion
      if (acceptField.equals(FieldSelectorResult.LOAD)) {
        addField(doc, fi, binary, compressed, tokenize);
      }
      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
        addFieldForMerge(doc, fi, binary, compressed, tokenize);
      }
      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
        addField(doc, fi, binary, compressed, tokenize);
        break;//Get out of this loop
      }
      else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
        addFieldLazy(doc, fi, binary, compressed, tokenize);
      }
      else if (acceptField.equals(FieldSelectorResult.SIZE)){
        skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
      }
      else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
        addFieldSize(doc, fi, binary, compressed);
        break;
      }
      else {
        skipField(binary, compressed);
      }
    }

    return doc;
  
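The FieldSelector hook lets callers decide per field whether a value is loaded eagerly, lazily, only sized, or skipped. A hedged example of a selector that loads a single field and then stops reading the document (the field name is illustrative):

    // Hedged sketch: load only the "title" field of document 42 and skip the rest.
    FieldSelector onlyTitle = new FieldSelector() {
      public FieldSelectorResult accept(String fieldName) {
        return "title".equals(fieldName)
            ? FieldSelectorResult.LOAD_AND_BREAK
            : FieldSelectorResult.NO_LOAD;
      }
    };
    Document doc = reader.doc(42, onlyTitle);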
protected final void ensureOpen()

throws
AlreadyClosedException if this FieldsReader is closed

    if (closed) {
      throw new AlreadyClosedException("this FieldsReader is closed");
    }
  
private org.apache.lucene.document.Field.Index getIndexType(org.apache.lucene.index.FieldInfo fi, boolean tokenize)

    Field.Index index;
    if (fi.isIndexed && tokenize)
      index = Field.Index.TOKENIZED;
    else if (fi.isIndexed && !tokenize)
      index = Field.Index.UN_TOKENIZED;
    else
      index = Field.Index.NO;
    return index;
  
private org.apache.lucene.document.Field.TermVector getTermVectorType(org.apache.lucene.index.FieldInfo fi)

    Field.TermVector termVector = null;
    if (fi.storeTermVector) {
      if (fi.storeOffsetWithTermVector) {
        if (fi.storePositionWithTermVector) {
          termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        } else {
          termVector = Field.TermVector.WITH_OFFSETS;
        }
      } else if (fi.storePositionWithTermVector) {
        termVector = Field.TermVector.WITH_POSITIONS;
      } else {
        termVector = Field.TermVector.YES;
      }
    } else {
      termVector = Field.TermVector.NO;
    }
    return termVector;
  
final int size()

    return size;
  
private void skipField(boolean binary, boolean compressed)
Skip the field. We still have to read some of the information about the field, but can skip past the actual content. This will have the most payoff on large fields.

    skipField(binary, compressed, fieldsStream.readVInt());
  
private void skipField(boolean binary, boolean compressed, int toRead)

    if (binary || compressed) {
      long pointer = fieldsStream.getFilePointer();
      fieldsStream.seek(pointer + toRead);
    } else {
      //We need to skip chars.  This will slow us down, but still better
      fieldsStream.skipChars(toRead);
    }
  
private final byte[] uncompress(byte[] input)


    Inflater decompressor = new Inflater();
    decompressor.setInput(input);

    // Create an expandable byte array to hold the decompressed data
    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);

    // Decompress the data
    byte[] buf = new byte[1024];
    while (!decompressor.finished()) {
      try {
        int count = decompressor.inflate(buf);
        bos.write(buf, 0, count);
      }
      catch (DataFormatException e) {
        // this will happen if the field is not compressed
        CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
        newException.initCause(e);
        throw newException;
      }
    }
  
    decompressor.end();
    
    // Get the decompressed data
    return bos.toByteArray();
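uncompress is the read-side counterpart of compressed stored fields; the data it inflates is produced at indexing time when a field is stored with Field.Store.COMPRESS. A hedged sketch of the write side (the IndexWriter and text value are assumptions):

    // Hedged sketch: storing a field compressed at index time (Lucene 2.2 API);
    // FieldsReader.uncompress later inflates it when the document is loaded.
    Document doc = new Document();
    doc.add(new Field("contents", longText, Field.Store.COMPRESS, Field.Index.TOKENIZED));
    indexWriter.addDocument(doc); // indexWriter and longText are assumed to exist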