File: FieldsReader.java
Doc: API Doc
Category: Apache Lucene 2.2.0
Size: 17750
Date: Sat Jun 16 22:20:36 BST 2007
Package: org.apache.lucene.index

FieldsReader

public final class FieldsReader extends Object
Class responsible for access to stored document fields.

It uses the <segment>.fdt and <segment>.fdx files.
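A minimal usage sketch (hedged: the Directory, FieldInfos instance, and segment name "_0" are assumptions, and the constructor below is package-level, so in Lucene itself this reader is driven from inside org.apache.lucene.index):

    // Hedged sketch: open the stored fields of a hypothetical segment "_0",
    // read every document's stored fields, then release the underlying streams.
    FieldsReader reader = new FieldsReader(dir, "_0", fieldInfos); // dir, fieldInfos assumed
    try {
      for (int i = 0; i < reader.size(); i++) {
        Document doc = reader.doc(i, null);    // null selector: load all stored fields
        System.out.println(doc.get("title"));  // "title" is an illustrative field name
      }
    } finally {
      reader.close();
    }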

version
$Id: FieldsReader.java 542561 2007-05-29 15:14:07Z mikemccand $

Fields Summary
private final FieldInfos fieldInfos
private final IndexInput cloneableFieldsStream
private final IndexInput fieldsStream
private final IndexInput indexStream
private int size
private boolean closed
private ThreadLocal fieldsStreamTL
Constructors Summary
FieldsReader(Directory d, String segment, FieldInfos fn)


          
    this(d, segment, fn, BufferedIndexInput.BUFFER_SIZE);
  
FieldsReader(Directory d, String segment, FieldInfos fn, int readBufferSize)

    fieldInfos = fn;

    cloneableFieldsStream = d.openInput(segment + ".fdt", readBufferSize);
    fieldsStream = (IndexInput)cloneableFieldsStream.clone();
    indexStream = d.openInput(segment + ".fdx", readBufferSize);
    size = (int) (indexStream.length() / 8);
  
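The <segment>.fdx file read by the constructor is a flat array of 8-byte pointers, one per document, each pointing at that document's record in <segment>.fdt, which is why the document count is simply the index length divided by eight. A small hedged illustration of that arithmetic (the file length is made up):

    // Hedged illustration: a hypothetical .fdx file of 8,000 bytes describes
    // 1,000 documents; doc n's pointer into the .fdt file sits at offset n * 8.
    long fdxLength = 8000L;
    int docCount = (int) (fdxLength / 8);  // 1000, same arithmetic as the constructor
    long pointerOffset = 42 * 8L;          // where doc(42, ...) seeks in the index below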
Methods Summary
private void addField(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)


    //we have a binary stored field, and it may be compressed
    if (binary) {
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      if (compressed)
        doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
      else
        doc.add(new Field(fi.name, b, Field.Store.YES));

    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        int toRead = fieldsStream.readVInt();

        final byte[] b = new byte[toRead];
        fieldsStream.readBytes(b, 0, b.length);
        f = new Field(fi.name,      // field name
                new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
      } else {
        f = new Field(fi.name,     // name
                fieldsStream.readString(), // read value
                store,
                index,
                termVector);
        f.setOmitNorms(fi.omitNorms);
      }
      doc.add(f);
    }
  
private void addFieldForMerge(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)

    Object data;
      
    if (binary || compressed) {
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      data = b;
    } else {
      data = fieldsStream.readString();
    }
      
    doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
  
private void addFieldLazy(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)

    if (binary == true) {
      int toRead = fieldsStream.readVInt();
      long pointer = fieldsStream.getFilePointer();
      if (compressed) {
        //was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
        doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
      } else {
        //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
        doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
      }
      //Need to move the pointer ahead by toRead positions
      fieldsStream.seek(pointer + toRead);
    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = getIndexType(fi, tokenize);
      Field.TermVector termVector = getTermVectorType(fi);

      Fieldable f;
      if (compressed) {
        store = Field.Store.COMPRESS;
        int toRead = fieldsStream.readVInt();
        long pointer = fieldsStream.getFilePointer();
        f = new LazyField(fi.name, store, toRead, pointer);
        //skip over the part that we aren't loading
        fieldsStream.seek(pointer + toRead);
        f.setOmitNorms(fi.omitNorms);
      } else {
        int length = fieldsStream.readVInt();
        long pointer = fieldsStream.getFilePointer();
        //Skip ahead of where we are by the length of what is stored
        fieldsStream.skipChars(length);
        f = new LazyField(fi.name, store, index, termVector, length, pointer);
        f.setOmitNorms(fi.omitNorms);
      }
      doc.add(f);
    }

  
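Fields added by this method are LazyField placeholders that remember only a length and a file pointer; the bytes themselves are read from the fields stream the first time a value is requested. A hedged caller-side sketch (the selector and field name are illustrative):

    // Hedged sketch: with a FieldSelector returning LAZY_LOAD for "body",
    // the returned Document holds a LazyField until its value is first asked for.
    Document doc = reader.doc(7, lazySelector); // lazySelector is an assumed FieldSelector
    Fieldable lazy = doc.getFieldable("body");  // cheap: no field bytes read yet
    String text = lazy.stringValue();           // triggers the deferred read from .fdt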
private int addFieldSize(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed)

    int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
    byte[] sizebytes = new byte[4];
    sizebytes[0] = (byte) (bytesize>>>24);
    sizebytes[1] = (byte) (bytesize>>>16);
    sizebytes[2] = (byte) (bytesize>>> 8);
    sizebytes[3] = (byte)  bytesize      ;
    doc.add(new Field(fi.name, sizebytes, Field.Store.YES));
    return size;
  
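Rather than the content itself, this method stores the field's size (bytes for binary or compressed fields, two bytes per character otherwise) as a 4-byte big-endian value. A hedged sketch of decoding it on the caller side:

    // Hedged sketch: turn the 4 bytes written by addFieldSize back into an int;
    // "field" stands for the Fieldable added to the Document above (an assumption).
    byte[] sizebytes = field.binaryValue();
    int bytesize = ((sizebytes[0] & 0xff) << 24)
                 | ((sizebytes[1] & 0xff) << 16)
                 | ((sizebytes[2] & 0xff) <<  8)
                 |  (sizebytes[3] & 0xff);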
final void close()
Closes the underlying {@link org.apache.lucene.store.IndexInput} streams, including any ones associated with a lazy implementation of a Field. This means that the Fields values will not be accessible.

throws
IOException

    if (!closed) {
      fieldsStream.close();
      cloneableFieldsStream.close();
      indexStream.close();
      IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
      if (localFieldsStream != null) {
        localFieldsStream.close();
        fieldsStreamTL.set(null);
      }
      closed = true;
    }
  
final org.apache.lucene.document.Document doc(int n, org.apache.lucene.document.FieldSelector fieldSelector)

    indexStream.seek(n * 8L);
    long position = indexStream.readLong();
    fieldsStream.seek(position);

    Document doc = new Document();
    int numFields = fieldsStream.readVInt();
    for (int i = 0; i < numFields; i++) {
      int fieldNumber = fieldsStream.readVInt();
      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
      FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
      
      byte bits = fieldsStream.readByte();
      boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
      boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
      boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
      //TODO: Find an alternative approach here if this list continues to grow beyond the
      //list of 5 or 6 currently here.  See Lucene 762 for discussion
      if (acceptField.equals(FieldSelectorResult.LOAD)) {
        addField(doc, fi, binary, compressed, tokenize);
      }
      else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
        addFieldForMerge(doc, fi, binary, compressed, tokenize);
      }
      else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
        addField(doc, fi, binary, compressed, tokenize);
        break;//Get out of this loop
      }
      else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
        addFieldLazy(doc, fi, binary, compressed, tokenize);
      }
      else if (acceptField.equals(FieldSelectorResult.SIZE)){
        skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
      }
      else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
        addFieldSize(doc, fi, binary, compressed);
        break;
      }
      else {
        skipField(binary, compressed);
      }
    }

    return doc;
  
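The FieldSelector hook lets callers decide per field whether a value is loaded eagerly, lazily, only sized, or skipped. A hedged example of a selector that loads a single field and then stops reading the document (the field name is illustrative):

    // Hedged sketch: load only the "title" field of document 42 and skip the rest.
    FieldSelector onlyTitle = new FieldSelector() {
      public FieldSelectorResult accept(String fieldName) {
        return "title".equals(fieldName)
            ? FieldSelectorResult.LOAD_AND_BREAK
            : FieldSelectorResult.NO_LOAD;
      }
    };
    Document doc = reader.doc(42, onlyTitle);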
protected final void ensureOpen()

throws
AlreadyClosedException if this FieldsReader is closed

    if (closed) {
      throw new AlreadyClosedException("this FieldsReader is closed");
    }
  
private org.apache.lucene.document.Field.Index getIndexType(org.apache.lucene.index.FieldInfo fi, boolean tokenize)

    Field.Index index;
    if (fi.isIndexed && tokenize)
      index = Field.Index.TOKENIZED;
    else if (fi.isIndexed && !tokenize)
      index = Field.Index.UN_TOKENIZED;
    else
      index = Field.Index.NO;
    return index;
  
private org.apache.lucene.document.Field.TermVector getTermVectorType(org.apache.lucene.index.FieldInfo fi)

    Field.TermVector termVector = null;
    if (fi.storeTermVector) {
      if (fi.storeOffsetWithTermVector) {
        if (fi.storePositionWithTermVector) {
          termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
        } else {
          termVector = Field.TermVector.WITH_OFFSETS;
        }
      } else if (fi.storePositionWithTermVector) {
        termVector = Field.TermVector.WITH_POSITIONS;
      } else {
        termVector = Field.TermVector.YES;
      }
    } else {
      termVector = Field.TermVector.NO;
    }
    return termVector;
  
final int size()

    return size;
  
private void skipField(boolean binary, boolean compressed)
Skip the field. We still have to read some of the information about the field, but can skip past the actual content. This will have the most payoff on large fields.

    skipField(binary, compressed, fieldsStream.readVInt());
  
private void skipField(boolean binary, boolean compressed, int toRead)

    if (binary || compressed) {
      long pointer = fieldsStream.getFilePointer();
      fieldsStream.seek(pointer + toRead);
    } else {
      //We need to skip chars.  This will slow us down, but still better
      fieldsStream.skipChars(toRead);
    }
  
private final byte[] uncompress(byte[] input)


    Inflater decompressor = new Inflater();
    decompressor.setInput(input);

    // Create an expandable byte array to hold the decompressed data
    ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);

    // Decompress the data
    byte[] buf = new byte[1024];
    while (!decompressor.finished()) {
      try {
        int count = decompressor.inflate(buf);
        bos.write(buf, 0, count);
      }
      catch (DataFormatException e) {
        // this will happen if the field is not compressed
        CorruptIndexException newException = new CorruptIndexException("field data are in wrong format: " + e.toString());
        newException.initCause(e);
        throw newException;
      }
    }
  
    decompressor.end();
    
    // Get the decompressed data
    return bos.toByteArray();
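uncompress is the read-side counterpart of compressed stored fields; the data it inflates is produced at indexing time when a field is stored with Field.Store.COMPRESS. A hedged sketch of the write side (the IndexWriter and text value are assumptions):

    // Hedged sketch: storing a field compressed at index time (Lucene 2.2 API);
    // FieldsReader.uncompress later inflates it when the document is loaded.
    Document doc = new Document();
    doc.add(new Field("contents", longText, Field.Store.COMPRESS, Field.Index.TOKENIZED));
    indexWriter.addDocument(doc); // indexWriter and longText are assumed to exist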