FieldsReader

public final class FieldsReader extends Object

Class responsible for access to stored document fields. It uses <segment>.fdt and <segment>.fdx files.
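A minimal usage sketch, under assumptions this page does not state: the calling code is compiled into org.apache.lucene.index (so package-level types such as FieldInfos resolve), the index on disk contains a segment named "_0", and that segment stores a field named "title". The segment name, field name, and class name below are illustrative only.

package org.apache.lucene.index;  // assumption: placed here so package-level types are visible

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class StoredFieldsDump {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.getDirectory(args[0]);
    FieldInfos infos = new FieldInfos(dir, "_0.fnm");   // field infos for the same segment
    FieldsReader reader = new FieldsReader(dir, "_0", infos);
    try {
      for (int i = 0; i < reader.size(); i++) {
        Document doc = reader.doc(i, null);             // null selector loads every stored field
        System.out.println(doc.get("title"));           // "title" is a hypothetical stored field
      }
    } finally {
      reader.close();                                   // closes the underlying .fdt/.fdx streams
    }
  }
}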
Fields Summary
--------------

private final FieldInfos fieldInfos
private final IndexInput cloneableFieldsStream
private final IndexInput fieldsStream
private final IndexInput indexStream
private int size
private ThreadLocal fieldsStreamTL
Constructors Summary
--------------------

FieldsReader(Directory d, String segment, FieldInfos fn)
fieldInfos = fn;
cloneableFieldsStream = d.openInput(segment + ".fdt");
fieldsStream = (IndexInput)cloneableFieldsStream.clone();
indexStream = d.openInput(segment + ".fdx");
size = (int) (indexStream.length() / 8);
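Each entry in <segment>.fdx is one long (8 bytes) pointing at the start of the corresponding document's record in <segment>.fdt, so the document count is the index file's length divided by 8. A small sketch of that lookup, written as if inside this class (document number 42 is illustrative):

// Sketch: resolving where document n's stored fields begin, using the
// 8-bytes-per-document layout of <segment>.fdx that the size computation relies on.
int n = 42;                                // illustrative document number, 0 <= n < size
indexStream.seek(n * 8L);                  // each index entry is one long
long fdtPosition = indexStream.readLong(); // byte offset of document n's record in <segment>.fdt
fieldsStream.seek(fdtPosition);            // doc(int, FieldSelector) below starts exactly this way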
Methods Summary
---------------

private void addField(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)
//we have a binary stored field, and it may be compressed
if (binary) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
if (compressed)
doc.add(new Field(fi.name, uncompress(b), Field.Store.COMPRESS));
else
doc.add(new Field(fi.name, b, Field.Store.YES));
} else {
Field.Store store = Field.Store.YES;
Field.Index index = getIndexType(fi, tokenize);
Field.TermVector termVector = getTermVectorType(fi);
Fieldable f;
if (compressed) {
store = Field.Store.COMPRESS;
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
f = new Field(fi.name, // field name
new String(uncompress(b), "UTF-8"), // uncompress the value and add as string
store,
index,
termVector);
f.setOmitNorms(fi.omitNorms);
} else {
f = new Field(fi.name, // name
fieldsStream.readString(), // read value
store,
index,
termVector);
f.setOmitNorms(fi.omitNorms);
}
doc.add(f);
}
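For orientation, the per-document record in <segment>.fdt that this method and doc(int, FieldSelector) below consume can be summarized from the reads they perform (the writing side lives in FieldsWriter and is not shown on this page):

// Per-document layout of <segment>.fdt as consumed by doc() and the addField* methods:
//   VInt  numFields
//   for each stored field:
//     VInt  field number                   -> resolved through FieldInfos
//     byte  bits                           -> FIELD_IS_COMPRESSED, FIELD_IS_TOKENIZED, FIELD_IS_BINARY
//     value: binary or compressed fields   -> VInt byte length, then that many bytes
//            plain text fields             -> a string read with IndexInput.readString()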
private void addFieldForMerge(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)
Object data;
if (binary || compressed) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
data = b;
} else {
data = fieldsStream.readString();
}
doc.add(new FieldForMerge(data, fi, binary, compressed, tokenize));
private void addFieldLazy(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed, boolean tokenize)
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
if (compressed) {
//was: doc.add(new Fieldable(fi.name, uncompress(b), Fieldable.Store.COMPRESS));
doc.add(new LazyField(fi.name, Field.Store.COMPRESS, toRead, pointer));
} else {
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer));
}
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
} else {
Field.Store store = Field.Store.YES;
Field.Index index = getIndexType(fi, tokenize);
Field.TermVector termVector = getTermVectorType(fi);
Fieldable f;
if (compressed) {
store = Field.Store.COMPRESS;
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
f = new LazyField(fi.name, store, toRead, pointer);
//skip over the part that we aren't loading
fieldsStream.seek(pointer + toRead);
f.setOmitNorms(fi.omitNorms);
} else {
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
fieldsStream.skipChars(length);
f = new LazyField(fi.name, store, index, termVector, length, pointer);
f.setOmitNorms(fi.omitNorms);
}
doc.add(f);
}
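A LazyField only records the value's length and file position; the bytes are fetched when the value is first requested, through a per-thread clone of cloneableFieldsStream cached in fieldsStreamTL. The real LazyField is an inner class not shown on this page; the sketch below illustrates the deferred read for a binary value, with an illustrative method name.

// Sketch of how a lazily loaded binary value could be materialized later.
// "pointer" and "toRead" are the values captured in addFieldLazy above.
private byte[] readLazyBytes(long pointer, int toRead) throws IOException {
  IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
  if (localFieldsStream == null) {
    // each thread gets its own clone so concurrent readers do not share one file pointer
    localFieldsStream = (IndexInput) cloneableFieldsStream.clone();
    fieldsStreamTL.set(localFieldsStream);
  }
  byte[] b = new byte[toRead];
  localFieldsStream.seek(pointer);
  localFieldsStream.readBytes(b, 0, b.length);
  return b;
}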
private int addFieldSize(org.apache.lucene.document.Document doc, org.apache.lucene.index.FieldInfo fi, boolean binary, boolean compressed)
int size = fieldsStream.readVInt(), bytesize = binary || compressed ? size : 2*size;
byte[] sizebytes = new byte[4];
sizebytes[0] = (byte) (bytesize>>>24);
sizebytes[1] = (byte) (bytesize>>>16);
sizebytes[2] = (byte) (bytesize>>> 8);
sizebytes[3] = (byte) bytesize ;
doc.add(new Field(fi.name, sizebytes, Field.Store.YES));
return size;
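The four bytes written here encode the value's size big-endian: the byte length for binary or compressed fields, and twice the character count for plain text (two bytes per char). A caller that selected FieldSelectorResult.SIZE could decode it as follows (the field name "body" is hypothetical):

// Decode the 4-byte big-endian value stored by addFieldSize back into an int.
byte[] sizebytes = doc.getField("body").binaryValue();
int bytesize = ((sizebytes[0] & 0xFF) << 24)
             | ((sizebytes[1] & 0xFF) << 16)
             | ((sizebytes[2] & 0xFF) << 8)
             |  (sizebytes[3] & 0xFF);
// For text fields this is 2 * the character count; for binary/compressed fields, the byte length.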
final void close()
Closes the underlying {@link org.apache.lucene.store.IndexInput} streams, including any associated with a lazy implementation of a Field. This means that the Field values will no longer be accessible.
fieldsStream.close();
cloneableFieldsStream.close();
indexStream.close();
IndexInput localFieldsStream = (IndexInput) fieldsStreamTL.get();
if (localFieldsStream != null) {
localFieldsStream.close();
fieldsStreamTL.set(null);
}
final org.apache.lucene.document.Document doc(int n, org.apache.lucene.document.FieldSelector fieldSelector)
indexStream.seek(n * 8L);
long position = indexStream.readLong();
fieldsStream.seek(position);
Document doc = new Document();
int numFields = fieldsStream.readVInt();
for (int i = 0; i < numFields; i++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
byte bits = fieldsStream.readByte();
boolean compressed = (bits & FieldsWriter.FIELD_IS_COMPRESSED) != 0;
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
//TODO: Find an alternative approach here if this list continues to grow beyond the
//5 or 6 cases currently here. See LUCENE-762 for discussion.
if (acceptField.equals(FieldSelectorResult.LOAD)) {
addField(doc, fi, binary, compressed, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.LOAD_FOR_MERGE)) {
addFieldForMerge(doc, fi, binary, compressed, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
addField(doc, fi, binary, compressed, tokenize);
break;//Get out of this loop
}
else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
addFieldLazy(doc, fi, binary, compressed, tokenize);
}
else if (acceptField.equals(FieldSelectorResult.SIZE)){
skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed));
}
else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
addFieldSize(doc, fi, binary, compressed);
break;
}
else {
skipField(binary, compressed);
}
}
return doc;
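A hedged example of driving this method with a FieldSelector (from org.apache.lucene.document): the selector below loads a single field and then stops scanning the rest of the record via the LOAD_AND_BREAK branch above. The field name "id", the document number, and the fieldsReader variable are illustrative.

// Load only the "id" field of document 42 and skip everything else in the record.
FieldSelector idOnly = new FieldSelector() {
  public FieldSelectorResult accept(String fieldName) {
    return "id".equals(fieldName) ? FieldSelectorResult.LOAD_AND_BREAK
                                  : FieldSelectorResult.NO_LOAD;
  }
};
Document doc = fieldsReader.doc(42, idOnly);   // fieldsReader: an instance of this class
String id = doc.get("id");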
private org.apache.lucene.document.Field.Index getIndexType(org.apache.lucene.index.FieldInfo fi, boolean tokenize)
Field.Index index;
if (fi.isIndexed && tokenize)
index = Field.Index.TOKENIZED;
else if (fi.isIndexed && !tokenize)
index = Field.Index.UN_TOKENIZED;
else
index = Field.Index.NO;
return index;
private org.apache.lucene.document.Field.TermVector getTermVectorType(org.apache.lucene.index.FieldInfo fi)
Field.TermVector termVector = null;
if (fi.storeTermVector) {
if (fi.storeOffsetWithTermVector) {
if (fi.storePositionWithTermVector) {
termVector = Field.TermVector.WITH_POSITIONS_OFFSETS;
} else {
termVector = Field.TermVector.WITH_OFFSETS;
}
} else if (fi.storePositionWithTermVector) {
termVector = Field.TermVector.WITH_POSITIONS;
} else {
termVector = Field.TermVector.YES;
}
} else {
termVector = Field.TermVector.NO;
}
return termVector;
final int size()
return size;
private void skipField(boolean binary, boolean compressed)
Skip the field. We still have to read some of the information about the field, but can skip past the actual content. This will have the most payoff on large fields.
skipField(binary, compressed, fieldsStream.readVInt());
private void skipField(boolean binary, boolean compressed, int toRead)
if (binary || compressed) {
long pointer = fieldsStream.getFilePointer();
fieldsStream.seek(pointer + toRead);
} else {
//We need to skip chars. This will slow us down, but is still better than reading the value only to discard it.
fieldsStream.skipChars(toRead);
}
private final byte[] uncompress(byte[] input)
Inflater decompressor = new Inflater();
decompressor.setInput(input);
// Create an expandable byte array to hold the decompressed data
ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
// Decompress the data
byte[] buf = new byte[1024];
while (!decompressor.finished()) {
try {
int count = decompressor.inflate(buf);
bos.write(buf, 0, count);
}
catch (DataFormatException e) {
// this will happen if the field is not compressed
IOException newException = new IOException("field data are in wrong format: " + e.toString());
newException.initCause(e);
throw newException;
}
}
decompressor.end();
// Get the decompressed data
return bos.toByteArray();
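This method reverses the deflate-based compression that FieldsWriter applies to fields stored with Field.Store.COMPRESS. A sketch of a compatible compressing side using the same java.util.zip machinery (an illustration, not necessarily the writer's exact code):

import java.io.ByteArrayOutputStream;
import java.util.zip.Deflater;

// Compress a field value so that uncompress(byte[]) above can restore it.
private static byte[] compress(byte[] input) {
  Deflater compressor = new Deflater();
  compressor.setLevel(Deflater.BEST_COMPRESSION);
  compressor.setInput(input);
  compressor.finish();
  ByteArrayOutputStream bos = new ByteArrayOutputStream(input.length);
  byte[] buf = new byte[1024];
  while (!compressor.finished()) {
    int count = compressor.deflate(buf);
    bos.write(buf, 0, count);
  }
  compressor.end();
  return bos.toByteArray();
}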