Methods Summary |
---|
public final void | addAllDocVectors(org.apache.lucene.index.TermFreqVector[] vectors)Add a complete document specified by all its term vectors. If document has no
term vectors, add value for tvx.
// Start a new document; openDocument() first closes any document still open.
openDocument();
if (vectors != null) {
  for (int i = 0; i < vectors.length; i++) {
    final TermFreqVector vector = vectors[i];
    // Decide by type test rather than by catching ClassCastException: a catch
    // around the whole field write would also swallow a ClassCastException
    // raised after the field had been opened or flushed, and would then write
    // the same field a second time as a frequency-only vector.
    if (vector instanceof TermPositionVector) {
      final TermPositionVector tpVector = (TermPositionVector) vector;
      final int termCount = tpVector.size();
      // Positions/offsets are stored only when the first term carries them.
      final boolean storePositionWithTermVector =
          termCount > 0 && tpVector.getTermPositions(0) != null;
      final boolean storeOffsetWithTermVector =
          termCount > 0 && tpVector.getOffsets(0) != null;

      final FieldInfo fieldInfo = fieldInfos.fieldInfo(tpVector.getField());
      openField(fieldInfo.number, storePositionWithTermVector, storeOffsetWithTermVector);
      // Fetch the parallel term/frequency arrays once instead of once per term.
      final String[] termTexts = tpVector.getTerms();
      final int[] termFreqs = tpVector.getTermFrequencies();
      for (int j = 0; j < termCount; j++) {
        addTermInternal(termTexts[j], termFreqs[j], tpVector.getTermPositions(j),
            tpVector.getOffsets(j));
      }
      closeField();
    } else {
      // Plain frequency vector: no positions and no offsets are available.
      final FieldInfo fieldInfo = fieldInfos.fieldInfo(vector.getField());
      openField(fieldInfo.number, false, false);
      final int termCount = vector.size();
      final String[] termTexts = vector.getTerms();
      final int[] termFreqs = vector.getTermFrequencies();
      for (int j = 0; j < termCount; j++) {
        addTermInternal(termTexts[j], termFreqs[j], null, null);
      }
      closeField();
    }
  }
}
// Always close the document, so a document without vectors still gets its
// index record written by closeDocument().
closeDocument();
|
public final void | addTerm(java.lang.String termText, int freq, int[] positions, org.apache.lucene.index.TermVectorOffsetInfo[] offsets)
if (!isDocumentOpen())
throw new IllegalStateException("Cannot add terms when document is not open");
if (!isFieldOpen())
throw new IllegalStateException("Cannot add terms when field is not open");
addTermInternal(termText, freq, positions, offsets);
|
public final void | addTerm(java.lang.String termText, int freq)Add term to the field's term vector. The field must already be open.
Terms should be added in
increasing order of terms, one call per unique term. ProxPointer
is a pointer into the TermPosition file (prx). Freq is the number of
times this term appears in this field, in this document.
addTerm(termText, freq, null, null);
|
private final void | addTermInternal(java.lang.String termText, int freq, int[] positions, org.apache.lucene.index.TermVectorOffsetInfo[] offsets)
// Record the term in the in-memory list for the open field. No state checks
// are done here; the list is written out and cleared by closeField().
final TVTerm entry = new TVTerm();
entry.offsets = offsets;
entry.positions = positions;
entry.freq = freq;
entry.termText = termText;
terms.add(entry);
|
final void | close()Close all streams.
try {
closeDocument();
} finally {
// make an effort to close all streams we can but remember and re-throw
// the first exception encountered in this process
IOException keep = null;
if (tvx != null)
try {
tvx.close();
} catch (IOException e) {
if (keep == null) keep = e;
}
if (tvd != null)
try {
tvd.close();
} catch (IOException e) {
if (keep == null) keep = e;
}
if (tvf != null)
try {
tvf.close();
} catch (IOException e) {
if (keep == null) keep = e;
}
if (keep != null) throw (IOException) keep.fillInStackTrace();
}
|
public final void | closeDocument()
// Nothing to do unless a document is currently open.
if (!isDocumentOpen()) {
  return;
}
// Flush a field that is still open, write the document records, then reset
// the per-document state; -1 marks "no document open".
closeField();
writeDoc();
fields.clear();
currentDocPointer = -1;
|
public final void | closeField()Finished processing current field. This should be followed by a call to
openField before future calls to addTerm.
// Nothing to do unless a field is currently open.
if (!isFieldOpen()) {
  return;
}
// Write the buffered terms for this field, remember the field so that
// writeDoc() can list it, then reset the per-field state.
writeField();
fields.add(currentField);
terms.clear();
currentField = null;
|
public final boolean | isDocumentOpen()
return currentDocPointer != -1;
|
public final boolean | isFieldOpen()Return true if a field is currently open.
return currentField != null;
|
public final void | openDocument()
closeDocument();
currentDocPointer = tvd.getFilePointer();
|
public final void | openField(java.lang.String field)Start processing a field. This can be followed by a number of calls to
addTerm, and a final call to closeField to indicate the end of
processing of this field. If a field was previously open, it is
closed automatically.
// Resolve the field name to its FieldInfo and open the field using the
// number and the position/offset flags recorded there.
final FieldInfo info = fieldInfos.fieldInfo(field);
final int number = info.number;
final boolean withPositions = info.storePositionWithTermVector;
final boolean withOffsets = info.storeOffsetWithTermVector;
openField(number, withPositions, withOffsets);
|
private void | openField(int fieldNumber, boolean storePositionWithTermVector, boolean storeOffsetWithTermVector)
if (!isDocumentOpen())
throw new IllegalStateException("Cannot open field when no document is open.");
closeField();
currentField = new TVField(fieldNumber, storePositionWithTermVector, storeOffsetWithTermVector);
|
private void | writeDoc()
// All fields must have been closed (and so written) before the document
// records are emitted.
if (isFieldOpen()) {
  throw new IllegalStateException("Field is still open while writing document");
}

// Document index record: where this document's data record starts.
tvx.writeLong(currentDocPointer);

// Document data record: the field count, then every field number, then every
// field pointer. The two lists are written one after the other, so the loops
// below must stay separate.
final int fieldCount = fields.size();
tvd.writeVInt(fieldCount);

int index = 0;
while (index < fieldCount) {
  final TVField entry = (TVField) fields.elementAt(index);
  tvd.writeVInt(entry.number);
  index++;
}

// Each pointer is written as the difference from the previous field's
// pointer, starting from 0.
long previousPointer = 0;
for (int k = 0; k < fieldCount; k++) {
  final long pointer = ((TVField) fields.elementAt(k)).tvfPointer;
  tvd.writeVLong(pointer - previousPointer);
  previousPointer = pointer;
}
|
private void | writeField()
// Record where this field's data begins so writeDoc() can point at it.
currentField.tvfPointer = tvf.getFilePointer();

// Field header: number of terms, then one flag byte saying whether positions
// and/or offsets follow each term.
final int termCount = terms.size();
tvf.writeVInt(termCount);

final boolean withPositions = currentField.storePositions;
final boolean withOffsets = currentField.storeOffsets;
byte flags = 0x0;
if (withPositions) {
  flags |= STORE_POSITIONS_WITH_TERMVECTOR;
}
if (withOffsets) {
  flags |= STORE_OFFSET_WITH_TERMVECTOR;
}
tvf.writeByte(flags);

// Each term's text is written relative to the previous term: the length of
// the shared prefix, then the length and characters of the differing suffix.
String previousText = "";
for (int t = 0; t < termCount; t++) {
  final TVTerm current = (TVTerm) terms.elementAt(t);
  final String text = current.termText;
  final int sharedPrefix = StringHelper.stringDifference(previousText, text);
  final int suffixLength = text.length() - sharedPrefix;
  tvf.writeVInt(sharedPrefix);
  tvf.writeVInt(suffixLength);
  tvf.writeChars(text, sharedPrefix, suffixLength);
  tvf.writeVInt(current.freq);
  previousText = text;

  if (withPositions) {
    final int[] positions = current.positions;
    if (positions == null) {
      throw new IllegalStateException("Trying to write positions that are null!");
    }
    // One entry per occurrence, each written as the difference from the
    // previous position (starting from 0).
    int previousPosition = 0;
    for (int p = 0; p < current.freq; p++) {
      final int position = positions[p];
      tvf.writeVInt(position - previousPosition);
      previousPosition = position;
    }
  }

  if (withOffsets) {
    final org.apache.lucene.index.TermVectorOffsetInfo[] offsets = current.offsets;
    if (offsets == null) {
      throw new IllegalStateException("Trying to write offsets that are null!");
    }
    // One pair per occurrence: the start offset relative to the previous
    // occurrence's end offset, followed by the length (end - start).
    int previousEnd = 0;
    for (int o = 0; o < current.freq; o++) {
      final int startOffset = offsets[o].getStartOffset();
      final int endOffset = offsets[o].getEndOffset();
      tvf.writeVInt(startOffset - previousEnd);
      tvf.writeVInt(endOffset - startOffset);
      previousEnd = endOffset;
    }
  }
}
|