Methods Summary
---
final void | add(org.apache.lucene.index.IndexReader reader)
Add an IndexReader to the collection of readers that are to be merged.
readers.addElement(reader);
|
private final int | appendPostings(org.apache.lucene.index.SegmentMergeInfo[] smis, int n)
Process postings from multiple segments that are all positioned on the
same term. Writes the merged entries to the freqOutput and proxOutput
streams and returns the term's document frequency in the merged segment.
The document/frequency encoding is illustrated after this entry.
int lastDoc = 0;
int df = 0;                                    // number of docs w/ term
resetSkip();
for (int i = 0; i < n; i++) {
  SegmentMergeInfo smi = smis[i];
  TermPositions postings = smi.postings;
  int base = smi.base;
  int[] docMap = smi.docMap;
  postings.seek(smi.termEnum);
  while (postings.next()) {
    int doc = postings.doc();
    if (docMap != null)
      doc = docMap[doc];                       // map around deletions
    doc += base;                               // convert to merged space

    if (doc < lastDoc)
      throw new IllegalStateException("docs out of order");

    df++;

    if ((df % skipInterval) == 0) {
      bufferSkip(lastDoc);
    }

    int docCode = (doc - lastDoc) << 1;        // use low bit to flag freq=1
    lastDoc = doc;

    int freq = postings.freq();
    if (freq == 1) {
      freqOutput.writeVInt(docCode | 1);       // write doc & freq=1
    } else {
      freqOutput.writeVInt(docCode);           // write doc
      freqOutput.writeVInt(freq);              // write frequency in doc
    }

    int lastPosition = 0;                      // write position deltas
    for (int j = 0; j < freq; j++) {
      int position = postings.nextPosition();
      proxOutput.writeVInt(position - lastPosition);
      lastPosition = position;
    }
  }
}
return df;
|
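The docCode trick above packs the document delta and a common special
case into a single VInt: the delta is shifted left by one bit and the
low bit flags freq == 1, so single-occurrence postings cost one VInt
instead of two. A minimal, self-contained sketch of the scheme (the
encode helper is illustrative, not part of the Lucene API):

// Illustrative helper, not the Lucene API: shows the
// (delta << 1 | freqIsOne) packing used by appendPostings.
static int[] encode(int doc, int lastDoc, int freq) {
  int docCode = (doc - lastDoc) << 1;  // delta, shifted to free the low bit
  return freq == 1
      ? new int[] { docCode | 1 }      // low bit set: freq == 1 is implied
      : new int[] { docCode, freq };   // otherwise the frequency follows
}

For example, doc 7 after lastDoc 5 with freq 1 encodes as the single
value 5 (delta 2 shifted to 4, plus the flag bit); the same doc with
freq 3 encodes as the pair 4, 3.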
private void | bufferSkip(int doc)
Buffer one skip entry for the current term: the document number and the
freq/prox file pointers, each stored as a delta against the previous
entry (a decode sketch follows this entry).
long freqPointer = freqOutput.getFilePointer();
long proxPointer = proxOutput.getFilePointer();
skipBuffer.writeVInt(doc - lastSkipDoc);
skipBuffer.writeVInt((int) (freqPointer - lastSkipFreqPointer));
skipBuffer.writeVInt((int) (proxPointer - lastSkipProxPointer));
lastSkipDoc = doc;
lastSkipFreqPointer = freqPointer;
lastSkipProxPointer = proxPointer;
|
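Because each skip entry is three VInt deltas against the previous entry,
absolute values are recovered by running sums. A minimal decode sketch
under that assumption (the int[][] input stands in for the VInt stream;
it is illustrative, not the on-disk format):

// Illustrative decoder: rebuilds absolute skip points from the
// (docDelta, freqDelta, proxDelta) triples buffered by bufferSkip.
static long[][] decodeSkips(int[][] deltas, long baseFreq, long baseProx) {
  long doc = 0, freq = baseFreq, prox = baseProx;
  long[][] points = new long[deltas.length][];
  for (int i = 0; i < deltas.length; i++) {
    doc  += deltas[i][0];              // document number delta
    freq += deltas[i][1];              // .frq file-pointer delta
    prox += deltas[i][2];              // .prx file-pointer delta
    points[i] = new long[] { doc, freq, prox };
  }
  return points;
}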
final void | closeReaders()
Close all IndexReaders that have been added. Should not be called
before merge().
for (int i = 0; i < readers.size(); i++) {     // close readers
  IndexReader reader = (IndexReader) readers.elementAt(i);
  reader.close();
}
|
private final void | createCompoundFile()
Collect the new segment's files into a single compound (.cfs) file and
delete the originals.
CompoundFileWriter cfsWriter =
  new CompoundFileWriter(directory, segment + ".cfs");
ArrayList files =
  new ArrayList(COMPOUND_EXTENSIONS.length + fieldInfos.size());

// Basic files
for (int i = 0; i < COMPOUND_EXTENSIONS.length; i++) {
  files.add(segment + "." + COMPOUND_EXTENSIONS[i]);
}

// Field norm files
for (int i = 0; i < fieldInfos.size(); i++) {
  FieldInfo fi = fieldInfos.fieldInfo(i);
  if (fi.isIndexed) {
    files.add(segment + ".f" + i);
  }
}

// Vector files
if (fieldInfos.hasVectors()) {
  for (int i = 0; i < VECTOR_EXTENSIONS.length; i++) {
    files.add(segment + "." + VECTOR_EXTENSIONS[i]);
  }
}

// Now merge all added files
Iterator it = files.iterator();
while (it.hasNext()) {
  cfsWriter.addFile((String) it.next());
}

// Perform the merge
cfsWriter.close();

// Now delete the source files
it = files.iterator();
while (it.hasNext()) {
  directory.deleteFile((String) it.next());
}
|
final int | merge()
Merges the readers specified by the {@link #add} method into the
directory passed to the constructor. Returns the number of documents in
the merged segment; a usage sketch follows this entry.
int value = mergeFields();
mergeTerms();
mergeNorms();
if (fieldInfos.hasVectors())
  mergeVectors();
if (useCompoundFile)
  createCompoundFile();
return value;
|
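A hedged usage sketch of the class as a whole. The constructor signature
is assumed from the fields the methods above reference (directory,
segment, useCompoundFile) and the paths are placeholders; the class is
package-private, so real callers live inside org.apache.lucene.index:

// Sketch only: the SegmentMerger constructor shown here is an assumption,
// not a confirmed signature, and may differ across Lucene versions.
Directory dest = FSDirectory.getDirectory("/tmp/merged", true);
Directory src1 = FSDirectory.getDirectory("/tmp/seg1", false);
Directory src2 = FSDirectory.getDirectory("/tmp/seg2", false);

SegmentMerger merger = new SegmentMerger(dest, "merged", true);
merger.add(IndexReader.open(src1));   // one reader per source segment
merger.add(IndexReader.open(src2));

int docCount = merger.merge();        // fields, terms, norms, vectors
merger.closeReaders();                // close sources only after merge()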
private final int | mergeFields()
Merge field names into a new FieldInfos and copy the stored fields of
all undeleted documents; returns the merged document count. Skipping
deleted documents renumbers the survivors, which is what the docMap in
appendPostings compensates for (see the sketch after this entry).
fieldInfos = new FieldInfos();                 // merge field names
int docCount = 0;
for (int i = 0; i < readers.size(); i++) {
  IndexReader reader = (IndexReader) readers.elementAt(i);
  fieldInfos.addIndexed(reader.getIndexedFieldNames(true), true);
  fieldInfos.addIndexed(reader.getIndexedFieldNames(false), false);
  fieldInfos.add(reader.getFieldNames(false), false);
}
fieldInfos.write(directory, segment + ".fnm");

FieldsWriter fieldsWriter =                    // merge field values
  new FieldsWriter(directory, segment, fieldInfos);
try {
  for (int i = 0; i < readers.size(); i++) {
    IndexReader reader = (IndexReader) readers.elementAt(i);
    int maxDoc = reader.maxDoc();
    for (int j = 0; j < maxDoc; j++)
      if (!reader.isDeleted(j)) {              // skip deleted docs
        fieldsWriter.addDocument(reader.document(j));
        docCount++;
      }
  }
} finally {
  fieldsWriter.close();
}
return docCount;
|
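Because deleted documents are skipped here, surviving documents are
implicitly renumbered, and postings must be remapped to match. A minimal
sketch of how such a deletion map can be built (the helper is
hypothetical, not the actual SegmentMergeInfo code):

// Hypothetical helper: maps old doc numbers to their compacted position
// once deleted docs are squeezed out; deleted slots keep a placeholder.
static int[] buildDocMap(IndexReader reader) {
  int maxDoc = reader.maxDoc();
  int[] docMap = new int[maxDoc];
  int newDoc = 0;
  for (int i = 0; i < maxDoc; i++) {
    docMap[i] = reader.isDeleted(i) ? -1 : newDoc++;
  }
  return docMap;
}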
private void | mergeNorms()
Merge the normalization factors of each indexed field, writing one byte
per undeleted document so the norms stay aligned with the renumbered
documents.
for (int i = 0; i < fieldInfos.size(); i++) {
  FieldInfo fi = fieldInfos.fieldInfo(i);
  if (fi.isIndexed) {
    OutputStream output = directory.createFile(segment + ".f" + i);
    try {
      for (int j = 0; j < readers.size(); j++) {
        IndexReader reader = (IndexReader) readers.elementAt(j);
        byte[] input = reader.norms(fi.name);
        int maxDoc = reader.maxDoc();
        for (int k = 0; k < maxDoc; k++) {
          byte norm = input != null ? input[k] : (byte) 0;
          if (!reader.isDeleted(k)) {
            output.writeByte(norm);
          }
        }
      }
    } finally {
      output.close();
    }
  }
}
|
private final void | mergeTermInfo(org.apache.lucene.index.SegmentMergeInfo[] smis, int n)
Merge one term found in one or more segments. The array smis contains
the segments that are positioned on the same term; n is the number of
cells in the array that are actually occupied.
long freqPointer = freqOutput.getFilePointer();
long proxPointer = proxOutput.getFilePointer();

int df = appendPostings(smis, n);              // append posting data

long skipPointer = writeSkip();

if (df > 0) {
  // add an entry to the dictionary with pointers to prox and freq files
  termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer));
  termInfosWriter.add(smis[0].term, termInfo);
}
|
private final void | mergeTermInfos()
Merge the term dictionaries of all segments with a k-way merge driven by
a priority queue of SegmentMergeInfo entries (see the sketch after this
entry).
int base = 0;
for (int i = 0; i < readers.size(); i++) {
  IndexReader reader = (IndexReader) readers.elementAt(i);
  TermEnum termEnum = reader.terms();
  SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
  base += reader.numDocs();
  if (smi.next())
    queue.put(smi);                            // initialize queue
  else
    smi.close();
}

SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()];

while (queue.size() > 0) {
  int matchSize = 0;                           // pop matching terms
  match[matchSize++] = (SegmentMergeInfo) queue.pop();
  Term term = match[0].term;
  SegmentMergeInfo top = (SegmentMergeInfo) queue.top();

  while (top != null && term.compareTo(top.term) == 0) {
    match[matchSize++] = (SegmentMergeInfo) queue.pop();
    top = (SegmentMergeInfo) queue.top();
  }

  mergeTermInfo(match, matchSize);             // add new TermInfo

  while (matchSize > 0) {
    SegmentMergeInfo smi = match[--matchSize];
    if (smi.next())
      queue.put(smi);                          // restore queue
    else
      smi.close();                             // done with a segment
  }
}
|
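The loop above is a classic k-way merge: pop the smallest term, gather
every segment positioned on that term, process them together, then
advance and re-insert the survivors. The same shape in miniature over
sorted string arrays, using java.util.PriorityQueue in place of Lucene's
SegmentMergeQueue (illustrative, not the actual merge code):

import java.util.*;

class KWayMergeSketch {
  // One sorted source and its cursor; ordered by current term.
  static final class Cursor implements Comparable<Cursor> {
    final String[] terms;
    int pos = 0;
    Cursor(String[] terms) { this.terms = terms; }
    String term() { return terms[pos]; }
    public int compareTo(Cursor other) { return term().compareTo(other.term()); }
  }

  public static void main(String[] args) {
    String[][] sources = {
      { "apple", "cherry" }, { "apple", "banana" }, { "banana", "cherry" }
    };
    PriorityQueue<Cursor> queue = new PriorityQueue<Cursor>();
    for (String[] source : sources)            // initialize queue
      if (source.length > 0) queue.add(new Cursor(source));
    while (!queue.isEmpty()) {
      List<Cursor> match = new ArrayList<Cursor>();
      match.add(queue.poll());                 // smallest current term
      String term = match.get(0).term();
      while (queue.peek() != null && queue.peek().term().equals(term))
        match.add(queue.poll());               // pop matching terms
      System.out.println(term + " in " + match.size() + " source(s)");
      for (Cursor cursor : match)              // advance, restore queue
        if (++cursor.pos < cursor.terms.length) queue.add(cursor);
    }
  }
}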
private final void | mergeTerms()
Open the frequency (.frq) and proximity (.prx) outputs plus the term
dictionary writer, then merge the terms of all segments.
try {
  freqOutput = directory.createFile(segment + ".frq");
  proxOutput = directory.createFile(segment + ".prx");
  termInfosWriter =
    new TermInfosWriter(directory, segment, fieldInfos);
  skipInterval = termInfosWriter.skipInterval;
  queue = new SegmentMergeQueue(readers.size());

  mergeTermInfos();
} finally {
  if (freqOutput != null) freqOutput.close();
  if (proxOutput != null) proxOutput.close();
  if (termInfosWriter != null) termInfosWriter.close();
  if (queue != null) queue.close();
}
|
private final void | mergeVectors()
Merge the TermVectors from each of the segments into the new one.
TermVectorsWriter termVectorsWriter =
  new TermVectorsWriter(directory, segment, fieldInfos);
try {
  for (int r = 0; r < readers.size(); r++) {
    IndexReader reader = (IndexReader) readers.elementAt(r);
    int maxDoc = reader.maxDoc();
    for (int docNum = 0; docNum < maxDoc; docNum++) {
      // skip deleted docs
      if (reader.isDeleted(docNum)) {
        continue;
      }
      termVectorsWriter.openDocument();

      // get all term vectors
      TermFreqVector[] sourceTermVector =
        reader.getTermFreqVectors(docNum);

      if (sourceTermVector != null) {
        for (int f = 0; f < sourceTermVector.length; f++) {
          // translate field numbers
          TermFreqVector termVector = sourceTermVector[f];
          termVectorsWriter.openField(termVector.getField());
          String[] terms = termVector.getTerms();
          int[] freqs = termVector.getTermFrequencies();
          for (int t = 0; t < terms.length; t++) {
            termVectorsWriter.addTerm(terms[t], freqs[t]);
          }
        }
      }
      // close the document even when it has no vectors, so every
      // openDocument() is paired with a closeDocument()
      termVectorsWriter.closeDocument();
    }
  }
} finally {
  termVectorsWriter.close();
}
|
private void | resetSkip()
Reset the skip buffer at the start of a term, recording the current
freq/prox file pointers as the base for the first skip entry.
skipBuffer.reset();
lastSkipDoc = 0;
lastSkipFreqPointer = freqOutput.getFilePointer();
lastSkipProxPointer = proxOutput.getFilePointer();
|
final org.apache.lucene.index.IndexReader | segmentReader(int i)
Return the i-th reader that was added.
return (IndexReader) readers.elementAt(i);
|
private long | writeSkip()
Flush the buffered skip entries to freqOutput and return the file
pointer at which they start.
long skipPointer = freqOutput.getFilePointer();
skipBuffer.writeTo(freqOutput);
return skipPointer;
|