package org.apache.lucene.search.payloads;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.TermSpans;
import java.io.IOException;
/**
* Copyright 2004 The Apache Software Foundation
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* The BoostingTermQuery is very similar to the {@link org.apache.lucene.search.spans.SpanTermQuery} except
* that it factors in the value of the payload located at each of the positions where the
* {@link org.apache.lucene.index.Term} occurs.
* <p>
* In order to take advantage of this, you must override {@link org.apache.lucene.search.Similarity#scorePayload(byte[],int,int)}
* which returns 1 by default.
* <p>
* Payload scores are averaged across term occurrences in the document.
*
* <p><font color="#FF0000">
* WARNING: The status of the <b>Payloads</b> feature is experimental.
* The APIs introduced here might change in the future and will not be
* supported anymore in such a case.</font>
*
* @see org.apache.lucene.search.Similarity#scorePayload(byte[], int, int)
*/
public class BoostingTermQuery extends SpanTermQuery{
public BoostingTermQuery(Term term) {
super(term);
}
protected Weight createWeight(Searcher searcher) throws IOException {
return new BoostingTermWeight(this, searcher);
}
protected class BoostingTermWeight extends SpanWeight implements Weight {
public BoostingTermWeight(BoostingTermQuery query, Searcher searcher) throws IOException {
super(query, searcher);
}
public Scorer scorer(IndexReader reader) throws IOException {
return new BoostingSpanScorer((TermSpans)query.getSpans(reader), this, similarity,
reader.norms(query.getField()));
}
class BoostingSpanScorer extends SpanScorer {
//TODO: is this the best way to allocate this?
byte[] payload = new byte[256];
private TermPositions positions;
protected float payloadScore;
private int payloadsSeen;
public BoostingSpanScorer(TermSpans spans, Weight weight,
Similarity similarity, byte[] norms) throws IOException {
super(spans, weight, similarity, norms);
positions = spans.getPositions();
}
/**
* Go to the next document
*
*/
/*public boolean next() throws IOException {
boolean result = super.next();
//set the payload. super.next() properly increments the term positions
if (result) {
//Load the payloads for all
processPayload();
}
return result;
}
public boolean skipTo(int target) throws IOException {
boolean result = super.skipTo(target);
if (result) {
processPayload();
}
return result;
}*/
protected boolean setFreqCurrentDoc() throws IOException {
if (!more) {
return false;
}
doc = spans.doc();
freq = 0.0f;
payloadScore = 0;
payloadsSeen = 0;
Similarity similarity1 = getSimilarity();
while (more && doc == spans.doc()) {
int matchLength = spans.end() - spans.start();
freq += similarity1.sloppyFreq(matchLength);
processPayload(similarity1);
more = spans.next();//this moves positions to the next match in this document
}
return more || (freq != 0);
}
protected void processPayload(Similarity similarity) throws IOException {
if (positions.isPayloadAvailable()) {
payload = positions.getPayload(payload, 0);
payloadScore += similarity.scorePayload(payload, 0, positions.getPayloadLength());
payloadsSeen++;
} else {
//zero out the payload?
}
}
public float score() throws IOException {
return super.score() * (payloadsSeen > 0 ? (payloadScore / payloadsSeen) : 1);
}
public Explanation explain(final int doc) throws IOException {
Explanation result = new Explanation();
Explanation nonPayloadExpl = super.explain(doc);
result.addDetail(nonPayloadExpl);
//QUESTION: Is there a wau to avoid this skipTo call? We need to know whether to load the payload or not
Explanation payloadBoost = new Explanation();
result.addDetail(payloadBoost);
/*
if (skipTo(doc) == true) {
processPayload();
}
*/
float avgPayloadScore = payloadScore / payloadsSeen;
payloadBoost.setValue(avgPayloadScore);
//GSI: I suppose we could toString the payload, but I don't think that would be a good idea
payloadBoost.setDescription("scorePayload(...)");
result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
result.setDescription("btq, product of:");
return result;
}
}
}
public boolean equals(Object o) {
if (!(o instanceof BoostingTermQuery))
return false;
BoostingTermQuery other = (BoostingTermQuery) o;
return (this.getBoost() == other.getBoost())
&& this.term.equals(other.term);
}
}
|