FileDocCategorySizeDatePackage
BoostingTermQuery.javaAPI DocApache Lucene 2.2.05938Sat Jun 16 22:20:32 BST 2007org.apache.lucene.search.payloads

BoostingTermQuery.java

package org.apache.lucene.search.payloads;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositions;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanScorer;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.TermSpans;

import java.io.IOException;

/**
 * Copyright 2004 The Apache Software Foundation
 * <p/>
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * <p/>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p/>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * The BoostingTermQuery is very similar to the {@link org.apache.lucene.search.spans.SpanTermQuery} except
 * that it factors in the value of the payload located at each of the positions where the
 * {@link org.apache.lucene.index.Term} occurs.
 * <p>
 * In order to take advantage of this, you must override {@link org.apache.lucene.search.Similarity#scorePayload(byte[],int,int)}
 * which returns 1 by default.
 * <p>
 * Payload scores are averaged across term occurrences in the document.  
 * 
 * <p><font color="#FF0000">
 * WARNING: The status of the <b>Payloads</b> feature is experimental. 
 * The APIs introduced here might change in the future and will not be 
 * supported anymore in such a case.</font>
 *
 * @see org.apache.lucene.search.Similarity#scorePayload(byte[], int, int)
 */
public class BoostingTermQuery extends SpanTermQuery{


  public BoostingTermQuery(Term term) {
    super(term);
  }


  protected Weight createWeight(Searcher searcher) throws IOException {
    return new BoostingTermWeight(this, searcher);
  }

  protected class BoostingTermWeight extends SpanWeight implements Weight {


    public BoostingTermWeight(BoostingTermQuery query, Searcher searcher) throws IOException {
      super(query, searcher);
    }




    public Scorer scorer(IndexReader reader) throws IOException {
      return new BoostingSpanScorer((TermSpans)query.getSpans(reader), this, similarity,
              reader.norms(query.getField()));
    }

    class BoostingSpanScorer extends SpanScorer {

      //TODO: is this the best way to allocate this?
      byte[] payload = new byte[256];
      private TermPositions positions;
      protected float payloadScore;
      private int payloadsSeen;

      public BoostingSpanScorer(TermSpans spans, Weight weight,
                                Similarity similarity, byte[] norms) throws IOException {
        super(spans, weight, similarity, norms);
        positions = spans.getPositions();

      }

      /**
       * Go to the next document
       * 
       */
      /*public boolean next() throws IOException {

        boolean result = super.next();
        //set the payload.  super.next() properly increments the term positions
        if (result) {
          //Load the payloads for all 
          processPayload();
        }

        return result;
      }

      public boolean skipTo(int target) throws IOException {
        boolean result = super.skipTo(target);

        if (result) {
          processPayload();
        }

        return result;
      }*/

      protected boolean setFreqCurrentDoc() throws IOException {
        if (!more) {
          return false;
        }
        doc = spans.doc();
        freq = 0.0f;
        payloadScore = 0;
        payloadsSeen = 0;
        Similarity similarity1 = getSimilarity();
        while (more && doc == spans.doc()) {
          int matchLength = spans.end() - spans.start();

          freq += similarity1.sloppyFreq(matchLength);
          processPayload(similarity1);

          more = spans.next();//this moves positions to the next match in this document
        }
        return more || (freq != 0);
      }


      protected void processPayload(Similarity similarity) throws IOException {
        if (positions.isPayloadAvailable()) {
          payload = positions.getPayload(payload, 0);
          payloadScore += similarity.scorePayload(payload, 0, positions.getPayloadLength());
          payloadsSeen++;

        } else {
          //zero out the payload?
        }

      }

      public float score() throws IOException {

        return super.score() * (payloadsSeen > 0 ? (payloadScore / payloadsSeen) : 1);
      }


      public Explanation explain(final int doc) throws IOException {
        Explanation result = new Explanation();
        Explanation nonPayloadExpl = super.explain(doc);
        result.addDetail(nonPayloadExpl);
        //QUESTION: Is there a wau to avoid this skipTo call?  We need to know whether to load the payload or not
        
        Explanation payloadBoost = new Explanation();
        result.addDetail(payloadBoost);
/*
        if (skipTo(doc) == true) {
          processPayload();
        }
*/

        float avgPayloadScore = payloadScore / payloadsSeen;
        payloadBoost.setValue(avgPayloadScore);
        //GSI: I suppose we could toString the payload, but I don't think that would be a good idea 
        payloadBoost.setDescription("scorePayload(...)");
        result.setValue(nonPayloadExpl.getValue() * avgPayloadScore);
        result.setDescription("btq, product of:");
        return result;
      }
    }

  }


  public boolean equals(Object o) {
    if (!(o instanceof BoostingTermQuery))
      return false;
    BoostingTermQuery other = (BoostingTermQuery) o;
    return (this.getBoost() == other.getBoost())
            && this.term.equals(other.term);
  }
}