FileDocCategorySizeDatePackage
PhraseScorer.javaAPI DocApache Lucene 2.2.06013Sat Jun 16 22:20:34 BST 2007org.apache.lucene.search

PhraseScorer

public abstract class PhraseScorer extends Scorer
Expert: Scoring functionality for phrase queries.
A document is considered matching if it contains the phrase-query terms at "valid" positions. What "valid positions" are depends on the type of the phrase query: for an exact phrase query, terms are required to appear in adjacent locations, while for a sloppy phrase query some distance between the terms is allowed. The abstract method {@link #phraseFreq()} of extending classes is invoked for each document containing all the phrase query terms, in order to compute the frequency of the phrase query in that document. A non-zero frequency means a match.

Fields Summary
private Weight
weight
protected byte[]
norms
protected float
value
private boolean
firstTime
private boolean
more
protected PhraseQueue
pq
protected PhrasePositions
first
protected PhrasePositions
last
private float
freq
Constructors Summary
/**
 * Builds the scorer state from the per-term position enumerators.
 *
 * Stores {@code norms} and {@code weight}, caches {@code weight.getValue()} in
 * {@code value}, wraps every entry of {@code tps} in a PhrasePositions that is
 * paired with the same-index entry of {@code offsets}, links those wrappers into
 * a singly linked list ({@code first} .. {@code last}) in array order, and
 * allocates an empty PhraseQueue sized to {@code tps.length}.
 *
 * @param weight     source of the cached {@code value}; also printed by toString()
 * @param tps        one TermPositions per phrase term
 * @param offsets    indexed in parallel with {@code tps} (offsets[i] goes with tps[i])
 * @param similarity forwarded to the superclass constructor
 * @param norms      stored as-is; indexed by document number in score()
 */
PhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, byte[] norms)

 // phrase frequency in current doc as computed by phraseFreq().
 // NOTE(review): this comment appears to describe the 'freq' field rather than
 // the constructor; it seems to have been displaced when the page was extracted.


         
                 
    super(similarity);
    this.norms = norms;
    this.weight = weight;
    this.value = weight.getValue();

    // convert tps to a list of phrase positions.
    // note: phrase-position differs from term-position in that its position
    // reflects the phrase offset: pp.pos = tp.pos - offset.
    // this allows to easily identify a matching (exact) phrase 
    // when all PhrasePositions have exactly the same position.
    for (int i = 0; i < tps.length; i++) {
      PhrasePositions pp = new PhrasePositions(tps[i], offsets[i]);
      if (last != null) {			  // add next to end of list
        last.next = pp;
      } else
        first = pp;                               // very first element: it is also the head
      last = pp;                                  // the new element is always the tail
    }

    pq = new PhraseQueue(tps.length);             // construct empty pq

  
Methods Summary
/**
 * Scans forward from the current list state until a document with a non-zero
 * phrase frequency is found or the enumerators are exhausted.
 *
 * On success the frequency of the matching document is left in {@code freq}.
 *
 * @return true if a matching document was found, false once {@code more}
 *         has become false
 */
private booleandoNext()

    while (more) {
      // Rotate the list until the head is no longer behind the tail.
      // NOTE(review): this relies on the list being ordered by doc (see sort());
      // under that assumption first.doc >= last.doc means every term is on one doc.
      while (more && first.doc < last.doc) {      // find doc w/ all the terms
        more = first.skipTo(last.doc);            // skip first upto last
        firstToLast();                            // and move it to the end
      }

      if (more) {
        // found a doc with all of the terms
        freq = phraseFreq();                      // check for phrase
        if (freq == 0.0f)                         // no match
          more = last.next();                     // trigger further scanning
        else
          return true;                            // found a match
      }
    }
    return false;                                 // no more matches
  
public intdoc()

 return first.doc; 
/**
 * Produces the term-frequency part of a score explanation for one document.
 *
 * The scorer is advanced with next() until it is exhausted or positioned on a
 * document number that is not smaller than {@code doc}. If it ends up exactly
 * on {@code doc}, the phrase frequency recorded for that document is used;
 * otherwise a frequency of zero is explained.
 *
 * @param doc the document number to explain
 * @return an Explanation whose value is {@code tf(phraseFreq)} and whose
 *         description names the phrase frequency that was used
 */
public org.apache.lucene.search.Explanationexplain(int doc)

    Explanation result = new Explanation();

    // Advance until there is nothing left or the target has been reached/passed.
    for (;;) {
      if (!next()) {
        break;
      }
      if (doc() >= doc) {
        break;
      }
    }

    float phraseFreq;
    if (doc() == doc) {
      phraseFreq = freq;
    } else {
      phraseFreq = 0.0f;
    }

    float tf = getSimilarity().tf(phraseFreq);
    result.setValue(tf);
    result.setDescription("tf(phraseFreq=" + phraseFreq + ")");

    return result;
  
/**
 * Rotates the phrase-position list by one: the current head is appended after
 * the current tail and becomes the new tail, and the old head's successor
 * becomes the new head.
 *
 * The four assignments are order-sensitive; each one reads a link that the
 * following one overwrites.
 */
protected final voidfirstToLast()

    last.next = first;			  // move first to end of list
    last = first;                         // old head is now the tail
    first = first.next;                   // old head's successor becomes the head
    last.next = null;                     // terminate the list at the new tail
  
/**
 * Positions every phrase-position in the list on its first entry.
 *
 * Walks the list from {@code first}, calling next() on each element and
 * stopping early as soon as one of them reports that it has nothing more.
 * If all elements could be advanced, the list is sorted.
 */
private voidinit()

    PhrasePositions cursor = first;
    while (more && cursor != null) {
      more = cursor.next();     // advance this element; false ends the walk
      cursor = cursor.next;     // follow the list link
    }
    if (more) {
      sort();
    }
  
/**
 * Moves to the next matching document.
 *
 * On the first call the list is initialised through init() and the
 * {@code firstTime} flag is cleared. On later calls, provided {@code more} is
 * still true, the tail element is advanced so that the scan leaves the
 * current document. The actual search is then delegated to doNext().
 *
 * @return the result of doNext(): true if a matching document was found
 */
public booleannext()

    if (firstTime) {
      init();                                     // first call: position all elements
      firstTime = false;
    } else if (more) {
      more = last.next();                         // trigger further scanning
    }
    return doNext();
  
/** Implemented by subclasses; doNext() stores the returned value in {@code freq} and treats 0.0f as "no match". */
protected abstract floatphraseFreq()
For a document containing all the phrase query terms, compute the frequency of the phrase in that document. A non-zero frequency means a match.
Note that containing all phrase terms does not guarantee a match: they have to be found in matching locations.

return
frequency of the phrase in current doc, 0 if not found.

/**
 * Drains the priority queue into the linked list.
 *
 * The list is emptied first; elements are then popped from {@code pq} one by
 * one and appended in pop order, so that afterwards {@code first} is the first
 * element popped and {@code last} the final one. The queue is empty on return.
 */
protected final voidpqToList()

    first = null;
    last = null;
    while (pq.top() != null) {
      PhrasePositions popped = (PhrasePositions) pq.pop();
      if (last == null) {
        first = popped;           // list was empty: popped element is the head
      } else {
        last.next = popped;       // append behind the current tail
      }
      last = popped;
      popped.next = null;         // the tail never points anywhere
    }
  
public floatscore()

    //System.out.println("scoring " + first.doc);
    float raw = getSimilarity().tf(freq) * value; // raw score
    return raw * Similarity.decodeNorm(norms[first.doc]); // normalize
  
/**
 * Skips every phrase-position to {@code target} and then searches for the
 * next matching document.
 *
 * {@code firstTime} is cleared, so a later next() call will not run init().
 * Each list element is asked to skipTo(target); the walk stops as soon as one
 * of them returns false. If all succeeded the list is re-sorted before the
 * search is delegated to doNext().
 *
 * @param target value forwarded to each element's skipTo
 * @return the result of doNext(): true if a matching document was found
 */
public booleanskipTo(int target)

    firstTime = false;
    for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
      more = pp.skipTo(target);
    }
    if (more)
      sort();                                     // re-sort
    return doNext();
  
private voidsort()

    pq.clear();
    for (PhrasePositions pp = first; pp != null; pp = pp.next)
      pq.put(pp);
    pqToList();
  
public java.lang.StringtoString()

 return "scorer(" + weight + ")";