FileDocCategorySizeDatePackage
NearSpansOrdered.javaAPI DocApache Lucene 2.1.09115Wed Feb 14 10:46:38 GMT 2007org.apache.lucene.search.spans

NearSpansOrdered

public class NearSpansOrdered extends Object implements Spans
A Spans that is formed from the ordered subspans of a SpanNearQuery where the subspans do not overlap and have a maximum slop between them.

The formed spans only contains minimum slop matches.
The matching slop is computed from the distance(s) between the non overlapping matching Spans.
Successive matches are always formed from the successive Spans of the SpanNearQuery.

The formed spans may contain overlaps when the slop is at least 1. For example, when querying using

t1 t2 t3
with slop at least 1, the fragment:
t1 t2 t1 t3 t2 t3
matches twice:
t1 t2 .. t3 
 t1 .. t2 t3

Fields Summary
private final int
allowedSlop
private boolean
firstTime
private boolean
more
private final Spans[]
subSpans
The spans in the same order as the SpanNearQuery
private boolean
inSameDoc
Indicates that all subSpans have same doc()
private int
matchDoc
private int
matchStart
private int
matchEnd
private final Spans[]
subSpansByDoc
private final Comparator
spanDocComparator
private SpanNearQuery
query
Constructors Summary
public NearSpansOrdered(SpanNearQuery spanNearQuery, IndexReader reader)


      
    
    if (spanNearQuery.getClauses().length < 2) {
      throw new IllegalArgumentException("Less than 2 clauses: "
                                         + spanNearQuery);
    }
    allowedSlop = spanNearQuery.getSlop();
    SpanQuery[] clauses = spanNearQuery.getClauses();
    subSpans = new Spans[clauses.length];
    subSpansByDoc = new Spans[clauses.length];
    for (int i = 0; i < clauses.length; i++) {
      subSpans[i] = clauses[i].getSpans(reader);
      subSpansByDoc[i] = subSpans[i]; // used in toSameDoc()
    }
    query = spanNearQuery; // kept for toString() only.
  
Methods Summary
private booleanadvanceAfterOrdered()
Advances the subSpans to just after an ordered match with a minimum slop that is smaller than the slop allowed by the SpanNearQuery.

return
true iff there is such a match.

    while (more && (inSameDoc || toSameDoc())) {
      if (stretchToOrder() && shrinkToAfterShortestMatch()) {
        return true;
      }
    }
    return false; // no more matches
  
public intdoc()

 return matchDoc; 
private static final booleandocSpansOrdered(int start1, int end1, int start2, int end2)
Like {@link #docSpansOrdered(Spans,Spans)}, but use the spans starts and ends as parameters.

    return (start1 == start2) ? (end1 < end2) : (start1 < start2);
  
static final booleandocSpansOrdered(org.apache.lucene.search.spans.Spans spans1, org.apache.lucene.search.spans.Spans spans2)
Check whether two Spans in the same document are ordered.

param
spans1
param
spans2
return
true iff spans1 starts before spans2 or the spans start at the same position, and spans1 ends before spans2.

    assert spans1.doc() == spans2.doc() : "doc1 " + spans1.doc() + " != doc2 " + spans2.doc();
    int start1 = spans1.start();
    int start2 = spans2.start();
    /* Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() : */
    return (start1 == start2) ? (spans1.end() < spans2.end()) : (start1 < start2);
  
public intend()

 return matchEnd; 
public booleannext()

    if (firstTime) {
      firstTime = false;
      for (int i = 0; i < subSpans.length; i++) {
        if (! subSpans[i].next()) {
          more = false;
          return false;
        }
      }
      more = true;
    }
    return advanceAfterOrdered();
  
private booleanshrinkToAfterShortestMatch()
The subSpans are ordered in the same doc, so there is a possible match. Compute the slop while making the match as short as possible by advancing all subSpans except the last one in reverse order.

    matchStart = subSpans[subSpans.length - 1].start();
    matchEnd = subSpans[subSpans.length - 1].end();
    int matchSlop = 0;
    int lastStart = matchStart;
    int lastEnd = matchEnd;
    for (int i = subSpans.length - 2; i >= 0; i--) {
      Spans prevSpans = subSpans[i];
      int prevStart = prevSpans.start();
      int prevEnd = prevSpans.end();
      while (true) { // Advance prevSpans until after (lastStart, lastEnd)
        if (! prevSpans.next()) {
          inSameDoc = false;
          more = false;
          break; // Check remaining subSpans for final match.
        } else if (matchDoc != prevSpans.doc()) {
          inSameDoc = false; // The last subSpans is not advanced here.
          break; // Check remaining subSpans for last match in this document.
        } else {
          int ppStart = prevSpans.start();
          int ppEnd = prevSpans.end(); // Cannot avoid invoking .end()
          if (! docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) {
            break; // Check remaining subSpans.
          } else { // prevSpans still before (lastStart, lastEnd)
            prevStart = ppStart;
            prevEnd = ppEnd;
          }
        }
      }
      assert prevStart <= matchStart;
      if (matchStart > prevEnd) { // Only non overlapping spans add to slop.
        matchSlop += (matchStart - prevEnd);
      }
      /* Do not break on (matchSlop > allowedSlop) here to make sure
       * that subSpans[0] is advanced after the match, if any.
       */
      matchStart = prevStart;
      lastStart = prevStart;
      lastEnd = prevEnd;
    }
    return matchSlop <= allowedSlop; // ordered and allowed slop
  
public booleanskipTo(int target)

    if (firstTime) {
      firstTime = false;
      for (int i = 0; i < subSpans.length; i++) {
        if (! subSpans[i].skipTo(target)) {
          more = false;
          return false;
        }
      }
      more = true;
    } else if (more && (subSpans[0].doc() < target)) {
      if (subSpans[0].skipTo(target)) {
        inSameDoc = false;
      } else {
        more = false;
        return false;
      }
    }
    return advanceAfterOrdered();
  
public intstart()

 return matchStart; 
private booleanstretchToOrder()
Order the subSpans within the same document by advancing all later spans after the previous one.

    matchDoc = subSpans[0].doc();
    for (int i = 1; inSameDoc && (i < subSpans.length); i++) {
      while (! docSpansOrdered(subSpans[i-1], subSpans[i])) {
        if (! subSpans[i].next()) {
          inSameDoc = false;
          more = false;
          break;
        } else if (matchDoc != subSpans[i].doc()) {
          inSameDoc = false;
          break;
        }
      }
    }
    return inSameDoc;
  
private booleantoSameDoc()
Advance the subSpans to the same document

    Arrays.sort(subSpansByDoc, spanDocComparator);
    int firstIndex = 0;
    int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
    while (subSpansByDoc[firstIndex].doc() != maxDoc) {
      if (! subSpansByDoc[firstIndex].skipTo(maxDoc)) {
        more = false;
        inSameDoc = false;
        return false;
      }
      maxDoc = subSpansByDoc[firstIndex].doc();
      if (++firstIndex == subSpansByDoc.length) {
        firstIndex = 0;
      }
    }
    for (int i = 0; i < subSpansByDoc.length; i++) {
      assert (subSpansByDoc[i].doc() == maxDoc)
             : " NearSpansOrdered.toSameDoc() spans " + subSpansByDoc[0]
                                 + "\n at doc " + subSpansByDoc[i].doc()
                                 + ", but should be at " + maxDoc;
    }
    inSameDoc = true;
    return true;
  
public java.lang.StringtoString()

    return getClass().getName() + "("+query.toString()+")@"+
      (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END"));