File | Doc | Category | Size | Date | Package
TestScorerPerf.java | API Doc | Apache Lucene 2.1.0 | 11401 | Wed Feb 14 10:46:36 GMT 2007 | org.apache.lucene.search

TestScorerPerf

public class TestScorerPerf extends TestCase
author
yonik
version
$Id$

Fields Summary
Random
r
boolean
validate
BitSet[]
sets
IndexSearcher
s
Constructors Summary
Methods Summary
java.util.BitSetaddClause(org.apache.lucene.search.BooleanQuery bq, java.util.BitSet result)

    // Adds one required (MUST) clause to bq: a ConstantScoreQuery over a
    // BitSetFilter wrapping a randomly chosen member of the sets field.
    // When validate is on, the chosen set is and-ed into the running result
    // (or cloned if result is still null) and that set is returned; when
    // validate is off, result is handed back untouched.
    final BitSet picked = sets[r.nextInt(sets.length)];
    bq.add(new ConstantScoreQuery(new BitSetFilter(picked)), BooleanClause.Occur.MUST);

    if (!validate) {
      return result;
    }
    if (result != null) {
      // narrow the expected matches in place
      result.and(picked);
      return result;
    }
    // first clause: start from a private copy so the shared set is never mutated
    return (BitSet) picked.clone();
  
public voidcreateDummySearcher()

    // Points the s field at a searcher over an in-memory index that holds
    // no documents at all.
    // NOTE: this could possibly fail if Lucene starts checking for docid ranges...
    final RAMDirectory emptyDir = new RAMDirectory();
    // open a writer on the directory and close it right away, adding nothing
    new IndexWriter(emptyDir, new WhitespaceAnalyzer(), true).close();
    s = new IndexSearcher(emptyDir);
  
public voidcreateRandomTerms(int nDocs, int nTerms, double power, org.apache.lucene.store.Directory dir)

    // Writes nDocs documents into dir through a new IndexWriter (opened with
    // its third constructor argument set to true). Term j is the one-character
    // string whose char code is j, stored in field "f" (not stored,
    // un-tokenized). It is added to a document when r.nextInt(freq[j]) == 0,
    // where freq[j] = ceil((nTerms+1-j)^power), so lower-numbered terms are rarer.
    int[] freq = new int[nTerms];
    for (int i=0; i<nTerms; i++) {
      int f = (nTerms+1)-i;  // make first terms less frequent
      freq[i] = (int)Math.ceil(Math.pow(f,power));
    }

    IndexWriter iw = new IndexWriter(dir,new WhitespaceAnalyzer(), true);
    try {
      iw.setMaxBufferedDocs(123);
      for (int i=0; i<nDocs; i++) {
        Document d = new Document();
        for (int j=0; j<nTerms; j++) {
          if (r.nextInt(freq[j]) == 0) {
            d.add(new Field("f", Character.toString((char)j), Field.Store.NO, Field.Index.UN_TOKENIZED));
          }
        }
        iw.addDocument(d);
      }
    } finally {
      // close the writer even when indexing fails part-way, so it is not left open
      iw.close();
    }
  
public intdoConjunctions(int iter, int maxClauses)

    // Runs iter random conjunctions against the s field. Each query gets
    // between 2 and maxClauses clauses from addClause. Returns the running
    // total of the collectors' getSum() values. With validate on, a
    // MatchingHitCollector built from the expected set is used and the hit
    // count is asserted to equal that set's cardinality.
    int total = 0;

    for (int round = 0; round < iter; round++) {
      final int clauseCount = 2 + r.nextInt(maxClauses - 1); // min 2 clauses
      final BooleanQuery conjunction = new BooleanQuery();
      BitSet expected = null;
      for (int added = 0; added < clauseCount; added++) {
        expected = addClause(conjunction, expected);
      }

      final CountingHitCollector collector;
      if (validate) {
        collector = new MatchingHitCollector(expected);
      } else {
        collector = new CountingHitCollector();
      }
      s.search(conjunction, collector);
      total += collector.getSum();
      if (validate) {
        assertEquals(expected.cardinality(), collector.getCount());
      }
    }

    return total;
  
public intdoNestedConjunctions(int iter, int maxOuterClauses, int maxClauses)

    // Like doConjunctions, but every iteration builds an outer BooleanQuery of
    // 2..maxOuterClauses required sub-queries, each itself a conjunction of
    // 2..maxClauses clauses from addClause. Returns the running total of the
    // collectors' getSum() values.
    int total = 0;

    for (int round = 0; round < iter; round++) {
      final int outerCount = 2 + r.nextInt(maxOuterClauses - 1); // min 2 outer clauses
      final BooleanQuery outer = new BooleanQuery();
      // a single expectation is threaded through every inner query, so it
      // accumulates across the whole nested conjunction
      BitSet expected = null;

      for (int o = 0; o < outerCount; o++) {
        final int innerCount = 2 + r.nextInt(maxClauses - 1); // min 2 clauses
        final BooleanQuery inner = new BooleanQuery();
        for (int c = 0; c < innerCount; c++) {
          expected = addClause(inner, expected);
        }
        outer.add(inner, BooleanClause.Occur.MUST);
      }

      final CountingHitCollector collector;
      if (validate) {
        collector = new MatchingHitCollector(expected);
      } else {
        collector = new CountingHitCollector();
      }
      s.search(outer, collector);
      total += collector.getSum();
      if (validate) {
        assertEquals(expected.cardinality(), collector.getCount());
      }
    }

    return total;
  
public intdoNestedTermConjunctions(org.apache.lucene.search.IndexSearcher s, int termsInIndex, int maxOuterClauses, int maxClauses, int iter)

    // Runs iter nested term conjunctions against the searcher passed in as s
    // (the parameter, which shadows the field of the same name). Each outer
    // BooleanQuery holds 2..maxOuterClauses required inner queries; each inner
    // query requires 2..maxClauses TermQuerys on field "f", whose term text is
    // the one-character string for a random number in [0, termsInIndex).
    // Returns the running total of the collectors' getSum() values.
    int ret=0;

    for (int i=0; i<iter; i++) {
      int oClauses = r.nextInt(maxOuterClauses-1)+2;
      BooleanQuery oq = new BooleanQuery();
      for (int o=0; o<oClauses; o++) {

      int nClauses = r.nextInt(maxClauses-1)+2; // min 2 clauses
      BooleanQuery bq = new BooleanQuery();
      // terms already used by this inner query
      BitSet terms = new BitSet(termsInIndex);
      for (int j=0; j<nClauses; j++) {
        int tnum;
        // don't pick same clause twice; once every term has been used a
        // repeat is unavoidable, so stop retrying rather than spin forever
        do {
          tnum = r.nextInt(termsInIndex);
        } while (terms.get(tnum) && terms.cardinality() < termsInIndex);
        // remember the pick -- without this the check above never rejects anything
        terms.set(tnum);
        Query tq = new TermQuery(new Term("f",Character.toString((char)tnum)));
        bq.add(tq, BooleanClause.Occur.MUST);
      } // inner

      oq.add(bq, BooleanClause.Occur.MUST);
      } // outer


      CountingHitCollector hc = new CountingHitCollector();
      s.search(oq, hc);
      ret += hc.getSum();
    }

    return ret;
  
public intdoSloppyPhrase(org.apache.lucene.search.IndexSearcher s, int termsInIndex, int maxClauses, int iter)

    // Runs iter random PhraseQuerys against the searcher passed in as s.
    // Each phrase has 2..maxClauses terms on field "f"; the term at position p
    // is the one-character string for a random number in [0, termsInIndex)
    // (repeats are allowed), and the slop is set to termsInIndex.
    // Returns the running total of the collectors' getSum() values.
    int total = 0;

    for (int round = 0; round < iter; round++) {
      final int termCount = 2 + r.nextInt(maxClauses - 1); // min 2 clauses
      final PhraseQuery phrase = new PhraseQuery();
      for (int pos = 0; pos < termCount; pos++) {
        final char termChar = (char) r.nextInt(termsInIndex);
        phrase.add(new Term("f", Character.toString(termChar)), pos);
      }
      phrase.setSlop(termsInIndex);  // this could be random too

      final CountingHitCollector collector = new CountingHitCollector();
      s.search(phrase, collector);
      total += collector.getSum();
    }

    return total;
  
public intdoTermConjunctions(org.apache.lucene.search.IndexSearcher s, int termsInIndex, int maxClauses, int iter)

    // Runs iter term conjunctions against the searcher passed in as s (the
    // parameter, which shadows the field of the same name). Each BooleanQuery
    // requires 2..maxClauses TermQuerys on field "f", whose term text is the
    // one-character string for a random number in [0, termsInIndex).
    // Returns the running total of the collectors' getSum() values.
    int ret=0;

    for (int i=0; i<iter; i++) {
      int nClauses = r.nextInt(maxClauses-1)+2; // min 2 clauses
      BooleanQuery bq = new BooleanQuery();
      // terms already used by this query
      BitSet terms = new BitSet(termsInIndex);
      for (int j=0; j<nClauses; j++) {
        int tnum;
        // don't pick same clause twice; once every term has been used a
        // repeat is unavoidable, so stop retrying rather than spin forever
        do {
          tnum = r.nextInt(termsInIndex);
        } while (terms.get(tnum) && terms.cardinality() < termsInIndex);
        // remember the pick -- without this the check above never rejects anything
        terms.set(tnum);
        Query tq = new TermQuery(new Term("f",Character.toString((char)tnum)));
        bq.add(tq, BooleanClause.Occur.MUST);
      }

      CountingHitCollector hc = new CountingHitCollector();
      s.search(bq, hc);
      ret += hc.getSum();
    }

    return ret;
  
public java.util.BitSetrandBitSet(int sz, int numBitsToSet)

    BitSet set = new BitSet(sz);
    for (int i=0; i<numBitsToSet; i++) {
      set.set(r.nextInt(sz));
    }
    return set;
  
public java.util.BitSet[]randBitSets(int numSets, int setSize)

    // Returns numSets bit sets from randBitSet, each of size setSize, each
    // asked to set a random number of bits drawn from [0, setSize).
    final BitSet[] generated = new BitSet[numSets];
    for (int idx = 0; idx < numSets; idx++) {
      final int bitsToSet = r.nextInt(setSize);
      generated[idx] = randBitSet(setSize, bitsToSet);
    }
    return generated;
  
public voidtestConjunctions()

    // test many small sets... the bugs will be found on boundary conditions
    createDummySearcher();
    validate=true;
    sets=randBitSets(1000,10);
    doConjunctions(10000,5);
    doNestedConjunctions(10000,3,3);
    s.close();