File: MemoryIndexTest.java
Category: API Doc (Apache Lucene 2.2.0)
Size: 16379
Date: Sat Jun 16 22:21:04 BST 2007
Package: org.apache.lucene.index.memory

MemoryIndexTest

public class MemoryIndexTest extends TestCase
Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour, returning the same results for any given query. Runs a set of queries against a set of files and compares results for identity. Can also be used as a simple benchmark.
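As a rough sketch of the check this class automates, the same text can be indexed into both a MemoryIndex and a single-document RAMDirectory index and scored against one query; the test then asserts that the two scores agree. The class name, field name, sample text and query below are illustrative only; the Lucene 2.x calls mirror the ones used in the methods further down.

    import org.apache.lucene.analysis.SimpleAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.memory.MemoryIndex;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.Hits;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.store.RAMDirectory;

    // hypothetical stand-alone sketch; not part of MemoryIndexTest itself
    public class MemoryVsRamSketch {
      public static void main(String[] args) throws Exception {
        SimpleAnalyzer analyzer = new SimpleAnalyzer();
        String text = "Apache Lucene is a high-performance text search engine library";
        Query query = new QueryParser("content", analyzer).parse("Apach~ AND libra*");

        // score the query against an in-memory MemoryIndex
        MemoryIndex mem = new MemoryIndex();
        mem.addField("content", text, analyzer);
        float memScore = mem.search(query);

        // score the same query against a single-document RAMDirectory index
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, analyzer, true);
        Document doc = new Document();
        doc.add(new Field("content", text, Field.Store.NO, Field.Index.TOKENIZED));
        writer.addDocument(doc);
        writer.close();
        IndexSearcher searcher = new IndexSearcher(dir);
        Hits hits = searcher.search(query);
        float ramScore = hits.length() > 0 ? hits.score(0) : 0.0f;
        searcher.close();

        // MemoryIndexTest's assertion, in spirit: both paths must produce the same score
        System.out.println("mem=" + memScore + ", ram=" + ramScore);
      }
    }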

Example usage:

cd lucene-svn
java -server -cp ~/unix/java/share/misc/junit/junit.jar:build/classes:build/lucene-core-2.1-dev.jar:build/contrib/memory/classes/test:build/contrib/memory/classes/java org.apache.lucene.index.memory.MemoryIndexTest 1 1 memram @contrib/memory/src/test/org/apache/lucene/index/memory/testqueries.txt *.txt *.html *.xml xdocs/*.xml src/test/org/apache/lucene/queryParser/*.java contrib/memory/src/java/org/apache/lucene/index/memory/*.java
where testqueries.txt is a file with one query per line, such as:
#
# queries extracted from TestQueryParser.java
#
Apache
Apach~ AND Copy*

a AND b
(a AND b)
c OR (a AND b)
a AND NOT b
a AND -b
a AND !b
a && b
a && ! b

a OR b
a || b
a OR !b
a OR ! b
a OR -b

+term -term term
foo:term AND field:anotherTerm
term AND "phrase phrase"
"hello there"

germ term^2.0
(term)^2.0
(germ term)^2.0
term^2.0
term^2
"germ term"^2.0
"term germ"^2

(foo OR bar) AND (baz OR boo)
((a OR b) AND NOT c) OR d
+(apple "steve jobs") -(foo bar baz)
+title:(dog OR cat) -author:"bob dole"


a&b
a&&b
.NET

"term germ"~2
"term germ"~2 flork
"term"~2
"~2 germ"
"term germ"~2^2

3
term 1.0 1 2
term term1 term2

term*
term*^2
term~
term~0.7
term~^2
term^2~
term*germ
term*germ^3


term*
Term*
TERM*
term*
Term*
TERM*

// Then 'full' wildcard queries:
te?m
Te?m
TE?M
Te?m*gerM
te?m
Te?m
TE?M
Te?m*gerM

term term term
term +stop term
term -stop term
drop AND stop AND roll
term phrase term
term AND NOT phrase term
stop


[ a TO c]
[ a TO c ]
{ a TO c}
{ a TO c }
{ a TO c }^2.0
[ a TO c] OR bar
[ a TO c] AND bar
( bar blar { a TO c})
gack ( bar blar { a TO c})


+weltbank +worlbank
+weltbank\n+worlbank
weltbank \n+worlbank
weltbank \n +worlbank
+weltbank\r+worlbank
weltbank \r+worlbank
weltbank \r +worlbank
+weltbank\r\n+worlbank
weltbank \r\n+worlbank
weltbank \r\n +worlbank
weltbank \r \n +worlbank
+weltbank\t+worlbank
weltbank \t+worlbank
weltbank \t +worlbank


term term term
term +term term
term term +term
term +term +term
-term term term


on^1.0
"hello"^2.0
hello^2.0
"on"^1.0
the^3
author
whoschek.AT.lbl.DOT.gov

Fields Summary
private Analyzer analyzer
private boolean fastMode
private static final String FIELD_NAME
public String fileDir
private static final Charset DEFAULT_PLATFORM_CHARSET
Constructors Summary
Methods Summary
private org.apache.lucene.document.Document createDocument(java.lang.String content)

    Document doc = new Document();
    doc.add(new Field(FIELD_NAME, content, Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS));
    return doc;
  
private MemoryIndex createMemoryIndex(org.apache.lucene.document.Document doc)

    MemoryIndex index = new MemoryIndex();
    Enumeration iter = doc.fields();
    while (iter.hasMoreElements()) {
      Field field = (Field) iter.nextElement();
      index.addField(field.name(), field.stringValue(), analyzer);
    }
    return index;
  
private org.apache.lucene.store.RAMDirectory createRAMIndex(org.apache.lucene.document.Document doc)

    RAMDirectory dir = new RAMDirectory();    
    IndexWriter writer = null;
    try {
      writer = new IndexWriter(dir, analyzer, true);
      writer.setMaxFieldLength(Integer.MAX_VALUE);
      writer.addDocument(doc);
      writer.optimize();
      return dir;
    } catch (IOException e) { // should never happen (RAMDirectory)
      throw new RuntimeException(e);
    } finally {
      try {
        if (writer != null) writer.close();
      } catch (IOException e) { // should never happen (RAMDirectory)
        throw new RuntimeException(e);
      }
    }
  
private int getMemorySize(java.lang.Object index)

    if (index instanceof Directory) {
      try {
        Directory dir = (Directory) index;
        int size = 0;
        String[] fileNames = dir.list();
        for (int i=0; i < fileNames.length; i++) {
          size += dir.fileLength(fileNames[i]);
        }
        return size;
      }
      catch (IOException e) { // can never happen (RAMDirectory)
        throw new RuntimeException(e);
      }
    }
    else {
      return ((MemoryIndex) index).getMemorySize();
    }
  
static java.lang.String[] listFiles(java.lang.String[] fileNames)
Returns all files matching the given file name patterns (quick 'n' dirty).

    LinkedHashSet allFiles = new LinkedHashSet();
    for (int i=0; i < fileNames.length; i++) {
      int k;
      if ((k = fileNames[i].indexOf("*")) < 0) {
        allFiles.add(fileNames[i]);
      } else {
        String prefix = fileNames[i].substring(0, k);
        if (prefix.length() == 0) prefix = ".";
        final String suffix = fileNames[i].substring(k+1);
        File[] files = new File(prefix).listFiles(new FilenameFilter() {
          public boolean accept(File dir, String name) {
            return name.endsWith(suffix);
          }
        });
        if (files != null) {
          for (int j=0; j < files.length; j++) {
            allFiles.add(files[j].getPath());
          }
        }
      }      
    }
    
    String[] result = new String[allFiles.size()];
    allFiles.toArray(result);
    return result;
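A hypothetical call, for illustration only (the patterns are just examples). Note that only a single '*' per pattern is honoured: the part before it is treated as the directory and the part after it is matched via endsWith.

    // expand literals and simple single-'*' patterns into concrete paths
    String[] files = listFiles(new String[] {
        "CHANGES.txt",   // no '*': passed through unchanged
        "*.txt",         // every name ending in ".txt" in the current directory
        "xdocs/*.xml"    // every name ending in ".xml" directly under xdocs/
    });
    System.out.println(java.util.Arrays.asList(files));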
  
public static void main(java.lang.String[] args)
Runs the tests and/or benchmark

    new MemoryIndexTest().run(args);    
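For reference, run(String[]) below consumes the arguments positionally; a minimal in-process invocation might look like this (the values are only an example, and omitted trailing arguments fall back to the defaults hard-coded in run()). Since run() rethrows any Throwable, a real caller would declare or catch it.

    // iterations, runs per query, which index(es) to exercise, query (or @file), files
    MemoryIndexTest.main(new String[] {
        "1",           // number of iterations
        "1",           // number of runs per query
        "memram",      // "mem", "ram", or "memram" (both, with score cross-check)
        "term*",       // a single query, or "@queries.txt" (one query per line, resolved against fileDir)
        "CHANGES.txt"  // one or more files to index (patterns must already be expanded)
    });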
  
private org.apache.lucene.search.Query parseQuery(java.lang.String expression)

    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
//    parser.setPhraseSlop(0);
    return parser.parse(expression);
  
private float query(java.lang.Object index, org.apache.lucene.search.Query query)

//    System.out.println("MB=" + (getMemorySize(index) / (1024.0f * 1024.0f)));
    Searcher searcher = null;
    try {
      if (index instanceof Directory)
        searcher = new IndexSearcher((Directory)index);
      else 
        searcher = ((MemoryIndex) index).createSearcher();

      final float[] scores = new float[1]; // inits to 0.0f
      searcher.search(query, new HitCollector() {
        public void collect(int doc, float score) {
          scores[0] = score;
        }
      });
      float score = scores[0];
//      Hits hits = searcher.search(query);
//      float score = hits.length() > 0 ? hits.score(0) : 0.0f;
      return score;
    } catch (IOException e) { // should never happen (RAMDirectory)
      throw new RuntimeException(e);
    } finally {
      try {
        if (searcher != null) searcher.close();
      } catch (IOException e) { // should never happen (RAMDirectory)
        throw new RuntimeException(e);
      }
    }
  
private java.lang.String[] readLines(java.io.File file)

    BufferedReader reader = new BufferedReader(new InputStreamReader(
        new FileInputStream(file))); 
    ArrayList lines = new ArrayList();
    String line;  
    while ((line = reader.readLine()) != null) {
      String t = line.trim(); 
      if (t.length() > 0 && t.charAt(0) != '#' && (!t.startsWith("//"))) {
        lines.add(line);
      }
    }
    reader.close();
    
    String[] result = new String[lines.size()];
    lines.toArray(result);
    return result;
  
private void run(java.lang.String[] args)

    int k = -1;
    
    int iters = 1;
    if (args.length > ++k) iters = Math.max(1, Integer.parseInt(args[k]));
    
    int runs = 1;
    if (args.length > ++k) runs = Math.max(1, Integer.parseInt(args[k]));
    
    String cmd = "memram";
    if (args.length > ++k) cmd = args[k];
    boolean useMemIndex = cmd.indexOf("mem") >= 0;
    boolean useRAMIndex = cmd.indexOf("ram") >= 0;
    
    String[] queries = { "term", "term*", "term~", "Apache", "Apach~ AND Copy*" };
    if (args.length > ++k) {
      String arg = args[k];
      if (arg.startsWith("@")) 
        queries = readLines(new File(fileDir, arg.substring(1)));
      else
        queries = new String[] { arg };
    }
    
    File[] files = new File[] {new File("CHANGES.txt"), new File("LICENSE.txt") };
    if (args.length > ++k) {
      files = new File[args.length - k];
      for (int i=k; i < args.length; i++) {
        files[i-k] = new File(args[i]);
      }
    }
    
    boolean toLowerCase = true;
//    boolean toLowerCase = false;
//    Set stopWords = null;
    Set stopWords = StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS);
    
    Analyzer[] analyzers = new Analyzer[] { 
        new SimpleAnalyzer(),
        new StopAnalyzer(),
        new StandardAnalyzer(),
        PatternAnalyzer.DEFAULT_ANALYZER,
//        new WhitespaceAnalyzer(),
//        new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, false, null),
//        new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, true, stopWords),        
//        new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS),
    };
    
    for (int iter=0; iter < iters; iter++) {
      System.out.println("\n########### iteration=" + iter);
      long start = System.currentTimeMillis();            
      long bytes = 0;
      
      for (int anal=0; anal < analyzers.length; anal++) {
        this.analyzer = analyzers[anal];
        
        for (int i=0; i < files.length; i++) {
          File file = files[i];
          if (!file.exists() || file.isDirectory()) continue; // ignore
          bytes += file.length();
          String text = toString(new FileInputStream(file), null);
          Document doc = createDocument(text);
          System.out.println("\n*********** FILE=" + file);
          
          for (int q=0; q < queries.length; q++) {
            try {
              Query query = parseQuery(queries[q]);
              
              boolean measureIndexing = false; // toggle this to measure query performance
              MemoryIndex memind = null;
              if (useMemIndex && !measureIndexing) memind = createMemoryIndex(doc);
              RAMDirectory ramind = null;
              if (useRAMIndex && !measureIndexing) ramind = createRAMIndex(doc);
              
              for (int run=0; run < runs; run++) {
                float score1 = 0.0f; float score2 = 0.0f;
                if (useMemIndex && measureIndexing) memind = createMemoryIndex(doc);
                if (useMemIndex) score1 = query(memind, query); 
                if (useRAMIndex && measureIndexing) ramind = createRAMIndex(doc);
                if (useRAMIndex) score2 = query(ramind, query);
                if (useMemIndex && useRAMIndex) {
                  System.out.println("diff="+ (score1-score2) + ", query=" + queries[q] + ", s1=" + score1 + ", s2=" + score2);
                  if (score1 != score2 || score1 < 0.0f || score2 < 0.0f || score1 > 1.0f || score2 > 1.0f) {
                    throw new IllegalStateException("BUG DETECTED:" + (i*(q+1)) + " at query=" + queries[q] + ", file=" + file + ", anal=" + analyzer);
                  }
                }
              }

            } catch (Throwable t) {
              if (t instanceof OutOfMemoryError) t.printStackTrace();
              System.out.println("Fatal error at query=" + queries[q] + ", file=" + file + ", anal=" + analyzer);
              throw t;
            }
          }
        }
      }
      long end = System.currentTimeMillis();
      System.out.println("\nsecs = " + ((end-start)/1000.0f));
      System.out.println("queries/sec= " + 
        (1.0f * runs * queries.length * analyzers.length * files.length 
            / ((end-start)/1000.0f)));
      float mb = (1.0f * bytes * queries.length * runs) / (1024.0f * 1024.0f);
      System.out.println("MB/sec = " + (mb / ((end-start)/1000.0f)));
    }
    
    if (useMemIndex && useRAMIndex) 
      System.out.println("No bug found. done.");
    else 
      System.out.println("Done benchmarking (without checking correctness).");
  
public void setUp()

    fileDir = System.getProperty("lucene.common.dir", null);
  
public void testMany()

    String[] files = listFiles(new String[] {
      "*.txt", "*.html", "*.xml", "xdocs/*.xml", 
      "src/java/test/org/apache/lucene/queryParser/*.java",
      "contrib/memory/src/java/org/apache/lucene/index/memory/*.java",
    });
    System.out.println("files = " + java.util.Arrays.asList(files));
    String[] xargs = new String[] {
      "1", "1", "memram", 
      "@contrib/memory/src/test/org/apache/lucene/index/memory/testqueries.txt",
    };
    String[] args = new String[xargs.length + files.length];
    System.arraycopy(xargs, 0, args, 0, xargs.length);
    System.arraycopy(files, 0, args, xargs.length, files.length);
    run(args);
  
private static byte[] toByteArray(java.io.InputStream input)

    try {
      // safe and fast even if input.available() behaves weird or buggy
      int len = Math.max(256, input.available());
      byte[] buffer = new byte[len];
      byte[] output = new byte[len];
      
      len = 0;
      int n;
      while ((n = input.read(buffer)) >= 0) {
        if (len + n > output.length) { // grow capacity
          byte tmp[] = new byte[Math.max(output.length << 1, len + n)];
          System.arraycopy(output, 0, tmp, 0, len);
          System.arraycopy(buffer, 0, tmp, len, n);
          buffer = output; // use larger buffer for future larger bulk reads
          output = tmp;
        } else {
          System.arraycopy(buffer, 0, output, len, n);
        }
        len += n;
      }

      if (len == output.length) return output;
      buffer = null; // help gc
      buffer = new byte[len];
      System.arraycopy(output, 0, buffer, 0, len);
      return buffer;
    } finally {
      if (input != null) input.close();
    }
  
private static java.lang.String toString(java.io.InputStream input, java.nio.charset.Charset charset)

    // the following utility methods are copied from the Apache-style Nux library - see http://dsd.lbl.gov/nux
    if (charset == null) charset = DEFAULT_PLATFORM_CHARSET;      
    byte[] data = toByteArray(input);
    return charset.decode(ByteBuffer.wrap(data)).toString();
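Within the class these two private helpers are used together to slurp a file into a String; a sketch (the file name is illustrative), where passing null selects DEFAULT_PLATFORM_CHARSET:

    // decode with the platform default charset (what run() does for each input file)
    String text = toString(new FileInputStream("CHANGES.txt"), null);
    // or force an explicit charset
    String utf8 = toString(new FileInputStream("CHANGES.txt"),
                           java.nio.charset.Charset.forName("UTF-8"));

Note that toByteArray() closes the stream in its finally block, so the caller does not need to.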