FileDocCategorySizeDatePackage
TestData.javaAPI DocApache Lucene 2.2.018270Sat Jun 16 22:21:00 BST 2007org.apache.lucene.benchmark.stats

TestData

public class TestData extends Object
This class holds together all parameters related to a test. Single test is performed several times, and all results are averaged.
author
Andrzej Bialecki <ab@getopt.org>

Fields Summary
public static int[]
MAX_BUFFERED_DOCS_COUNTS
public static int[]
MERGEFACTOR_COUNTS
private String
id
ID of this test data.
private long
heap
Heap size.
private Vector
runData
List of results for each test run with these parameters.
private int
maxBufferedDocs
private int
mergeFactor
private File
source
Directory containing source files.
private Directory
directory
Lucene Directory implementation for creating an index.
private Analyzer
analyzer
Analyzer to use when adding documents.
private boolean
compound
If true, use compound file format.
private boolean
optimize
If true, optimize index when finished adding documents.
private QueryData[]
queries
Data for search benchmarks.
static final String
ID
Get a textual summary of the benchmark results, averaged over all test runs.
static final String
OP
static final String
RUNCNT
static final String
RECCNT
static final String
RECSEC
static final String
FREEMEM
static final String
TOTMEM
static final String[]
COLS
private static NumberFormat[]
numFormat
private static final String
padd
Constructors Summary
/**
 * Creates a new, empty test configuration and records the JVM's maximum
 * heap size at construction time.
 */
public TestData() {
    heap = Runtime.getRuntime().maxMemory();
}
Methods Summary
protected java.lang.Objectclone()

        TestData cl = new TestData();
        cl.id = id;
        cl.compound = compound;
        cl.heap = heap;
        cl.mergeFactor = mergeFactor;
        cl.maxBufferedDocs = maxBufferedDocs;
        cl.optimize = optimize;
        cl.source = source;
        cl.directory = directory;
        cl.analyzer = analyzer;
        // don't clone runData
        return cl;
    
static java.lang.Stringformat(int numFracDigits, float f, java.lang.String col)

     
      numFormat[0].setMaximumFractionDigits(0);
      numFormat[0].setMinimumFractionDigits(0);
      numFormat[1].setMaximumFractionDigits(1);
      numFormat[1].setMinimumFractionDigits(1);
    
      String res = padd + numFormat[numFracDigits].format(f);
      return res.substring(res.length() - col.length());
    
static java.lang.Stringformat(int n, java.lang.String col)

      String res = padd + n;
      return res.substring(res.length() - col.length());
    
static java.lang.Stringformat(java.lang.String s, java.lang.String col)

      return (s + padd).substring(0,col.length());
    
public static org.apache.lucene.benchmark.stats.TestData[]getAll(java.io.File[] sources, org.apache.lucene.analysis.Analyzer[] analyzers)
Prepare a list of benchmark data, using all possible combinations of benchmark parameters.

param
sources list of directories containing different source document collections
param
analyzers of analyzers to use.

        List res = new ArrayList(50);
        TestData ref = new TestData();
        for (int q = 0; q < analyzers.length; q++)
        {
            for (int m = 0; m < sources.length; m++)
            {
                for (int i = 0; i < MAX_BUFFERED_DOCS_COUNTS.length; i++)
                {
                    for (int k = 0; k < MERGEFACTOR_COUNTS.length; k++)
                    {
                        for (int n = 0; n < Constants.BOOLEANS.length; n++)
                        {
                            for (int p = 0; p < Constants.BOOLEANS.length; p++)
                            {
                                ref.id = "td-" + q + m + i + k + n + p;
                                ref.source = sources[m];
                                ref.analyzer = analyzers[q];
                                ref.maxBufferedDocs = MAX_BUFFERED_DOCS_COUNTS[i];
                                ref.mergeFactor = MERGEFACTOR_COUNTS[k];
                                ref.compound = Constants.BOOLEANS[n].booleanValue();
                                ref.optimize = Constants.BOOLEANS[p].booleanValue();
                                try
                                {
                                    res.add(ref.clone());
                                }
                                catch (Exception e)
                                {
                                    e.printStackTrace();
                                }
                            }
                        }
                    }
                }
            }
        }
        return (TestData[]) res.toArray(new TestData[0]);
    
public org.apache.lucene.analysis.AnalyzergetAnalyzer()

        return analyzer;
    
public org.apache.lucene.store.DirectorygetDirectory()

        return directory;
    
public longgetHeap()

        return heap;
    
public java.lang.StringgetId()

        return id;
    
public intgetMaxBufferedDocs()

        return maxBufferedDocs;
    
public intgetMergeFactor()

        return mergeFactor;
    
public QueryData[]getQueries()

        return queries;
    
public java.util.VectorgetRunData()

        return runData;
    
public java.io.FilegetSource()

        return source;
    
public static org.apache.lucene.benchmark.stats.TestData[]getTestDataMinMaxMergeAndMaxBuffered(java.io.File[] sources, org.apache.lucene.analysis.Analyzer[] analyzers)
Similar to {@link #getAll(java.io.File[], org.apache.lucene.analysis.Analyzer[])} but only uses maxBufferedDocs of 10 and 100 and same for mergeFactor, thus reducing the number of permutations significantly. It also only uses compund file and optimize is always true.

param
sources
param
analyzers
return
An Array of {@link TestData}

        List res = new ArrayList(50);
        TestData ref = new TestData();
        for (int q = 0; q < analyzers.length; q++)
        {
            for (int m = 0; m < sources.length; m++)
            {
                ref.id = "td-" + q + m + "_" + 10 + "_" + 10;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 10;
                ref.mergeFactor = 10;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add(ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
                ref.id = "td-" + q + m  + "_" + 10 + "_" + 100;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 10;
                ref.mergeFactor = 100;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add(ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
                ref.id = "td-" + q + m + "_" + 100 + "_" + 10;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 100;
                ref.mergeFactor = 10;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add(ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
                ref.id = "td-" + q + m + "_" + 100 + "_" + 100;
                ref.source = sources[m];
                ref.analyzer = analyzers[q];
                ref.maxBufferedDocs = 100;
                ref.mergeFactor = 100;//MERGEFACTOR_COUNTS[k];
                ref.compound = true;
                ref.optimize = true;
                try
                {
                    res.add(ref.clone());
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
            }
        }
        return (TestData[]) res.toArray(new TestData[0]);
    
public booleanisCompound()

        return compound;
    
public booleanisOptimize()

        return optimize;
    
public voidsetAnalyzer(org.apache.lucene.analysis.Analyzer analyzer)

        this.analyzer = analyzer;
    
public voidsetCompound(boolean compound)

        this.compound = compound;
    
public voidsetDirectory(org.apache.lucene.store.Directory directory)

        this.directory = directory;
    
public voidsetHeap(long heap)

        this.heap = heap;
    
public voidsetId(java.lang.String id)

        this.id = id;
    
public voidsetMaxBufferedDocs(int maxBufferedDocs)

        this.maxBufferedDocs = maxBufferedDocs;
    
public voidsetMergeFactor(int mergeFactor)

        this.mergeFactor = mergeFactor;
    
public voidsetOptimize(boolean optimize)

        this.optimize = optimize;
    
public voidsetQueries(QueryData[] queries)

        this.queries = queries;
    
public voidsetRunData(java.util.Vector runData)

        this.runData = runData;
    
public voidsetSource(java.io.File source)

        this.source = source;
    
public java.lang.StringshowRunData(java.lang.String prefix)

       
    
        if (runData.size() == 0)
        {
            return "# [NO RUN DATA]";
        }
        HashMap resByTask = new HashMap(); 
        StringBuffer sb = new StringBuffer();
        String lineSep = System.getProperty("line.separator");
        sb.append("warm = Warm Index Reader").append(lineSep).append("srch = Search Index").append(lineSep).append("trav = Traverse Hits list, optionally retrieving document").append(lineSep).append(lineSep);
        for (int i = 0; i < COLS.length; i++) {
          sb.append(COLS[i]);
        }
        sb.append("\n");
        LinkedHashMap mapMem = new LinkedHashMap();
        LinkedHashMap mapSpeed = new LinkedHashMap();
        for (int i = 0; i < runData.size(); i++)
        {
            TestRunData trd = (TestRunData) runData.get(i);
            Collection labels = trd.getLabels();
            Iterator it = labels.iterator();
            while (it.hasNext())
            {
                String label = (String) it.next();
                MemUsage mem = trd.getMemUsage(label);
                if (mem != null)
                {
                    TestData.LCounter[] tm = (TestData.LCounter[]) mapMem.get(label);
                    if (tm == null)
                    {
                        tm = new TestData.LCounter[2];
                        tm[0] = new TestData.LCounter();
                        tm[1] = new TestData.LCounter();
                        mapMem.put(label, tm);
                    }
                    tm[0].total += mem.avgFree;
                    tm[0].count++;
                    tm[1].total += mem.avgTotal;
                    tm[1].count++;
                }
                TimeData td = trd.getTotals(label);
                if (td != null)
                {
                    TestData.DCounter dc = (TestData.DCounter) mapSpeed.get(label);
                    if (dc == null)
                    {
                        dc = new TestData.DCounter();
                        mapSpeed.put(label, dc);
                    }
                    dc.count++;
                    //dc.total += td.getRate();
                    dc.total += (td.count>0 && td.elapsed<=0 ? 1 : td.elapsed); // assume atleast 1ms for any countable op
                    dc.recordCount += td.count;
                }
            }
        }
        LinkedHashMap res = new LinkedHashMap();
        Iterator it = mapSpeed.keySet().iterator();
        while (it.hasNext())
        {
            String label = (String) it.next();
            TestData.DCounter dc = (TestData.DCounter) mapSpeed.get(label);
            res.put(label, 
                format(dc.count, RUNCNT) + 
                format(dc.recordCount / dc.count, RECCNT) +
                format(1,(float) (dc.recordCount * 1000.0 / (dc.total>0 ? dc.total : 1.0)), RECSEC)
                //format((float) (dc.total / (double) dc.count), RECSEC)
                );
            
            // also sum by task
            String task = label.substring(label.lastIndexOf("-")+1);
            LDCounter ldc = (LDCounter) resByTask.get(task);
            if (ldc==null) {
              ldc = new LDCounter();
              resByTask.put(task,ldc);
            }
            ldc.Dcount += dc.count;
            ldc.DrecordCount += dc.recordCount;
            ldc.Dtotal += (dc.count>0 && dc.total<=0 ? 1 : dc.total); // assume atleast 1ms for any countable op 
        }
        it = mapMem.keySet().iterator();
        while (it.hasNext())
        {
            String label = (String) it.next();
            TestData.LCounter[] lc = (TestData.LCounter[]) mapMem.get(label);
            String speed = (String) res.get(label);
            boolean makeSpeed = false;
            if (speed == null)
            {
                makeSpeed = true;
                speed =  
                  format(lc[0].count, RUNCNT) + 
                  format(0, RECCNT) + 
                  format(0,(float)0.0, RECSEC);
            }
            res.put(label, speed + 
                format(0, lc[0].total / lc[0].count, FREEMEM) + 
                format(0, lc[1].total / lc[1].count, TOTMEM));
            
            // also sum by task
            String task = label.substring(label.lastIndexOf("-")+1);
            LDCounter ldc = (LDCounter) resByTask.get(task);
            if (ldc==null) {
              ldc = new LDCounter();
              resByTask.put(task,ldc);
              makeSpeed = true;
            }
            if (makeSpeed) {
              ldc.Dcount += lc[0].count;
            }
            ldc.Lcount0 += lc[0].count;
            ldc.Lcount1 += lc[1].count;
            ldc.Ltotal0 += lc[0].total;
            ldc.Ltotal1 += lc[1].total;
        }
        it = res.keySet().iterator();
        while (it.hasNext())
        {
            String label = (String) it.next();
            sb.append(format(prefix, ID));
            sb.append(format(label, OP));
            sb.append(res.get(label)).append("\n");
        }
        // show results by task (srch, optimize, etc.) 
        sb.append("\n");
        for (int i = 0; i < COLS.length; i++) {
          sb.append(COLS[i]);
        }
        sb.append("\n");
        it = resByTask.keySet().iterator();
        while (it.hasNext())
        {
            String task = (String) it.next();
            LDCounter ldc = (LDCounter) resByTask.get(task);
            sb.append(format("    ", ID));
            sb.append(format(task, OP));
            sb.append(format(ldc.Dcount, RUNCNT)); 
            sb.append(format(ldc.DrecordCount / ldc.Dcount, RECCNT));
            sb.append(format(1,(float) (ldc.DrecordCount * 1000.0 / (ldc.Dtotal>0 ? ldc.Dtotal : 1.0)), RECSEC));
            sb.append(format(0, ldc.Ltotal0 / ldc.Lcount0, FREEMEM)); 
            sb.append(format(0, ldc.Ltotal1 / ldc.Lcount1, TOTMEM));
            sb.append("\n");
        }
        return sb.toString();
    
public java.lang.StringtoString()

        StringBuffer res = new StringBuffer();
        res.append("#-- ID: ").append(id).append(", ").append(new Date()).append(", heap=").append(heap).append(" --\n");
        res.append("# source=").append(source).append(", directory=").append(directory).append("\n");
        res.append("# maxBufferedDocs=").append(maxBufferedDocs).append(", mergeFactor=").append(mergeFactor);
        res.append(", compound=").append(compound).append(", optimize=").append(optimize).append("\n");
        if (queries != null)
        {
            res.append(QueryData.getLabels()).append("\n");
            for (int i = 0; i < queries.length; i++)
            {
                res.append("# ").append(queries[i].toString()).append("\n");
            }
        }
        return res.toString();