File | Doc | Category | Size | Date | Package
BasicDocMaker.java | API Doc | Apache Lucene 2.1.0 | 9185 | Wed Feb 14 10:46:16 GMT 2007 | org.apache.lucene.benchmark.byTask.feeds

BasicDocMaker

public abstract class BasicDocMaker extends Object implements DocMaker
Create documents for the test. Maintains counters of chars etc. so that sub-classes just need to provide textual content, and the create-by-size is handled here.

Fields Summary
private int
numDocsCreated
private ThreadLocal
leftovr
static final String
BODY_FIELD
private long
numBytes
private long
numUniqueBytes
protected org.apache.lucene.benchmark.byTask.utils.Config
config
protected Field$Store
storeVal
protected Field$Index
indexVal
protected Field$TermVector
termVecVal
private int
lastPrintedNumUniqueTexts
private long
lastPrintedNumUniqueBytes
private int
printNum
Constructors Summary
Methods Summary
protected synchronized voidaddBytes(long n)

    // Adds n to the running byte total. Synchronized, like the getByteCount()
    // reader and resetInputs(), which zeroes the total.
    numBytes = numBytes + n;
  
protected voidaddUniqueBytes(long n)

    // Adds n to the unique-bytes total. Unlike addBytes(), this method is
    // not synchronized.
    numUniqueBytes = numUniqueBytes + n;
  
protected voidcollectFiles(java.io.File f, java.util.ArrayList inputFiles)

    // Recursively gathers every readable non-directory file at or below f into
    // inputFiles, adding each collected file's length to the unique-bytes total.
    //   f          - file or directory to start from
    //   inputFiles - list that receives the collected File objects
    if (!f.canRead()) {
      return; // unreadable entries are skipped silently
    }
    if (f.isDirectory()) {
      File files[] = f.listFiles();
      // listFiles() returns null when the directory cannot be listed (I/O error,
      // or it stopped being a directory after the check above); there is then
      // nothing to collect, and dereferencing it would throw a NullPointerException.
      if (files == null) {
        return;
      }
      for (int i = 0; i < files.length; i++) {
        collectFiles(files[i],inputFiles);
      }
      return;
    }
    inputFiles.add(f);
    addUniqueBytes(f.length());
  
private org.apache.lucene.document.DocumentcreateDocument(org.apache.lucene.benchmark.byTask.feeds.BasicDocMaker$DocData docData, int size, int cnt)

    // Builds one Document from docData; every field is created with the
    // configured store / index / term-vector settings.
    //   size - size<=0 (or size not smaller than the body) consumes the whole
    //          body; otherwise about size chars are consumed and the remainder
    //          is written back to docData.body for a later call.
    //   cnt  - cnt<0 uses the doc name unchanged; otherwise "_"+cnt is appended.
    // Side effects: advances the created-docs counter, shrinks docData.body and
    // sets docData.props to null once the properties have been added.
    int id = incrNumDocsCreated();
    Document result = new Document();
    result.add(new Field("docid", "doc"+id, storeVal, indexVal, termVecVal));

    String docName = docData.name;
    if (docName != null) {
      if (cnt >= 0) {
        docName = docName + "_" + cnt;
      }
      result.add(new Field("docname", docName, storeVal, indexVal, termVecVal));
    }

    if (docData.date != null) {
      result.add(new Field("docdate",
          DateTools.dateToString(docData.date, DateTools.Resolution.SECOND),
          storeVal, indexVal, termVecVal));
    }

    if (docData.title != null) {
      result.add(new Field("doctitle", docData.title, storeVal, indexVal, termVecVal));
    }

    String text = docData.body;
    if (text != null && text.length() > 0) {
      String chunk;
      if (size > 0 && size < text.length()) {
        // Try not to split a word: cut at the first whitespace found while
        // scanning from index size-1 up to (but excluding) size+20.
        int cut = size;
        int pos = size - 1;
        while (pos < size + 20 && pos < text.length()) {
          if (Character.isWhitespace(text.charAt(pos))) {
            cut = pos;
            break;
          }
          pos++;
        }
        chunk = text.substring(0, cut);   // consumed part
        docData.body = text.substring(cut); // kept for the next call
      } else {
        chunk = text;       // take everything
        docData.body = "";  // nothing remains
      }
      result.add(new Field(BODY_FIELD, chunk, storeVal, indexVal, termVecVal));
    }

    if (docData.props != null) {
      Iterator keys = docData.props.keySet().iterator();
      while (keys.hasNext()) {
        String key = (String) keys.next();
        String val = (String) docData.props.get(key);
        result.add(new Field(key, val, storeVal, indexVal, termVecVal));
      }
      // properties are attached only once per DocData
      docData.props = null;
    }
    return result;
  
public synchronized longgetByteCount()

    // Current value of the byte total maintained by addBytes(); resetInputs()
    // sets it back to zero.
    return this.numBytes;
  
public synchronized intgetCount()

    // Number of documents created since the counter was last zeroed by
    // resetInputs().
    return this.numDocsCreated;
  
// Subclass hook: both makeDocument() variants call this to obtain the next DocData.
protected abstract org.apache.lucene.benchmark.byTask.feeds.BasicDocMaker$DocDatagetNextDocData()
Return the data of the next document.

return
the data of the next document.
exception
if the next doc data cannot be created.

private synchronized intincrNumDocsCreated()

    // Returns the counter's current value and then advances it by one, so the
    // first value handed out after resetInputs() is 0.
    int current = numDocsCreated;
    numDocsCreated = current + 1;
    return current;
  
public org.apache.lucene.document.DocumentmakeDocument()

    // Whole-document variant: clears the calling thread's leftover first, then
    // converts the next DocData into a Document (size 0 = entire body,
    // cnt -1 = no suffix on the doc name).
    resetLeftovers();
    return createDocument(getNextDocData(), 0, -1);
  
public org.apache.lucene.document.DocumentmakeDocument(int size)

    // Sized variant: creates a document whose body is about size chars long.
    // Text left over from the previous call on this thread is used first; when
    // it is too short, the bodies of following documents are appended until
    // enough text is available. Whatever is not consumed is remembered per
    // thread for the next call.
    //   size - requested body size in chars; size<=0 takes the whole body.
    // Note: a source that only ever yields empty bodies can never satisfy a
    // positive size, so the gathering loop would not terminate.
    LeftOver lvr = (LeftOver) leftovr.get();
    if (lvr==null || lvr.docdata==null || lvr.docdata.body==null || lvr.docdata.body.length()==0) {
      resetLeftovers();
      // Keep the local in step with the cleared thread-local. Otherwise the
      // stale holder would be reused below and the new leftover stored in an
      // object that is no longer registered, losing it for the next call.
      lvr = null;
    }
    DocData dd = (lvr==null ? getNextDocData() : lvr.docdata);
    int cnt = (lvr==null ? 0 : lvr.cnt);
    while (dd.body==null || dd.body.length()<size) {
      DocData dd2 = dd;
      dd = getNextDocData();
      cnt = 0; // a new source document starts its own part numbering
      // Treat a missing body as empty: plain string concatenation would insert
      // the literal text "null" into the document body.
      String head = (dd2.body==null ? "" : dd2.body);
      String tail = (dd.body==null ? "" : dd.body);
      dd.body = head + tail;
    }
    Document doc = createDocument(dd,size,cnt);
    if (dd.body==null || dd.body.length()==0) {
      resetLeftovers(); // everything was consumed
    } else {
      if (lvr == null) {
        lvr = new LeftOver();
        leftovr.set(lvr);
      }
      lvr.docdata = dd;
      lvr.cnt = ++cnt;
    }
    return doc;
  
public longnumUniqueBytes()

    // Current unique-bytes total as accumulated through addUniqueBytes().
    return this.numUniqueBytes;
  
public voidprintDocStatistics()

    // Writes a statistics report to standard output. A line is included for
    // the unique-texts count and the unique-bytes total only when the value
    // grew since it was last printed, and the docs/bytes lines only when at
    // least one document was created. Nothing is printed (and the report
    // number is not advanced) when no line qualified.
    final String eol = System.getProperty("line.separator");
    final String pad = "                  ";
    boolean changed = false;

    StringBuffer report = new StringBuffer();
    report.append("------------> ");
    report.append(Format.simpleName(getClass()));
    report.append(" statistics (");
    report.append(printNum);
    report.append("): ");
    report.append(eol);

    int uniqueTexts = numUniqueTexts();
    if (uniqueTexts > lastPrintedNumUniqueTexts) {
      report.append("total count of unique texts: ");
      report.append(Format.format(0,uniqueTexts,pad));
      report.append(eol);
      lastPrintedNumUniqueTexts = uniqueTexts;
      changed = true;
    }

    long uniqueBytes = numUniqueBytes();
    if (uniqueBytes > lastPrintedNumUniqueBytes) {
      report.append("total bytes of unique texts: ");
      report.append(Format.format(0,uniqueBytes,pad));
      report.append(eol);
      lastPrintedNumUniqueBytes = uniqueBytes;
      changed = true;
    }

    if (getCount()>0) {
      report.append("num docs added since last inputs reset:   ");
      report.append(Format.format(0,getCount(),pad));
      report.append(eol);
      report.append("total bytes added since last inputs reset: ");
      report.append(Format.format(0,getByteCount(),pad));
      report.append(eol);
      changed = true;
    }

    if (!changed) {
      return;
    }
    report.append(eol);
    System.out.println(report.toString());
    printNum++;
  
public synchronized voidresetInputs()

    // Print the statistics first, because the report reads the counters that
    // are zeroed right after; finally clear the calling thread's leftover.
    printDocStatistics();
    numDocsCreated = 0;
    numBytes = 0;
    resetLeftovers();
  
private voidresetLeftovers()

    // Clears the leftover stored in the thread-local for the calling thread, so
    // the next sized makeDocument call starts from a fresh DocData.
    this.leftovr.set(null);
  
public voidsetConfig(org.apache.lucene.benchmark.byTask.utils.Config config)

    // Stores the configuration and derives from it the three Field settings
    // that createDocument() applies to every field it adds.
    this.config = config;
    // NOTE(review): the second argument to config.get() is presumably the value
    // used when the property is not set - confirm against Config.get.
    boolean stored = config.get("doc.stored",false); 
    boolean tokenized = config.get("doc.tokenized",true);
    boolean termVec = config.get("doc.term.vector",false);
    // Translate the boolean switches into the corresponding Field constants.
    storeVal = (stored ? Field.Store.YES : Field.Store.NO);
    indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
    termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);