File | Doc | Category | Size | Date | Package
TestDeletionPolicy.java | API Doc | Apache Lucene 2.2.0 | 19811 | Sat Jun 16 22:20:28 BST 2007 | org.apache.lucene.index

TestDeletionPolicy.java

package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import junit.framework.TestCase;

import java.io.IOException;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.util.List;
import java.util.Iterator;
import java.util.Set;
import java.util.HashSet;

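/*
  Verify that custom IndexDeletionPolicy implementations are
  called on init and on commit, and that the commit points
  they keep or delete match what can still be opened from
  the directory afterwards.
*/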

public class TestDeletionPolicy extends TestCase
{
  /**
   * Asserts that the given commit points are listed in strictly increasing
   * order of the generation encoded in their segments file names.
   *
   * @param commits list of IndexCommitPoint instances, as handed to a
   *                deletion policy callback; the first element is read
   *                unconditionally
   */
  private void verifyCommitOrder(List commits) {
    IndexCommitPoint first = (IndexCommitPoint) commits.get(0);
    long previousGen = SegmentInfos.generationFromSegmentsFileName(first.getSegmentsFileName());

    int idx = 1;
    while (idx < commits.size()) {
      IndexCommitPoint commit = (IndexCommitPoint) commits.get(idx);
      long currentGen = SegmentInfos.generationFromSegmentsFileName(commit.getSegmentsFileName());
      assertTrue("SegmentInfos commits are out-of-order", currentGen > previousGen);
      previousGen = currentGen;
      idx++;
    }
  }

  /**
   * Deletion policy that never deletes a commit point. It only checks the
   * ordering of the commits it is shown and counts how often each callback
   * is invoked.
   */
  class KeepAllDeletionPolicy implements IndexDeletionPolicy {
    // Number of onInit calls received so far.
    int numOnInit;
    // Number of onCommit calls received so far.
    int numOnCommit;

    public void onInit(List commits) {
      verifyCommitOrder(commits);
      numOnInit += 1;
    }

    public void onCommit(List commits) {
      verifyCommitOrder(commits);
      numOnCommit += 1;
    }
  }

  /**
   * Deletes every commit point on init and all but the newest one on each
   * commit.  This is useful for adding to a big index w/ autoCommit
   * false when you know readers are not using it.
   */
  class KeepNoneOnInitDeletionPolicy implements IndexDeletionPolicy {
    // Number of onInit calls received so far.
    int numOnInit;
    // Number of onCommit calls received so far.
    int numOnCommit;

    public void onInit(List commits) {
      verifyCommitOrder(commits);
      numOnInit++;
      // Nothing survives init: drop each commit point in turn.
      for (int idx = 0; idx < commits.size(); idx++) {
        IndexCommitPoint commit = (IndexCommitPoint) commits.get(idx);
        commit.delete();
      }
    }

    public void onCommit(List commits) {
      verifyCommitOrder(commits);
      // Everything before the newest (last) commit is dropped.
      int newestIdx = commits.size() - 1;
      int idx = 0;
      while (idx < newestIdx) {
        IndexCommitPoint commit = (IndexCommitPoint) commits.get(idx);
        commit.delete();
        idx++;
      }
      numOnCommit++;
    }
  }

  /**
   * Deletion policy that keeps only the newest numToKeep commit points and
   * records how often it was called and how many commits it deleted.
   */
  class KeepLastNDeletionPolicy implements IndexDeletionPolicy {
    // Number of onInit calls received so far.
    int numOnInit;
    // Number of onCommit calls received so far.
    int numOnCommit;
    // How many of the newest commit points survive pruning.
    int numToKeep;
    // Total number of delete() calls issued by this policy.
    int numDelete;
    // Segments file names of the newest commit seen by each onCommit call.
    Set seen = new HashSet();

    public KeepLastNDeletionPolicy(int numToKeep) {
      this.numToKeep = numToKeep;
    }

    public void onInit(List commits) {
      verifyCommitOrder(commits);
      numOnInit++;
      // Init is not a commit: skip the duplicate-commit bookkeeping, but
      // still prune down to the newest numToKeep commits.
      doDeletes(commits, false);
    }

    public void onCommit(List commits) {
      verifyCommitOrder(commits);
      doDeletes(commits, true);
    }

    /**
     * Deletes all but the newest numToKeep commits.  When isCommit is true it
     * first checks that the newest commit has not been reported before and
     * counts the call in numOnCommit.
     */
    private void doDeletes(List commits, boolean isCommit) {

      if (isCommit) {
        // Each onCommit call must introduce a commit point not seen before.
        IndexCommitPoint newest = (IndexCommitPoint) commits.get(commits.size()-1);
        String segmentsFile = newest.getSegmentsFileName();
        // Set.add returns false when the name was already recorded.
        if (!seen.add(segmentsFile)) {
          throw new RuntimeException("onCommit was called twice on the same commit point: " + segmentsFile);
        }
        numOnCommit++;
      }

      // Commits are ordered oldest first, so the excess sits at the front.
      int excess = commits.size() - numToKeep;
      int idx = 0;
      while (idx < excess) {
        ((IndexCommitPoint) commits.get(idx)).delete();
        numDelete++;
        idx++;
      }
    }
  }

  /*
   * Delete a commit only once its segments file is more than
   * N seconds older than that of the newest commit.
   */
  class ExpirationTimeDeletionPolicy implements IndexDeletionPolicy {

    // Directory whose file modification times are consulted.
    Directory dir;
    // Age, relative to the newest commit, beyond which a commit is deleted.
    double expirationTimeSeconds;
    // Total number of commits deleted by this policy.
    int numDelete;

    public ExpirationTimeDeletionPolicy(Directory dir, double seconds) {
      this.dir = dir;
      this.expirationTimeSeconds = seconds;
    }

    // Init is treated exactly like a commit.
    public void onInit(List commits) throws IOException {
      verifyCommitOrder(commits);
      onCommit(commits);
    }

    public void onCommit(List commits) throws IOException {
      verifyCommitOrder(commits);

      IndexCommitPoint newest = (IndexCommitPoint) commits.get(commits.size()-1);

      // Modification times are in milliseconds; compare in seconds.
      double cutoff = dir.fileModified(newest.getSegmentsFileName())/1000.0 - expirationTimeSeconds;

      for (int idx = 0; idx < commits.size(); idx++) {
        IndexCommitPoint candidate = (IndexCommitPoint) commits.get(idx);
        double candidateSeconds = dir.fileModified(candidate.getSegmentsFileName())/1000.0;
        boolean expired = candidateSeconds < cutoff;
        // The newest commit is never deleted, whatever its timestamp.
        if (expired && candidate != newest) {
          candidate.delete();
          numDelete++;
        }
      }
    }
  }

  /*
   * Test "by time expiration" deletion policy: commits are
   * written roughly SECONDS/5 apart, so after enough rounds the
   * policy must have deleted some, and whatever is still
   * openable must not be older than SECONDS.
   */
  public void testExpirationTimeDeletionPolicy() throws IOException, InterruptedException {

    final double SECONDS = 2.0;

    boolean autoCommit = false;
    boolean useCompoundFile = true;

    Directory dir = new RAMDirectory();
    ExpirationTimeDeletionPolicy policy = new ExpirationTimeDeletionPolicy(dir, SECONDS);
    IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
    writer.setUseCompoundFile(useCompoundFile);
    writer.close();

    long lastDeleteTime = 0;
    for(int i=0;i<7;i++) {
      // Record last time when writer performed deletes of
      // past commits
      lastDeleteTime = System.currentTimeMillis();
      writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
      writer.setUseCompoundFile(useCompoundFile);
      for(int j=0;j<17;j++) {
        addDoc(writer);
      }
      writer.close();

      // Make sure to sleep long enough so that some commit
      // points will be deleted:
      Thread.sleep((int) (1000.0*(SECONDS/5.0)));
    }

    // First, make sure the policy in fact deleted something:
    assertTrue("no commits were deleted", policy.numDelete > 0);

    // Then simplistic check: walk backwards through the
    // generations, verifying that each segments_N that can still
    // be opened was modified less than SECONDS seconds before the
    // last writer was opened:
    long gen = SegmentInfos.getCurrentSegmentGeneration(dir);

    while(gen > 0) {
      // Name of the segments file for the generation under test;
      // computed once and reused for the timestamp check and the
      // deletion below.
      String fileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS,
                                                              "",
                                                              gen);
      try {
        IndexReader reader = IndexReader.open(dir);
        reader.close();
        long modTime = dir.fileModified(fileName);
        assertTrue("commit point was older than " + SECONDS + " seconds but did not get deleted", lastDeleteTime - modTime < (SECONDS*1000));
      } catch (IOException e) {
        // OK: nothing readable is left at or below this
        // generation, so the walk is finished.
        break;
      }

      // Remove this commit by hand so the next open sees the
      // previous one.
      dir.deleteFile(fileName);
      gen--;
    }

    dir.close();
  }

  /*
   * Test a deletion policy that never removes a commit: every
   * segments_N written along the way must stay openable.
   */
  public void testKeepAllDeletionPolicy() throws IOException {

    // The four passes cover each autoCommit / compound-file combination.
    for(int pass=0;pass<4;pass++) {

      boolean autoCommit = pass < 2;
      boolean useCompoundFile = (pass % 2) != 0;

      Directory dir = new RAMDirectory();
      KeepAllDeletionPolicy policy = new KeepAllDeletionPolicy();

      // First session: create the index and fill it.
      IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
      writer.setUseCompoundFile(useCompoundFile);
      for(int added=0;added<107;added++) {
        addDoc(writer);
      }
      writer.close();

      // Second session: reopen the same index and optimize it.
      writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
      writer.setUseCompoundFile(useCompoundFile);
      writer.optimize();
      writer.close();

      // One onInit per writer opened above.
      assertEquals(2, policy.numOnInit);
      if (!autoCommit) {
        // Without auto commit there is exactly one commit per
        // close above:
        assertEquals(2, policy.numOnCommit);
      } else {
        assertTrue(policy.numOnCommit > 2);
      }

      // Simplistic check: walk the generations newest to oldest,
      // opening a reader on each and then removing its segments_N
      // by hand.
      for(long gen = SegmentInfos.getCurrentSegmentGeneration(dir); gen > 0; gen--) {
        IndexReader.open(dir).close();
        dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));

        if (gen > 1) {
          // An older commit is still left.  Removing the newer
          // commit point should have orphan'd at least one index
          // file, so opening and closing a writer must shrink the
          // directory listing:
          int filesBefore = dir.list().length;
          new IndexWriter(dir, false, new WhitespaceAnalyzer(), false, policy).close();
          int filesAfter = dir.list().length;
          assertTrue(filesAfter < filesBefore);
        }
      }

      dir.close();
    }
  }

  /* Test a policy that drops every commit point on init and
   * keeps only the newest on commit.  This is a viable and
   * useful case eg where you want to build a big index with
   * autoCommit false and you know there are no readers.
   */
  public void testKeepNoneOnInitDeletionPolicy() throws IOException {

    // The four passes cover each autoCommit / compound-file combination.
    for(int pass=0;pass<4;pass++) {

      boolean autoCommit = pass < 2;
      boolean useCompoundFile = (pass % 2) != 0;

      Directory dir = new RAMDirectory();
      KeepNoneOnInitDeletionPolicy policy = new KeepNoneOnInitDeletionPolicy();

      // First session: create the index and fill it.
      IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
      writer.setUseCompoundFile(useCompoundFile);
      for(int added=0;added<107;added++) {
        addDoc(writer);
      }
      writer.close();

      // Second session: reopen the same index and optimize it.
      writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
      writer.setUseCompoundFile(useCompoundFile);
      writer.optimize();
      writer.close();

      // One onInit per writer opened above.
      assertEquals(2, policy.numOnInit);
      if (!autoCommit) {
        // Without auto commit there is exactly one commit per
        // close above:
        assertEquals(2, policy.numOnCommit);
      } else {
        assertTrue(policy.numOnCommit > 2);
      }

      // Simplistic check: the index must still be readable.
      IndexReader.open(dir).close();

      dir.close();
    }
  }

  /*
   * Test a deletion policy that keeps last N commits: after
   * N+1 sessions exactly N commits must still be openable.
   */
  public void testKeepLastNDeletionPolicy() throws IOException {

    final int N = 5;

    // The four passes cover each autoCommit / compound-file combination.
    for(int pass=0;pass<4;pass++) {

      boolean autoCommit = pass < 2;
      boolean useCompoundFile = (pass % 2) != 0;

      Directory dir = new RAMDirectory();

      KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N);

      // N+1 sessions, each opening the writer in "create" mode.
      for(int session=0;session<=N;session++) {
        IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
        writer.setUseCompoundFile(useCompoundFile);
        for(int added=0;added<17;added++) {
          addDoc(writer);
        }
        writer.optimize();
        writer.close();
      }

      assertTrue(policy.numDelete > 0);
      assertEquals(N+1, policy.numOnInit);
      if (!autoCommit) {
        assertEquals(N+1, policy.numOnCommit);
      } else {
        assertTrue(policy.numOnCommit > 1);
      }

      // Simplistic check: step back through N+1 generations.  The
      // first N must open (each is then removed by hand); the
      // final step must fail because nothing older was kept.
      long gen = SegmentInfos.getCurrentSegmentGeneration(dir);
      for(int step=0;step<=N;step++, gen--) {
        boolean beyondKept = (step == N);
        try {
          IndexReader.open(dir).close();
          if (beyondKept) {
            fail("should have failed on commits prior to last " + N);
          }
        } catch (IOException e) {
          // Only the step past the kept commits may fail to open.
          if (!beyondKept) {
            throw e;
          }
        }
        if (!beyondKept) {
          dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
        }
      }

      dir.close();
    }
  }

  /*
   * Test a deletion policy that keeps last N commits
   * around, with reader doing deletes.  Each round a writer
   * adds 17 documents and a reader then deletes one, so both
   * the writer and the reader produce commit points.
   */
  public void testKeepLastNDeletionPolicyWithReader() throws IOException {

    final int N = 10;

    // The four passes cover each autoCommit / compound-file combination.
    for(int pass=0;pass<4;pass++) {

      boolean autoCommit = pass < 2;
      boolean useCompoundFile = (pass % 2) > 0;

      KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N);

      Directory dir = new RAMDirectory();
      IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
      writer.setUseCompoundFile(useCompoundFile);
      writer.close();
      Term searchTerm = new Term("content", "aaa");        
      Query query = new TermQuery(searchTerm);

      for(int i=0;i<N+1;i++) {
        writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
        writer.setUseCompoundFile(useCompoundFile);
        for(int j=0;j<17;j++) {
          addDoc(writer);
        }
        // this is a commit when autoCommit=false:
        writer.close();
        IndexReader reader = IndexReader.open(dir, policy);
        reader.deleteDocument(3*i+1);
        reader.setNorm(4*i+1, "content", 2.0F);
        IndexSearcher searcher = new IndexSearcher(reader);
        Hits hits = searcher.search(query);
        // 17 added and 1 deleted per round so far:
        assertEquals(16*(1+i), hits.length());
        // this is a commit when autoCommit=false:
        reader.close();
        searcher.close();
      }
      writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
      writer.setUseCompoundFile(useCompoundFile);
      writer.optimize();
      // this is a commit when autoCommit=false:
      writer.close();

      assertEquals(2*(N+2), policy.numOnInit);
      if (autoCommit) {
        assertTrue(policy.numOnCommit > 2*(N+2)-1);
      } else {
        assertEquals(2*(N+2)-1, policy.numOnCommit);
      }

      IndexSearcher searcher = new IndexSearcher(dir);
      Hits hits = searcher.search(query);
      assertEquals(176, hits.length());
      // This searcher was opened directly on the directory and the
      // variable is reassigned below, so release it here rather
      // than leaking it.
      searcher.close();

      // Simplistic check: just verify only the past N segments_N's still
      // exist, and, I can open a reader on each:
      long gen = SegmentInfos.getCurrentSegmentGeneration(dir);

      int expectedCount = 176;

      for(int i=0;i<N+1;i++) {
        try {
          IndexReader reader = IndexReader.open(dir);

          // Work backwards in commits on what the expected
          // count should be.  Only check this in the
          // autoCommit false case:
          if (!autoCommit) {
            searcher = new IndexSearcher(reader);
            hits = searcher.search(query);
            // The two newest commits (optimize, then the last
            // reader delete) both show 176.  From there, even
            // steps undo a reader's single delete (+1) and odd
            // steps undo a writer's 17 additions (-17).
            if (i > 1) {
              if (i % 2 == 0) {
                expectedCount += 1;
              } else {
                expectedCount -= 17;
              }
            }
            assertEquals(expectedCount, hits.length());
            searcher.close();
          }
          reader.close();
          if (i == N) {
            fail("should have failed on commits before last " + N);
          }
        } catch (IOException e) {
          // Only the step past the N kept commits may fail to open.
          if (i != N) {
            throw e;
          }
        }
        if (i < N) {
          dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
        }
        gen--;
      }
      
      dir.close();
    }
  }

  /*
   * Test a deletion policy that keeps last N commits
   * around, through creates.  Each round a writer adds 17
   * documents, a reader deletes one, and a writer opened in
   * "create" mode is then opened and closed.
   */
  public void testKeepLastNDeletionPolicyWithCreates() throws IOException {

    final int N = 10;

    // The four passes cover each autoCommit / compound-file combination.
    for(int pass=0;pass<4;pass++) {

      boolean autoCommit = pass < 2;
      boolean useCompoundFile = (pass % 2) > 0;

      KeepLastNDeletionPolicy policy = new KeepLastNDeletionPolicy(N);

      Directory dir = new RAMDirectory();
      IndexWriter writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
      writer.setUseCompoundFile(useCompoundFile);
      writer.close();
      Term searchTerm = new Term("content", "aaa");        
      Query query = new TermQuery(searchTerm);

      for(int i=0;i<N+1;i++) {

        writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), false, policy);
        writer.setUseCompoundFile(useCompoundFile);
        for(int j=0;j<17;j++) {
          addDoc(writer);
        }
        // this is a commit when autoCommit=false:
        writer.close();
        IndexReader reader = IndexReader.open(dir, policy);
        reader.deleteDocument(3);
        reader.setNorm(5, "content", 2.0F);
        IndexSearcher searcher = new IndexSearcher(reader);
        Hits hits = searcher.search(query);
        // 17 added, 1 deleted:
        assertEquals(16, hits.length());
        // this is a commit when autoCommit=false:
        reader.close();
        searcher.close();

        writer = new IndexWriter(dir, autoCommit, new WhitespaceAnalyzer(), true, policy);
        // This will not commit: there are no changes
        // pending because we opened for "create":
        writer.close();
      }

      assertEquals(1+3*(N+1), policy.numOnInit);
      if (autoCommit) {
        assertTrue(policy.numOnCommit > 3*(N+1)-1);
      } else {
        assertEquals(2*(N+1), policy.numOnCommit);
      }

      IndexSearcher searcher = new IndexSearcher(dir);
      Hits hits = searcher.search(query);
      assertEquals(0, hits.length());
      // This searcher was opened directly on the directory and the
      // variable is reassigned below, so release it here rather
      // than leaking it.
      searcher.close();

      // Simplistic check: just verify only the past N segments_N's still
      // exist, and, I can open a reader on each:
      long gen = SegmentInfos.getCurrentSegmentGeneration(dir);

      int expectedCount = 0;

      for(int i=0;i<N+1;i++) {
        try {
          IndexReader reader = IndexReader.open(dir);

          // Work backwards in commits on what the expected
          // count should be.  Only check this in the
          // autoCommit false case:
          if (!autoCommit) {
            searcher = new IndexSearcher(reader);
            hits = searcher.search(query);
            assertEquals(expectedCount, hits.length());
            searcher.close();
            // Going back in time the counts cycle: 0 (after the
            // create), 16 (after the reader's delete), 17 (after
            // the writer's additions), then 0 again.
            if (expectedCount == 0) {
              expectedCount = 16;
            } else if (expectedCount == 16) {
              expectedCount = 17;
            } else if (expectedCount == 17) {
              expectedCount = 0;
            }
          }
          reader.close();
          if (i == N) {
            fail("should have failed on commits before last " + N);
          }
        } catch (IOException e) {
          // Only the step past the N kept commits may fail to open.
          if (i != N) {
            throw e;
          }
        }
        if (i < N) {
          dir.deleteFile(IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen));
        }
        gen--;
      }
      
      dir.close();
    }
  }

  /**
   * Adds one document to the writer whose only field, "content", holds the
   * single token "aaa" (indexed, not stored).
   *
   * @param writer writer that receives the document
   * @throws IOException if the writer fails to add the document
   */
  private void addDoc(IndexWriter writer) throws IOException
  {
    Field content = new Field("content", "aaa", Field.Store.NO, Field.Index.TOKENIZED);
    Document single = new Document();
    single.add(content);
    writer.addDocument(single);
  }
}