FileDocCategorySizeDatePackage
BitVector.javaAPI DocApache Lucene 2.1.07654Wed Feb 14 10:46:42 GMT 2007org.apache.lucene.util

BitVector

public final class BitVector extends Object
Optimized implementation of a vector of bits. This is more-or-less like java.util.BitSet, but also includes the following:
  • a count() method, which efficiently computes the number of one bits;
  • optimized read from and write to disk;
  • inlinable get() method;
  • store and load, as bit set or d-gaps, depending on sparseness;
author
Doug Cutting
version
$Id: BitVector.java 494136 2007-01-08 18:11:08Z mikemccand $

Fields Summary
private byte[]
bits
private int
size
private int
count
private static final byte[]
BYTE_COUNTS
Constructors Summary
public BitVector(int n)
Constructs a vector capable of holding n bits.


           
     
    size = n;
    bits = new byte[(size >> 3) + 1];
  
public BitVector(Directory d, String name)
Constructs a bit vector from the file name in Directory d, as written by the {@link #write} method.

    IndexInput input = d.openInput(name);
    try {
      size = input.readInt();       // read size
      if (size == -1) {
        readDgaps(input);
      } else {
        readBits(input);
      }
    } finally {
      input.close();
    }
  
Methods Summary
public final voidclear(int bit)
Sets the value of bit to zero.

    if (bit >= size) {
      throw new ArrayIndexOutOfBoundsException(bit);
    }
    bits[bit >> 3] &= ~(1 << (bit & 7));
    count = -1;
  
public final intcount()
Returns the total number of one bits in this vector. This is efficiently computed and cached, so that, if the vector is not changed, no recomputation is done for repeated calls.

    // if the vector has been modified
    if (count == -1) {
      int c = 0;
      int end = bits.length;
      for (int i = 0; i < end; i++)
        c += BYTE_COUNTS[bits[i] & 0xFF];	  // sum bits per byte
      count = c;
    }
    return count;
  
public final booleanget(int bit)
Returns true if bit is one and false if it is zero.

    if (bit >= size) {
      throw new ArrayIndexOutOfBoundsException(bit);
    }
    return (bits[bit >> 3] & (1 << (bit & 7))) != 0;
  
private booleanisSparse()
Indicates if the bit vector is sparse and should be saved as a d-gaps list, or dense, and should be saved as a bit set.

    // note: order of comparisons below set to favor smaller values (no binary range search.)
    // note: adding 4 because we start with ((int) -1) to indicate d-gaps format.
    // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore
    //       multiplying count by (8+8) or (8+16) or (8+24) etc.:
    //       - first 8 for writing bits[i] (1 byte vs. 1 bit), and 
    //       - second part for writing the byte-number d-gap as vint. 
    // note: factor is for read/write of byte-arrays being faster than vints.  
    int factor = 10;  
    if (bits.length < (1<< 7)) return factor * (4 + (8+ 8)*count()) < size();
    if (bits.length < (1<<14)) return factor * (4 + (8+16)*count()) < size();
    if (bits.length < (1<<21)) return factor * (4 + (8+24)*count()) < size();
    if (bits.length < (1<<28)) return factor * (4 + (8+32)*count()) < size();
    return                            factor * (4 + (8+40)*count()) < size();
  
private voidreadBits(org.apache.lucene.store.IndexInput input)
Read as a bit set

    count = input.readInt();        // read count
    bits = new byte[(size >> 3) + 1];     // allocate bits
    input.readBytes(bits, 0, bits.length);
  
private voidreadDgaps(org.apache.lucene.store.IndexInput input)
read as a d-gaps list

    size = input.readInt();       // (re)read size
    count = input.readInt();        // read count
    bits = new byte[(size >> 3) + 1];     // allocate bits
    int last=0;
    int n = count();
    while (n>0) {
      last += input.readVInt();
      bits[last] = input.readByte();
      n -= BYTE_COUNTS[bits[last] & 0xFF];
    }          
  
public final voidset(int bit)
Sets the value of bit to one.

    if (bit >= size) {
      throw new ArrayIndexOutOfBoundsException(bit);
    }
    bits[bit >> 3] |= 1 << (bit & 7);
    count = -1;
  
public final intsize()
Returns the number of bits in this vector. This is also one greater than the number of the largest valid bit number.

    return size;
  
public final voidwrite(org.apache.lucene.store.Directory d, java.lang.String name)
Writes this vector to the file name in Directory d, in a format that can be read by the constructor {@link #BitVector(Directory, String)}.



                                 
           
    IndexOutput output = d.createOutput(name);
    try {
      if (isSparse()) { 
        writeDgaps(output); // sparse bit-set more efficiently saved as d-gaps.
      } else {
        writeBits(output);
      }
    } finally {
      output.close();
    }
  
private voidwriteBits(org.apache.lucene.store.IndexOutput output)
Write as a bit set

    output.writeInt(size());        // write size
    output.writeInt(count());       // write count
    output.writeBytes(bits, bits.length);
  
private voidwriteDgaps(org.apache.lucene.store.IndexOutput output)
Write as a d-gaps list

    output.writeInt(-1);            // mark using d-gaps                         
    output.writeInt(size());        // write size
    output.writeInt(count());       // write count
    int last=0;
    int n = count();
    int m = bits.length;
    for (int i=0; i<m && n>0; i++) {
      if (bits[i]!=0) {
        output.writeVInt(i-last);
        output.writeByte(bits[i]);
        last = i;
        n -= BYTE_COUNTS[bits[i] & 0xFF];
      }
    }