* BloomSHA1, DecayingBloomFilter:

- Refactor for concurrency, at some small risk of false negatives
    - Optimizations to cache objects and reuse offsets
This commit is contained in:
zzz
2011-10-25 21:33:12 +00:00
parent 1fc6d0ad54
commit 661604dd4e
4 changed files with 311 additions and 165 deletions

View File

@@ -1,6 +1,9 @@
/* BloomSHA1.java */
package org.xlattice.crypto.filters;
import java.util.Arrays;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
/**
* A Bloom filter for sets of SHA1 digests. A Bloom filter uses a set
* of k hash functions to determine set membership. Each hash function
@@ -31,6 +34,13 @@ package org.xlattice.crypto.filters;
*
* minor tweaks by jrandom, exposing unsynchronized access and
* allowing larger M and K. changes released into the public domain.
*
* Note that this is used only by DecayingBloomFilter, which uses only
* the unsynchronized locked_foo() methods.
*
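* As of 0.8.11, the locked_foo() methods are thread-safe, in that they work
* without external locking, but there is a minor risk of false negatives if
* two threads are accessing the same bloom filter integer at the same time
* (e.g. one thread's unsynchronized |= overwriting a bit just set by another).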
*/
public class BloomSHA1 {
@@ -39,14 +49,14 @@ public class BloomSHA1 {
protected int count;
protected final int[] filter;
protected KeySelector ks;
protected final int[] wordOffset;
protected final int[] bitOffset;
protected final KeySelector ks;
// convenience variables
protected final int filterBits;
protected final int filterWords;
private final BlockingQueue<int[]> buf;
/* (24,11) too big - see KeySelector
public static void main(String args[]) {
@@ -80,15 +90,11 @@ public class BloomSHA1 {
//}
this.m = m;
this.k = k;
count = 0;
filterBits = 1 << m;
filterWords = (filterBits + 31)/32; // round up
filter = new int[filterWords];
doClear();
// offsets into the filter
wordOffset = new int[k];
bitOffset = new int[k];
ks = new KeySelector(m, k, bitOffset, wordOffset);
ks = new KeySelector(m, k);
buf = new LinkedBlockingQueue(16);
// DEBUG
//System.out.println("Bloom constructor: m = " + m + ", k = " + k
@@ -114,9 +120,7 @@ public class BloomSHA1 {
}
/** Clear the filter, unsynchronized */
protected void doClear() {
for (int i = 0; i < filterWords; i++) {
filter[i] = 0;
}
Arrays.fill(filter, 0);
count = 0;
}
/** Synchronized version */
@@ -154,19 +158,25 @@ public class BloomSHA1 {
* @param b byte array representing a key (SHA1 digest)
*/
public void insert (byte[]b) { insert(b, 0, b.length); }
public void insert (byte[]b, int offset, int len) {
synchronized(this) {
locked_insert(b);
locked_insert(b, offset, len);
}
}
public final void locked_insert(byte[]b) { locked_insert(b, 0, b.length); }
public final void locked_insert(byte[]b, int offset, int len) {
ks.getOffsets(b, offset, len);
int[] bitOffset = acquire();
int[] wordOffset = acquire();
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
for (int i = 0; i < k; i++) {
filter[wordOffset[i]] |= 1 << bitOffset[i];
}
count++;
buf.offer(bitOffset);
buf.offer(wordOffset);
}
/**
@@ -176,13 +186,20 @@ public class BloomSHA1 {
* @return true if b is in the filter
*/
protected final boolean isMember(byte[] b) { return isMember(b, 0, b.length); }
protected final boolean isMember(byte[] b, int offset, int len) {
ks.getOffsets(b, offset, len);
int[] bitOffset = acquire();
int[] wordOffset = acquire();
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
for (int i = 0; i < k; i++) {
if (! ((filter[wordOffset[i]] & (1 << bitOffset[i])) != 0) ) {
buf.offer(bitOffset);
buf.offer(wordOffset);
return false;
}
}
buf.offer(bitOffset);
buf.offer(wordOffset);
return true;
}
@@ -202,6 +219,75 @@ public class BloomSHA1 {
}
}
/**
* Get the bloom filter offsets for reuse.
* Caller should pass the returned key to release(FilterKey) on this filter when done.
* @since 0.8.11
*/
public FilterKey getFilterKey(byte[] b, int offset, int len) {
    // Borrow two scratch arrays: pooled ones when available, otherwise new.
    // They stay with the returned key until it is handed to release(FilterKey).
    final int[] bits = acquire();
    final int[] words = acquire();
    // The selector fills in the bit and word positions for this key.
    ks.getOffsets(b, offset, len, bits, words);
    final FilterKey key = new FilterKey(bits, words);
    return key;
}
/**
* Add the key to the filter.
* @since 0.8.11
*/
public void locked_insert(FilterKey fk) {
    // Set one bit per hash function, using the offsets precomputed in the key.
    for (int idx = 0; idx < k; idx++) {
        final int word = fk.wordOffset[idx];
        // Unsynchronized read-modify-write of the selected filter word.
        filter[word] |= 1 << fk.bitOffset[idx];
    }
    // One more insertion recorded; the increment itself is not atomic.
    ++count;
}
/**
* Is the key in the filter.
* @since 0.8.11
*/
public boolean locked_member(FilterKey fk) {
    // The key is reported present only if every one of its k bits is set.
    for (int idx = 0; idx < k; idx++) {
        final int word = filter[fk.wordOffset[idx]];
        final int mask = 1 << fk.bitOffset[idx];
        if ((word & mask) == 0) {
            // A single clear bit means the key was not inserted.
            return false;
        }
    }
    return true;
}
/**
* @since 0.8.11
*/
private int[] acquire() {
    // poll() never blocks; it returns null when the pool is empty.
    final int[] pooled = buf.poll();
    // Fall back to a fresh array of k entries when nothing is cached.
    return (pooled == null) ? new int[k] : pooled;
}
/**
* @since 0.8.11
*/
public void release(FilterKey fk) {
    // Return both offset arrays to the pool so acquire() can hand them out again.
    // The key must not be used after this call, since its arrays may be reused.
    final int[] bits = fk.bitOffset;
    final int[] words = fk.wordOffset;
    // offer() does not block; its result is ignored, so a rejected array is
    // simply left for the garbage collector.
    buf.offer(bits);
    buf.offer(words);
}
/**
* Store the (opaque) bloom filter offsets for reuse.
* @since 0.8.11
*/
public static class FilterKey {
    // Bit position inside each selected filter word, one entry per hash function.
    private final int[] bitOffset;
    // Index into the filter array of each selected word, one entry per hash function.
    private final int[] wordOffset;

    // Private: the only construction site shown here is getFilterKey().
    private FilterKey(int[] bits, int[] words) {
        bitOffset = bits;
        wordOffset = words;
    }
}
/**
* @param n number of set members
* @return approximate false positive rate
@@ -215,6 +301,8 @@ public class BloomSHA1 {
public final double falsePositives() {
return falsePositives(count);
}
/*****
// DEBUG METHODS
public static String keyToString(byte[] key) {
StringBuilder sb = new StringBuilder().append(key[0]);
@@ -223,23 +311,32 @@ public class BloomSHA1 {
}
return sb.toString();
}
*****/
/** convert 64-bit integer to hex String */
/*****
public static String ltoh (long i) {
StringBuilder sb = new StringBuilder().append("#")
.append(Long.toString(i, 16));
return sb.toString();
}
*****/
/** convert 32-bit integer to String */
/*****
public static String itoh (int i) {
StringBuilder sb = new StringBuilder().append("#")
.append(Integer.toString(i, 16));
return sb.toString();
}
*****/
/** convert single byte to String */
/*****
public static String btoh (byte b) {
int i = 0xff & b;
return itoh(i);
}
*****/
}

View File

@@ -1,4 +1,3 @@
/* KeySelector.java */
package org.xlattice.crypto.filters;
/**
@@ -12,25 +11,34 @@ package org.xlattice.crypto.filters;
*
* minor tweaks by jrandom, exposing unsynchronized access and
* allowing larger M and K. changes released into the public domain.
*
* As of 0.8.11, bitoffset and wordoffset out parameters moved from fields
* to selector arguments, to allow concurrency.
* All methods are now thread-safe.
*/
public class KeySelector {
private int m;
private int k;
private byte[] b;
private int offset; // index into b to select
private int length; // length into b to select
private int[] bitOffset;
private int[] wordOffset;
private BitSelector bitSel;
private WordSelector wordSel;
private final int m;
private final int k;
private final BitSelector bitSel;
private final WordSelector wordSel;
public interface BitSelector {
public void getBitSelectors();
/**
* @param bitOffset Out parameter of length k
* @since 0.8.11 out parameter added
*/
public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset);
}
public interface WordSelector {
public void getWordSelectors();
/**
* @param wordOffset Out parameter of length k
* @since 0.8.11 out parameter added
*/
public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset);
}
/** AND with byte to expose index-many bits */
public final static int[] UNMASK = {
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
@@ -49,8 +57,6 @@ public class KeySelector {
*
* @param m size of the filter as a power of 2
* @param k number of 'hash functions'
* @param bitOffset array of k bit offsets (offset of flag bit in word)
* @param wordOffset array of k word offsets (offset of word flag is in)
*
* Note that if k and m are too big, the GenericWordSelector blows up -
* The max for 32-byte keys is m=23 and k=11.
@@ -59,15 +65,13 @@ public class KeySelector {
*
* It isn't clear how to fix this.
*/
public KeySelector (int m, int k, int[] bitOffset, int [] wordOffset) {
public KeySelector (int m, int k) {
//if ( (m < 2) || (m > 20)|| (k < 1)
// || (bitOffset == null) || (wordOffset == null)) {
// throw new IllegalArgumentException();
//}
this.m = m;
this.k = k;
this.bitOffset = bitOffset;
this.wordOffset = wordOffset;
bitSel = new GenericBitSelector();
wordSel = new GenericWordSelector();
}
@@ -78,7 +82,7 @@ public class KeySelector {
*/
public class GenericBitSelector implements BitSelector {
/** Do the extraction */
public void getBitSelectors() {
public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset) {
int curBit = 8 * offset;
int curByte;
for (int j = 0; j < k; j++) {
@@ -132,7 +136,7 @@ public class KeySelector {
*/
public class GenericWordSelector implements WordSelector {
/** Extract the k offsets into the word offset array */
public void getWordSelectors() {
public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset) {
int stride = m - 5;
//assert true: stride<16;
int curBit = (k * 5) + (offset * 8);
@@ -221,32 +225,47 @@ public class KeySelector {
}
}
}
/**
* Given a key, populate the word and bit offset arrays, each
* of which has k elements.
*
* @param key cryptographic key used in populating the arrays
* @param bitOffset Out parameter of length k
* @param wordOffset Out parameter of length k
* @since 0.8.11 out parameters added
*/
public void getOffsets (byte[] key) { getOffsets(key, 0, key.length); }
public void getOffsets (byte[] key, int off, int len) {
if (key == null) {
throw new IllegalArgumentException("null key");
}
if (len < 20) {
throw new IllegalArgumentException(
"key must be at least 20 bytes long");
}
b = key;
offset = off;
length = len;
public void getOffsets (byte[] key, int[] bitOffset, int[] wordOffset) {
getOffsets(key, 0, key.length, bitOffset, wordOffset);
}
/**
* Given a key, populate the word and bit offset arrays, each
* of which has k elements.
*
* @param key cryptographic key used in populating the arrays
* @param bitOffset Out parameter of length k
* @param wordOffset Out parameter of length k
* @since 0.8.11 out parameters added
*/
public void getOffsets (byte[] key, int off, int len, int[] bitOffset, int[] wordOffset) {
// skip these checks for speed
//if (key == null) {
// throw new IllegalArgumentException("null key");
//}
//if (len < 20) {
// throw new IllegalArgumentException(
// "key must be at least 20 bytes long");
//}
// // DEBUG
// System.out.println("KeySelector.getOffsets for "
// + BloomSHA1.keyToString(b));
// // END
bitSel.getBitSelectors();
wordSel.getWordSelectors();
bitSel.getBitSelectors(key, off, len, bitOffset);
wordSel.getWordSelectors(key, off, len, wordOffset);
}
/*****
// DEBUG METHODS ////////////////////////////////////////////////
String itoh(int i) {
return BloomSHA1.itoh(i);
@@ -254,6 +273,7 @@ public class KeySelector {
String btoh(byte b) {
return BloomSHA1.btoh(b);
}
*****/
}