forked from I2P_Developers/i2p.i2p
* BloomSHA1, DecayingBloomFilter:
- Refactor for concurrency, at some small risk of false negatives
- Optimizations to cache objects and reuse offsets
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
/* BloomSHA1.java */
|
||||
package org.xlattice.crypto.filters;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
|
||||
/**
|
||||
* A Bloom filter for sets of SHA1 digests. A Bloom filter uses a set
|
||||
* of k hash functions to determine set membership. Each hash function
|
||||
@@ -31,6 +34,13 @@ package org.xlattice.crypto.filters;
|
||||
*
|
||||
* minor tweaks by jrandom, exposing unsynchronized access and
|
||||
* allowing larger M and K. changes released into the public domain.
|
||||
*
|
||||
* Note that this is used only by DecayingBloomFilter, which uses only
|
||||
* the unsynchronized locked_foo() methods.
|
||||
*
|
||||
* As of 0.8.11, the locked_foo() methods are thread-safe, in that they work,
|
||||
* but there is a minor risk of false-negatives if two threads are
|
||||
* concurrently setting bits in the same int of the filter array.
|
||||
*/
|
||||
|
||||
public class BloomSHA1 {
|
||||
@@ -39,14 +49,14 @@ public class BloomSHA1 {
|
||||
protected int count;
|
||||
|
||||
protected final int[] filter;
|
||||
protected KeySelector ks;
|
||||
protected final int[] wordOffset;
|
||||
protected final int[] bitOffset;
|
||||
protected final KeySelector ks;
|
||||
|
||||
// convenience variables
|
||||
protected final int filterBits;
|
||||
protected final int filterWords;
|
||||
|
||||
private final BlockingQueue<int[]> buf;
|
||||
|
||||
/* (24,11) too big - see KeySelector
|
||||
|
||||
public static void main(String args[]) {
|
||||
@@ -80,15 +90,11 @@ public class BloomSHA1 {
|
||||
//}
|
||||
this.m = m;
|
||||
this.k = k;
|
||||
count = 0;
|
||||
filterBits = 1 << m;
|
||||
filterWords = (filterBits + 31)/32; // round up
|
||||
filter = new int[filterWords];
|
||||
doClear();
|
||||
// offsets into the filter
|
||||
wordOffset = new int[k];
|
||||
bitOffset = new int[k];
|
||||
ks = new KeySelector(m, k, bitOffset, wordOffset);
|
||||
ks = new KeySelector(m, k);
|
||||
buf = new LinkedBlockingQueue(16);
|
||||
|
||||
// DEBUG
|
||||
//System.out.println("Bloom constructor: m = " + m + ", k = " + k
|
||||
@@ -114,9 +120,7 @@ public class BloomSHA1 {
|
||||
}
|
||||
/** Clear the filter, unsynchronized */
|
||||
protected void doClear() {
|
||||
for (int i = 0; i < filterWords; i++) {
|
||||
filter[i] = 0;
|
||||
}
|
||||
Arrays.fill(filter, 0);
|
||||
count = 0;
|
||||
}
|
||||
/** Synchronized version */
|
||||
@@ -154,19 +158,25 @@ public class BloomSHA1 {
|
||||
* @param b byte array representing a key (SHA1 digest)
|
||||
*/
|
||||
public void insert (byte[]b) { insert(b, 0, b.length); }
|
||||
|
||||
public void insert (byte[]b, int offset, int len) {
|
||||
synchronized(this) {
|
||||
locked_insert(b);
|
||||
locked_insert(b, offset, len);
|
||||
}
|
||||
}
|
||||
|
||||
public final void locked_insert(byte[]b) { locked_insert(b, 0, b.length); }
|
||||
|
||||
public final void locked_insert(byte[]b, int offset, int len) {
|
||||
ks.getOffsets(b, offset, len);
|
||||
int[] bitOffset = acquire();
|
||||
int[] wordOffset = acquire();
|
||||
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
|
||||
for (int i = 0; i < k; i++) {
|
||||
filter[wordOffset[i]] |= 1 << bitOffset[i];
|
||||
}
|
||||
count++;
|
||||
buf.offer(bitOffset);
|
||||
buf.offer(wordOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -176,13 +186,20 @@ public class BloomSHA1 {
|
||||
* @return true if b is in the filter
|
||||
*/
|
||||
protected final boolean isMember(byte[] b) { return isMember(b, 0, b.length); }
|
||||
|
||||
protected final boolean isMember(byte[] b, int offset, int len) {
|
||||
ks.getOffsets(b, offset, len);
|
||||
int[] bitOffset = acquire();
|
||||
int[] wordOffset = acquire();
|
||||
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
|
||||
for (int i = 0; i < k; i++) {
|
||||
if (! ((filter[wordOffset[i]] & (1 << bitOffset[i])) != 0) ) {
|
||||
buf.offer(bitOffset);
|
||||
buf.offer(wordOffset);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
buf.offer(bitOffset);
|
||||
buf.offer(wordOffset);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -202,6 +219,75 @@ public class BloomSHA1 {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the bloom filter offsets for reuse.
|
||||
* Caller should call release(rv) on this filter when done.
|
||||
* @since 0.8.11
|
||||
*/
|
||||
public FilterKey getFilterKey(byte[] b, int offset, int len) {
|
||||
int[] bitOffset = acquire();
|
||||
int[] wordOffset = acquire();
|
||||
ks.getOffsets(b, offset, len, bitOffset, wordOffset);
|
||||
return new FilterKey(bitOffset, wordOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add the key to the filter.
|
||||
* @since 0.8.11
|
||||
*/
|
||||
public void locked_insert(FilterKey fk) {
|
||||
for (int i = 0; i < k; i++) {
|
||||
filter[fk.wordOffset[i]] |= 1 << fk.bitOffset[i];
|
||||
}
|
||||
count++;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Is the key in the filter.
|
||||
* @since 0.8.11
|
||||
*/
|
||||
public boolean locked_member(FilterKey fk) {
|
||||
for (int i = 0; i < k; i++) {
|
||||
if (! ((filter[fk.wordOffset[i]] & (1 << fk.bitOffset[i])) != 0) )
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @since 0.8.11
|
||||
*/
|
||||
private int[] acquire() {
|
||||
int[] rv = buf.poll();
|
||||
if (rv != null)
|
||||
return rv;
|
||||
return new int[k];
|
||||
}
|
||||
|
||||
/**
|
||||
* @since 0.8.11
|
||||
*/
|
||||
public void release(FilterKey fk) {
|
||||
buf.offer(fk.bitOffset);
|
||||
buf.offer(fk.wordOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Store the (opaque) bloom filter offsets for reuse.
|
||||
* @since 0.8.11
|
||||
*/
|
||||
public static class FilterKey {
|
||||
|
||||
private final int[] bitOffset;
|
||||
private final int[] wordOffset;
|
||||
|
||||
private FilterKey(int[] bitOffset, int[] wordOffset) {
|
||||
this.bitOffset = bitOffset;
|
||||
this.wordOffset = wordOffset;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param n number of set members
|
||||
* @return approximate false positive rate
|
||||
@@ -215,6 +301,8 @@ public class BloomSHA1 {
|
||||
public final double falsePositives() {
|
||||
return falsePositives(count);
|
||||
}
|
||||
|
||||
/*****
|
||||
// DEBUG METHODS
|
||||
public static String keyToString(byte[] key) {
|
||||
StringBuilder sb = new StringBuilder().append(key[0]);
|
||||
@@ -223,23 +311,32 @@ public class BloomSHA1 {
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
*****/
|
||||
|
||||
/** convert 64-bit integer to hex String */
|
||||
/*****
|
||||
public static String ltoh (long i) {
|
||||
StringBuilder sb = new StringBuilder().append("#")
|
||||
.append(Long.toString(i, 16));
|
||||
return sb.toString();
|
||||
}
|
||||
*****/
|
||||
|
||||
/** convert 32-bit integer to hex String */
|
||||
/*****
|
||||
public static String itoh (int i) {
|
||||
StringBuilder sb = new StringBuilder().append("#")
|
||||
.append(Integer.toString(i, 16));
|
||||
return sb.toString();
|
||||
}
|
||||
*****/
|
||||
|
||||
/** convert single byte to hex String */
|
||||
/*****
|
||||
public static String btoh (byte b) {
|
||||
int i = 0xff & b;
|
||||
return itoh(i);
|
||||
}
|
||||
*****/
|
||||
}
|
||||
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
/* KeySelector.java */
|
||||
package org.xlattice.crypto.filters;
|
||||
|
||||
/**
|
||||
@@ -12,25 +11,34 @@ package org.xlattice.crypto.filters;
|
||||
*
|
||||
* minor tweaks by jrandom, exposing unsynchronized access and
|
||||
* allowing larger M and K. changes released into the public domain.
|
||||
*
|
||||
* As of 0.8.11, bitoffset and wordoffset out parameters moved from fields
|
||||
* to selector arguments, to allow concurrency.
|
||||
* All methods are now thread-safe.
|
||||
*/
|
||||
public class KeySelector {
|
||||
|
||||
private int m;
|
||||
private int k;
|
||||
private byte[] b;
|
||||
private int offset; // index into b to select
|
||||
private int length; // length into b to select
|
||||
private int[] bitOffset;
|
||||
private int[] wordOffset;
|
||||
private BitSelector bitSel;
|
||||
private WordSelector wordSel;
|
||||
private final int m;
|
||||
private final int k;
|
||||
private final BitSelector bitSel;
|
||||
private final WordSelector wordSel;
|
||||
|
||||
public interface BitSelector {
|
||||
public void getBitSelectors();
|
||||
/**
|
||||
* @param bitOffset Out parameter of length k
|
||||
* @since 0.8.11 out parameter added
|
||||
*/
|
||||
public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset);
|
||||
}
|
||||
|
||||
public interface WordSelector {
|
||||
public void getWordSelectors();
|
||||
/**
|
||||
* @param wordOffset Out parameter of length k
|
||||
* @since 0.8.11 out parameter added
|
||||
*/
|
||||
public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset);
|
||||
}
|
||||
|
||||
/** AND with byte to expose index-many bits */
|
||||
public final static int[] UNMASK = {
|
||||
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
|
||||
@@ -49,8 +57,6 @@ public class KeySelector {
|
||||
*
|
||||
* @param m size of the filter as a power of 2
|
||||
* @param k number of 'hash functions'
|
||||
* @param bitOffset array of k bit offsets (offset of flag bit in word)
|
||||
* @param wordOffset array of k word offsets (offset of word flag is in)
|
||||
*
|
||||
* Note that if k and m are too big, the GenericWordSelector blows up -
|
||||
* The max for 32-byte keys is m=23 and k=11.
|
||||
@@ -59,15 +65,13 @@ public class KeySelector {
|
||||
*
|
||||
* It isn't clear how to fix this.
|
||||
*/
|
||||
public KeySelector (int m, int k, int[] bitOffset, int [] wordOffset) {
|
||||
public KeySelector (int m, int k) {
|
||||
//if ( (m < 2) || (m > 20)|| (k < 1)
|
||||
// || (bitOffset == null) || (wordOffset == null)) {
|
||||
// throw new IllegalArgumentException();
|
||||
//}
|
||||
this.m = m;
|
||||
this.k = k;
|
||||
this.bitOffset = bitOffset;
|
||||
this.wordOffset = wordOffset;
|
||||
bitSel = new GenericBitSelector();
|
||||
wordSel = new GenericWordSelector();
|
||||
}
|
||||
@@ -78,7 +82,7 @@ public class KeySelector {
|
||||
*/
|
||||
public class GenericBitSelector implements BitSelector {
|
||||
/** Do the extraction */
|
||||
public void getBitSelectors() {
|
||||
public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset) {
|
||||
int curBit = 8 * offset;
|
||||
int curByte;
|
||||
for (int j = 0; j < k; j++) {
|
||||
@@ -132,7 +136,7 @@ public class KeySelector {
|
||||
*/
|
||||
public class GenericWordSelector implements WordSelector {
|
||||
/** Extract the k offsets into the word offset array */
|
||||
public void getWordSelectors() {
|
||||
public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset) {
|
||||
int stride = m - 5;
|
||||
//assert true: stride<16;
|
||||
int curBit = (k * 5) + (offset * 8);
|
||||
@@ -221,32 +225,47 @@ public class KeySelector {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a key, populate the word and bit offset arrays, each
|
||||
* of which has k elements.
|
||||
*
|
||||
* @param key cryptographic key used in populating the arrays
|
||||
* @param bitOffset Out parameter of length k
|
||||
* @param wordOffset Out parameter of length k
|
||||
* @since 0.8.11 out parameters added
|
||||
*/
|
||||
public void getOffsets (byte[] key) { getOffsets(key, 0, key.length); }
|
||||
public void getOffsets (byte[] key, int off, int len) {
|
||||
if (key == null) {
|
||||
throw new IllegalArgumentException("null key");
|
||||
}
|
||||
if (len < 20) {
|
||||
throw new IllegalArgumentException(
|
||||
"key must be at least 20 bytes long");
|
||||
}
|
||||
b = key;
|
||||
offset = off;
|
||||
length = len;
|
||||
public void getOffsets (byte[] key, int[] bitOffset, int[] wordOffset) {
|
||||
getOffsets(key, 0, key.length, bitOffset, wordOffset);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a key, populate the word and bit offset arrays, each
|
||||
* of which has k elements.
|
||||
*
|
||||
* @param key cryptographic key used in populating the arrays
|
||||
* @param bitOffset Out parameter of length k
|
||||
* @param wordOffset Out parameter of length k
|
||||
* @since 0.8.11 out parameters added
|
||||
*/
|
||||
public void getOffsets (byte[] key, int off, int len, int[] bitOffset, int[] wordOffset) {
|
||||
// skip these checks for speed
|
||||
//if (key == null) {
|
||||
// throw new IllegalArgumentException("null key");
|
||||
//}
|
||||
//if (len < 20) {
|
||||
// throw new IllegalArgumentException(
|
||||
// "key must be at least 20 bytes long");
|
||||
//}
|
||||
// // DEBUG
|
||||
// System.out.println("KeySelector.getOffsets for "
|
||||
// + BloomSHA1.keyToString(b));
|
||||
// // END
|
||||
bitSel.getBitSelectors();
|
||||
wordSel.getWordSelectors();
|
||||
bitSel.getBitSelectors(key, off, len, bitOffset);
|
||||
wordSel.getWordSelectors(key, off, len, wordOffset);
|
||||
}
|
||||
|
||||
/*****
|
||||
// DEBUG METHODS ////////////////////////////////////////////////
|
||||
String itoh(int i) {
|
||||
return BloomSHA1.itoh(i);
|
||||
@@ -254,6 +273,7 @@ public class KeySelector {
|
||||
String btoh(byte b) {
|
||||
return BloomSHA1.btoh(b);
|
||||
}
|
||||
*****/
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user