move DecayingBloomFilter, DecayingHashSet, and xlattice filters from core to router

This commit is contained in:
zzz
2012-07-02 19:22:33 +00:00
parent 5eab417134
commit e2588a5379
9 changed files with 19 additions and 19 deletions

View File

@@ -1,343 +0,0 @@
package org.xlattice.crypto.filters;
import java.util.Arrays;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
/**
 * A Bloom filter for sets of SHA1 digests.  A Bloom filter uses a set
 * of k hash functions to determine set membership.  Each hash function
 * produces a value in the range 0..M-1.  The filter is of size M.  To
 * add a member to the set, apply each function to the new member and
 * set the corresponding bit in the filter.  For M very large relative
 * to k, this will normally set k bits in the filter.  To check whether
 * x is a member of the set, apply each of the k hash functions to x
 * and check whether the corresponding bits are set in the filter.  If
 * any are not set, x is definitely not a member.  If all are set, x
 * may be a member.  The probability of error (the false positive rate)
 * is f = (1 - e^(-kN/M))^k, where N is the number of set members.
 *
 * This class takes advantage of the fact that SHA1 digests are good-
 * quality pseudo-random numbers.  The k hash functions are the values
 * of distinct sets of bits taken from the 20-byte SHA1 hash.  The
 * number of bits in the filter, M, is constrained to be a power of
 * 2; M == 2^m.  The number of bits in each hash function may not
 * exceed floor(m/k).
 *
 * This class is designed to be thread-safe, but this has not been
 * exhaustively tested.
 *
 * @author <a href="mailto:jddixon@users.sourceforge.net">Jim Dixon</a>
 *
 * BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice
 * app - http://xlattice.sourceforge.net/
 *
 * minor tweaks by jrandom, exposing unsynchronized access and
 * allowing larger M and K.  changes released into the public domain.
 *
 * Note that this is used only by DecayingBloomFilter, which uses only
 * the unsynchronized locked_foo() methods.
 * Deprecated for use outside of the router; to be moved to router.jar.
 *
 * As of 0.8.11, the locked_foo() methods are thread-safe, in that they work,
 * but there is a minor risk of false-negatives if two threads are
 * accessing the same bloom filter integer.
 */
public class BloomSHA1 {
    /** log2 of the number of bits in the filter */
    protected final int m;
    /** number of 'hash functions' applied to each key */
    protected final int k;
    /** number of insertions since the last clear; duplicate keys are counted each time */
    protected int count;
    /** the filter bits, packed 32 to an int */
    protected final int[] filter;
    /** turns a key into its k (word offset, bit offset) pairs */
    protected final KeySelector ks;

    // convenience variables
    /** number of bits in the filter, 2^m */
    protected final int filterBits;
    /** number of ints backing the filter */
    protected final int filterWords;

    /**
     * Bounded pool of length-k scratch arrays.  Arrays are taken with
     * acquire() and handed back with offer(); when the pool is full the
     * returned array is simply dropped.
     */
    private final BlockingQueue<int[]> buf;

    /**
     * Creates a filter with 2^m bits and k 'hash functions', where
     * each hash function is portion of the 160-bit SHA1 hash.
     *
     * See KeySelector for important restriction on max m and k;
     * for example (24,11) is too big for a 32-byte key.  Only parameter
     * values that can never work are rejected here.
     *
     * @param m determines number of bits in filter, 5 to 30 inclusive
     * @param k number of hash functions, at least 1
     * @throws IllegalArgumentException if m or k is outside the ranges above
     */
    public BloomSHA1(int m, int k) {
        // KeySelector spends 5 bits of every hash function on the bit index
        // and m - 5 on the word index, so m < 5 cannot be selected at all;
        // 1 << m overflows an int above m == 30.
        if (m < 5 || m > 30) {
            throw new IllegalArgumentException("m out of range: " + m);
        }
        if (k < 1) {
            throw new IllegalArgumentException("k out of range: " + k);
        }
        this.m = m;
        this.k = k;
        filterBits = 1 << m;
        filterWords = (filterBits + 31) / 32;     // round up
        filter = new int[filterWords];
        ks = new KeySelector(m, k);
        buf = new LinkedBlockingQueue<int[]>(16);
    }

    /**
     * Creates a filter of 2^m bits, with the number of 'hash functions'
     * k defaulting to 8.
     * @param m determines size of filter
     */
    public BloomSHA1(int m) {
        this(m, 8);
    }

    /**
     * Creates a filter of 2^20 bits with k defaulting to 8.
     */
    public BloomSHA1() {
        this(20, 8);
    }

    /** Clear the filter, unsynchronized */
    protected void doClear() {
        Arrays.fill(filter, 0);
        count = 0;
    }

    /** Synchronized version */
    public void clear() {
        synchronized (this) {
            doClear();
        }
    }

    /**
     * Returns the number of keys which have been inserted.  This
     * class (BloomSHA1) does not guarantee uniqueness in any sense; if the
     * same key is added N times, the number of set members reported
     * will increase by N.
     *
     * @return number of set members
     */
    public final int size() {
        synchronized (this) {
            return count;
        }
    }

    /**
     * @return number of bits in filter
     */
    public final int capacity() {
        return filterBits;
    }

    /**
     * Add a key to the set represented by the filter.
     * Synchronized on this filter.
     *
     * XXX This version does not maintain 4-bit counters, it is not
     * a counting Bloom filter.
     *
     * @param b byte array representing a key (SHA1 digest)
     */
    public void insert(byte[] b) {
        insert(b, 0, b.length);
    }

    /**
     * Add a key to the set represented by the filter.
     * Synchronized on this filter.
     *
     * @param b byte array containing the key
     * @param offset index in b of the first byte of the key
     * @param len length of the key in bytes
     */
    public void insert(byte[] b, int offset, int len) {
        synchronized (this) {
            locked_insert(b, offset, len);
        }
    }

    /**
     * Add a key to the filter without taking the lock.
     *
     * @param b byte array representing a key
     */
    public final void locked_insert(byte[] b) {
        locked_insert(b, 0, b.length);
    }

    /**
     * Add a key to the filter without taking the lock.
     *
     * @param b byte array containing the key
     * @param offset index in b of the first byte of the key
     * @param len length of the key in bytes
     */
    public final void locked_insert(byte[] b, int offset, int len) {
        int[] bitOffset = acquire();
        int[] wordOffset = acquire();
        ks.getOffsets(b, offset, len, bitOffset, wordOffset);
        for (int i = 0; i < k; i++) {
            filter[wordOffset[i]] |= 1 << bitOffset[i];
        }
        count++;
        buf.offer(bitOffset);
        buf.offer(wordOffset);
    }

    /**
     * Is a key in the filter.  Sets up the bit and word offset arrays.
     *
     * @param b byte array representing a key (SHA1 digest)
     * @return true if b is in the filter
     */
    protected final boolean isMember(byte[] b) {
        return isMember(b, 0, b.length);
    }

    /**
     * Is a key in the filter.  Sets up the bit and word offset arrays.
     *
     * @param b byte array containing the key
     * @param offset index in b of the first byte of the key
     * @param len length of the key in bytes
     * @return true if every selected bit is set
     */
    protected final boolean isMember(byte[] b, int offset, int len) {
        int[] bitOffset = acquire();
        int[] wordOffset = acquire();
        ks.getOffsets(b, offset, len, bitOffset, wordOffset);
        boolean present = true;
        for (int i = 0; i < k; i++) {
            if ((filter[wordOffset[i]] & (1 << bitOffset[i])) == 0) {
                // one clear bit is enough to rule the key out
                present = false;
                break;
            }
        }
        buf.offer(bitOffset);
        buf.offer(wordOffset);
        return present;
    }

    /**
     * Is a key in the filter.  Does not take the lock.
     *
     * @param b byte array representing a key
     * @return true if b is in the filter
     */
    public final boolean locked_member(byte[] b) {
        return isMember(b);
    }

    /**
     * Is a key in the filter.  Does not take the lock.
     *
     * @param b byte array containing the key
     * @param offset index in b of the first byte of the key
     * @param len length of the key in bytes
     * @return true if the key is in the filter
     */
    public final boolean locked_member(byte[] b, int offset, int len) {
        return isMember(b, offset, len);
    }

    /**
     * Is a key in the filter.  External interface, internally synchronized.
     *
     * @param b byte array representing a key (SHA1 digest)
     * @return true if b is in the filter
     */
    public final boolean member(byte[] b) {
        return member(b, 0, b.length);
    }

    /**
     * Is a key in the filter.  External interface, internally synchronized.
     *
     * @param b byte array containing the key
     * @param offset index in b of the first byte of the key
     * @param len length of the key in bytes
     * @return true if the key is in the filter
     */
    public final boolean member(byte[] b, int offset, int len) {
        synchronized (this) {
            return isMember(b, offset, len);
        }
    }

    /**
     *  Get the bloom filter offsets for reuse.
     *  Caller should call release(rv) when done with it.
     *
     *  @param b byte array containing the key
     *  @param offset index in b of the first byte of the key
     *  @param len length of the key in bytes
     *  @return the precomputed offsets for the key
     *  @since 0.8.11
     */
    public FilterKey getFilterKey(byte[] b, int offset, int len) {
        int[] bitOffset = acquire();
        int[] wordOffset = acquire();
        ks.getOffsets(b, offset, len, bitOffset, wordOffset);
        return new FilterKey(bitOffset, wordOffset);
    }

    /**
     *  Add the key to the filter.  Does not take the lock.
     *
     *  @param fk offsets previously obtained from getFilterKey()
     *  @since 0.8.11
     */
    public void locked_insert(FilterKey fk) {
        for (int i = 0; i < k; i++) {
            filter[fk.wordOffset[i]] |= 1 << fk.bitOffset[i];
        }
        count++;
    }

    /**
     *  Is the key in the filter.  Does not take the lock.
     *
     *  @param fk offsets previously obtained from getFilterKey()
     *  @return true if every selected bit is set
     *  @since 0.8.11
     */
    public boolean locked_member(FilterKey fk) {
        for (int i = 0; i < k; i++) {
            if ((filter[fk.wordOffset[i]] & (1 << fk.bitOffset[i])) == 0) {
                return false;
            }
        }
        return true;
    }

    /**
     *  Take a scratch array from the pool, or allocate one if the pool is empty.
     *
     *  @return an array of length k; contents are unspecified
     *  @since 0.8.11
     */
    private int[] acquire() {
        int[] rv = buf.poll();
        if (rv != null) {
            return rv;
        }
        return new int[k];
    }

    /**
     *  Hand the arrays inside a FilterKey back to the pool.
     *  The key must not be used after this call.
     *
     *  @param fk a key obtained from getFilterKey()
     *  @since 0.8.11
     */
    public void release(FilterKey fk) {
        buf.offer(fk.bitOffset);
        buf.offer(fk.wordOffset);
    }

    /**
     *  Store the (opaque) bloom filter offsets for reuse.
     *  @since 0.8.11
     */
    public static class FilterKey {
        private final int[] bitOffset;
        private final int[] wordOffset;

        private FilterKey(int[] bitOffset, int[] wordOffset) {
            this.bitOffset = bitOffset;
            this.wordOffset = wordOffset;
        }
    }

    /**
     * @param n number of set members
     * @return approximate false positive rate
     */
    public final double falsePositives(int n) {
        // (1 - e(-kN/M))^k
        double exponent = 0d - ((double) k) * (long) n / filterBits;
        return Math.pow(1l - Math.exp(exponent), k);
    }

    /**
     * Unsynchronized; uses the current insertion count.
     *
     * @return approximate false positive rate for the current insertion count
     */
    public final double falsePositives() {
        return falsePositives(count);
    }
}

View File

@@ -1,279 +0,0 @@
package org.xlattice.crypto.filters;
/**
 * Given a key, populates arrays determining word and bit offsets into
 * a Bloom filter.
 *
 * @author <A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A>
 *
 * BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice
 * app - http://xlattice.sourceforge.net/
 *
 * minor tweaks by jrandom, exposing unsynchronized access and
 * allowing larger M and K.  changes released into the public domain.
 *
 * As of 0.8.11, bitoffset and wordoffset out parameters moved from fields
 * to selector arguments, to allow concurrency.
 * All methods are now thread-safe.
 */
public class KeySelector {

    /** log2 of the filter size in bits */
    private final int m;
    /** number of 'hash functions' */
    private final int k;
    private final BitSelector bitSel;
    private final WordSelector wordSel;

    public interface BitSelector {
        /**
         *  @param b array containing the key
         *  @param offset index in b of the first byte of the key
         *  @param length length of the key in bytes
         *  @param bitOffset Out parameter of length k
         *  @since 0.8.11 out parameter added
         */
        public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset);
    }

    public interface WordSelector {
        /**
         *  @param b array containing the key
         *  @param offset index in b of the first byte of the key
         *  @param length length of the key in bytes
         *  @param wordOffset Out parameter of length k
         *  @since 0.8.11 out parameter added
         */
        public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset);
    }

    /** AND with byte to expose index-many bits */
    public final static int[] UNMASK = {
 //   0  1  2  3   4   5   6    7    8    9     10    11    12    13     14     15
      0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767};

    /** AND with byte to zero out index-many bits */
    public final static int[] MASK   = {
      ~0, ~1, ~3, ~7, ~15, ~31, ~63, ~127, ~255, ~511, ~1023, ~2047, ~4095, ~8191, ~16383, ~32767};

    public final static int TWO_UP_15 = 32 * 1024;

    /**
     * Creates a key selector for a Bloom filter.  When a key is presented
     * to the getOffsets() method, the k 'hash function' values are
     * extracted and used to populate bitOffset and wordOffset arrays which
     * specify the k flags to be set or examined in the filter.
     *
     * Each hash function consumes m bits of the key: 5 for the bit offset
     * and m - 5 for the word offset.  The key must therefore be long enough
     * to supply all of them, or the selectors throw
     * ArrayIndexOutOfBoundsException.  The max for 32-byte keys is
     * m=23 and k=11.  No validation is done here.
     *
     * @param m    size of the filter as a power of 2
     * @param k    number of 'hash functions'
     */
    public KeySelector(int m, int k) {
        this.m = m;
        this.k = k;
        bitSel  = new GenericBitSelector();
        wordSel = new GenericWordSelector();
    }

    /**
     * Extracts the k bit offsets from a key, suitable for general values
     * of m and k.  Bit offsets are consecutive 5-bit fields starting at
     * the first bit of the key.
     */
    public class GenericBitSelector implements BitSelector {
        /**
         * Do the extraction.
         *
         * @param b array containing the key
         * @param offset index in b of the first byte of the key
         * @param length unused
         * @param bitOffset Out parameter of length k, each entry 0..31
         */
        public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset) {
            int curBit = 8 * offset;
            for (int j = 0; j < k; j++) {
                int curByte = curBit / 8;
                int bitsUnused = ((curByte + 1) * 8) - curBit;    // left in byte

                if (bitsUnused > 5) {
                    // field lies inside the current byte, with bits to spare below it
                    bitOffset[j] = ((0xff & b[curByte]) >> (bitsUnused - 5)) & UNMASK[5];
                } else if (bitsUnused == 5) {
                    // field is exactly the low 5 bits of the current byte
                    bitOffset[j] = b[curByte] & UNMASK[5];
                } else {
                    // low bits from the rest of this byte, high bits from the
                    // top of the next byte
                    bitOffset[j] = (b[curByte] & UNMASK[bitsUnused])
                                 | (((0xff & b[curByte + 1]) >> 3) & MASK[bitsUnused]);
                }
                curBit += 5;
            }
        }
    }

    /**
     * Extracts the k word offsets from a key.  Suitable for general
     * values of m and k.  Word offsets are consecutive (m - 5)-bit fields
     * starting immediately after the k 5-bit bit-offset fields.
     */
    public class GenericWordSelector implements WordSelector {
        /**
         * Extract the k offsets into the word offset array.
         *
         * @param b array containing the key
         * @param offset index in b of the first byte of the key
         * @param length unused
         * @param wordOffset Out parameter of length k
         * @throws ArrayIndexOutOfBoundsException if the key is too short for m and k
         */
        public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset) {
            int stride = m - 5;
            int curBit = (k * 5) + (offset * 8);
            for (int j = 0; j < k; j++) {
                int curByte = curBit / 8;
                int bitsUnused = ((curByte + 1) * 8) - curBit;    // left in byte

                if (bitsUnused > stride) {
                    // the value is entirely within the current byte
                    wordOffset[j] = ((0xff & b[curByte]) >> (bitsUnused - stride))
                                    & UNMASK[stride];
                } else if (bitsUnused == stride) {
                    // the value fills the current byte
                    wordOffset[j] = b[curByte] & UNMASK[stride];
                } else {    // bitsUnused < stride
                    // value occupies more than one byte
                    // bits from first byte, right-aligned in result
                    int value = b[curByte] & UNMASK[bitsUnused];
                    int filled = bitsUnused;    // result bits populated so far
                    int next = curByte + 1;
                    // Whole bytes, each placed above what is already there.
                    // This must be a loop: a fixed second/third-byte sequence
                    // runs out when more than 16 bits are still needed
                    // (e.g. m=23 with only 1 bit left in the first byte).
                    while (stride - filled >= 8) {
                        value |= (0xff & b[next]) << filled;
                        next++;
                        filled += 8;
                    }
                    // whatever is still missing comes from the top of the next byte
                    int remaining = stride - filled;
                    if (remaining > 0) {
                        value |= ((0xff & b[next]) >> (8 - remaining)) << filled;
                    }
                    wordOffset[j] = value;
                }
                curBit += stride;
            }
        }
    }

    /**
     * Given a key, populate the word and bit offset arrays, each
     * of which has k elements.
     *
     * @param key cryptographic key used in populating the arrays
     * @param bitOffset Out parameter of length k
     * @param wordOffset Out parameter of length k
     * @since 0.8.11 out parameters added
     */
    public void getOffsets(byte[] key, int[] bitOffset, int[] wordOffset) {
        getOffsets(key, 0, key.length, bitOffset, wordOffset);
    }

    /**
     * Given a key, populate the word and bit offset arrays, each
     * of which has k elements.
     * The key is not checked for null or for length, for speed.
     *
     * @param key array containing the cryptographic key used in populating the arrays
     * @param off index in key of the first byte to use
     * @param len length of the key in bytes
     * @param bitOffset Out parameter of length k
     * @param wordOffset Out parameter of length k
     * @since 0.8.11 out parameters added
     */
    public void getOffsets(byte[] key, int off, int len, int[] bitOffset, int[] wordOffset) {
        bitSel.getBitSelectors(key, off, len, bitOffset);
        wordSel.getWordSelectors(key, off, len, wordOffset);
    }
}