0.5 merging

This commit is contained in:
jrandom
2005-02-16 22:37:24 +00:00
committed by zzz
parent 9646ac2911
commit 7ef9ce8cc6
16 changed files with 1139 additions and 1213 deletions

View File

@@ -0,0 +1,224 @@
/* BloomSHA1.java */
package org.xlattice.crypto.filters;
/**
* A Bloom filter for sets of SHA1 digests. A Bloom filter uses a set
* of k hash functions to determine set membership. Each hash function
* produces a value in the range 0..M-1. The filter is of size M. To
* add a member to the set, apply each function to the new member and
* set the corresponding bit in the filter. For M very large relative
* to k, this will normally set k bits in the filter. To check whether
* x is a member of the set, apply each of the k hash functions to x
* and check whether the corresponding bits are set in the filter. If
* any are not set, x is definitely not a member. If all are set, x
* may be a member. The probability of error (the false positive rate)
* is f = (1 - e^(-kN/M))^k, where N is the number of set members.
*
* This class takes advantage of the fact that SHA1 digests are good-
* quality pseudo-random numbers. The k hash functions are the values
* of distinct sets of bits taken from the 20-byte SHA1 hash. The
* number of bits in the filter, M, is constrained to be a power of
* 2; M == 2^m. The number of bits in each hash function may not
* exceed floor(m/k).
*
* This class is designed to be thread-safe, but this has not been
* exhaustively tested.
*
* @author < A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A>
*
* BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice
* app - http://xlattice.sourceforge.net/
*
* minor tweaks by jrandom, exposing unsynchronized access and
* allowing larger M and K. changes released into the public domain.
*/
public class BloomSHA1 {
protected final int m;
protected final int k;
protected int count;
protected final int[] filter;
protected KeySelector ks;
protected final int[] wordOffset;
protected final int[] bitOffset;
// convenience variables
protected final int filterBits;
protected final int filterWords;
/**
* Creates a filter with 2^m bits and k 'hash functions', where
* each hash function is portion of the 160-bit SHA1 hash.
* @param m determines number of bits in filter, defaults to 20
* @param k number of hash functions, defaults to 8
*/
public BloomSHA1( int m, int k) {
// XXX need to devise more reasonable set of checks
//if ( m < 2 || m > 20) {
// throw new IllegalArgumentException("m out of range");
//}
//if ( k < 1 || ( k * m > 160 )) {
// throw new IllegalArgumentException(
// "too many hash functions for filter size");
//}
this.m = m;
this.k = k;
count = 0;
filterBits = 1 << m;
filterWords = (filterBits + 31)/32; // round up
filter = new int[filterWords];
doClear();
// offsets into the filter
wordOffset = new int[k];
bitOffset = new int[k];
ks = new KeySelector(m, k, bitOffset, wordOffset);
// DEBUG
//System.out.println("Bloom constructor: m = " + m + ", k = " + k
// + "\n filterBits = " + filterBits
// + ", filterWords = " + filterWords);
// END
}
/**
* Creates a filter of 2^m bits, with the number of 'hash functions"
* k defaulting to 8.
* @param m determines size of filter
*/
public BloomSHA1 (int m) {
this(m, 8);
}
/**
* Creates a filter of 2^20 bits with k defaulting to 8.
*/
public BloomSHA1 () {
this (20, 8);
}
/** Clear the filter, unsynchronized */
protected void doClear() {
for (int i = 0; i < filterWords; i++) {
filter[i] = 0;
}
}
/** Synchronized version */
public void clear() {
synchronized (this) {
doClear();
}
}
/**
* Returns the number of keys which have been inserted. This
* class (BloomSHA1) does not guarantee uniqueness in any sense; if the
* same key is added N times, the number of set members reported
* will increase by N.
*
* @return number of set members
*/
public final int size() {
synchronized (this) {
return count;
}
}
/**
* @return number of bits in filter
*/
public final int capacity () {
return filterBits;
}
/**
* Add a key to the set represented by the filter.
*
* XXX This version does not maintain 4-bit counters, it is not
* a counting Bloom filter.
*
* @param b byte array representing a key (SHA1 digest)
*/
public void insert (byte[]b) {
synchronized(this) {
locked_insert(b);
}
}
public final void locked_insert(byte[]b) {
ks.getOffsets(b);
for (int i = 0; i < k; i++) {
filter[wordOffset[i]] |= 1 << bitOffset[i];
}
count++;
}
/**
* Is a key in the filter. Sets up the bit and word offset arrays.
*
* @param b byte array representing a key (SHA1 digest)
* @return true if b is in the filter
*/
protected final boolean isMember(byte[] b) {
ks.getOffsets(b);
for (int i = 0; i < k; i++) {
if (! ((filter[wordOffset[i]] & (1 << bitOffset[i])) != 0) ) {
return false;
}
}
return true;
}
public final boolean locked_member(byte[]b) { return isMember(b); }
/**
* Is a key in the filter. External interface, internally synchronized.
*
* @param b byte array representing a key (SHA1 digest)
* @return true if b is in the filter
*/
public final boolean member(byte[]b) {
synchronized (this) {
return isMember(b);
}
}
/**
* @param n number of set members
* @return approximate false positive rate
*/
public final double falsePositives(int n) {
// (1 - e(-kN/M))^k
return java.lang.Math.pow (
(1l - java.lang.Math.exp( ((double)k) * (long)n / (long)filterBits)), (long)k);
}
public final double falsePositives() {
return falsePositives(count);
}
// DEBUG METHODS
public static String keyToString(byte[] key) {
StringBuffer sb = new StringBuffer().append(key[0]);
for (int i = 1; i < key.length; i++) {
sb.append(".").append(Integer.toString(key[i], 16));
}
return sb.toString();
}
/** convert 64-bit integer to hex String */
public static String ltoh (long i) {
StringBuffer sb = new StringBuffer().append("#")
.append(Long.toString(i, 16));
return sb.toString();
}
/** convert 32-bit integer to String */
public static String itoh (int i) {
StringBuffer sb = new StringBuffer().append("#")
.append(Integer.toString(i, 16));
return sb.toString();
}
/** convert single byte to String */
public static String btoh (byte b) {
int i = 0xff & b;
return itoh(i);
}
}

View File

@@ -0,0 +1,245 @@
/* KeySelector.java */
package org.xlattice.crypto.filters;
/**
* Given a key, populates arrays determining word and bit offsets into
* a Bloom filter.
*
* @author <A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A>
*
* BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice
* app - http://xlattice.sourceforge.net/
*
* minor tweaks by jrandom, exposing unsynchronized access and
* allowing larger M and K. changes released into the public domain.
*/
public class KeySelector {
private int m;
private int k;
private byte[] b;
private int[] bitOffset;
private int[] wordOffset;
private BitSelector bitSel;
private WordSelector wordSel;
public interface BitSelector {
public void getBitSelectors();
}
public interface WordSelector {
public void getWordSelectors();
}
/** AND with byte to expose index-many bits */
public final static int[] UNMASK = {
// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767};
/** AND with byte to zero out index-many bits */
public final static int[] MASK = {
~0,~1,~3,~7,~15,~31,~63,~127,~255,~511,~1023,~2047,~4095,~8191,~16383,~32767};
public final static int TWO_UP_15 = 32 * 1024;
/**
* Creates a key selector for a Bloom filter. When a key is presented
* to the getOffsets() method, the k 'hash function' values are
* extracted and used to populate bitOffset and wordOffset arrays which
* specify the k flags to be set or examined in the filter.
*
* @param m size of the filter as a power of 2
* @param k number of 'hash functions'
* @param bitOffset array of k bit offsets (offset of flag bit in word)
* @param wordOffset array of k word offsets (offset of word flag is in)
*/
public KeySelector (int m, int k, int[] bitOffset, int [] wordOffset) {
//if ( (m < 2) || (m > 20)|| (k < 1)
// || (bitOffset == null) || (wordOffset == null)) {
// throw new IllegalArgumentException();
//}
this.m = m;
this.k = k;
this.bitOffset = bitOffset;
this.wordOffset = wordOffset;
bitSel = new GenericBitSelector();
wordSel = new GenericWordSelector();
}
/**
* Extracts the k bit offsets from a key, suitable for general values
* of m and k.
*/
public class GenericBitSelector implements BitSelector {
/** Do the extraction */
public void getBitSelectors() {
int curBit = 0;
int curByte;
for (int j = 0; j < k; j++) {
curByte = curBit / 8;
int bitsUnused = ((curByte + 1) * 8) - curBit; // left in byte
// // DEBUG
// System.out.println (
// "this byte = " + btoh(b[curByte])
// + ", next byte = " + btoh(b[curByte + 1])
// + "; curBit=" + curBit + ", curByte= " + curByte
// + ", bitsUnused=" + bitsUnused);
// // END
if (bitsUnused > 5) {
bitOffset[j] = ((0xff & b[curByte])
>> (bitsUnused - 5)) & UNMASK[5];
// // DEBUG
// System.out.println(
// " before shifting: " + btoh(b[curByte])
// + "\n after shifting: "
// + itoh( (0xff & b[curByte]) >> (bitsUnused - 5))
// + "\n mask: " + itoh(UNMASK[5]) );
// // END
} else if (bitsUnused == 5) {
bitOffset[j] = b[curByte] & UNMASK[5];
} else {
bitOffset[j] = (b[curByte] & UNMASK[bitsUnused])
| (((0xff & b[curByte + 1]) >> 3)
& MASK[bitsUnused]);
// // DEBUG
// System.out.println(
// " contribution from first byte: "
// + itoh(b[curByte] & UNMASK[bitsUnused])
// + "\n second byte: " + btoh(b[curByte + 1])
// + "\n shifted: " + itoh((0xff & b[curByte + 1]) >> 3)
// + "\n mask: " + itoh(MASK[bitsUnused])
// + "\n contribution from second byte: "
// + itoh((0xff & b[curByte + 1] >> 3) & MASK[bitsUnused]));
// // END
}
// // DEBUG
// System.out.println (" bitOffset[j] = " + bitOffset[j]);
// // END
curBit += 5;
}
}
}
/**
* Extracts the k word offsets from a key. Suitable for general
* values of m and k.
*/
public class GenericWordSelector implements WordSelector {
/** Extract the k offsets into the word offset array */
public void getWordSelectors() {
int stride = m - 5;
//assert true: stride<16;
int curBit = k * 5;
int curByte;
for (int j = 0; j < k; j++) {
curByte = curBit / 8;
int bitsUnused = ((curByte + 1) * 8) - curBit; // left in byte
// // DEBUG
// System.out.println (
// "curr 3 bytes: " + btoh(b[curByte])
// + (curByte < 19 ?
// " " + btoh(b[curByte + 1]) : "")
// + (curByte < 18 ?
// " " + btoh(b[curByte + 2]) : "")
// + "; curBit=" + curBit + ", curByte= " + curByte
// + ", bitsUnused=" + bitsUnused);
// // END
if (bitsUnused > stride) {
// the value is entirely within the current byte
wordOffset[j] = ((0xff & b[curByte])
>> (bitsUnused - stride))
& UNMASK[stride];
} else if (bitsUnused == stride) {
// the value fills the current byte
wordOffset[j] = b[curByte] & UNMASK[stride];
} else { // bitsUnused < stride
// value occupies more than one byte
// bits from first byte, right-aligned in result
wordOffset[j] = b[curByte] & UNMASK[bitsUnused];
// // DEBUG
// System.out.println(" first byte contributes "
// + itoh(wordOffset[j]));
// // END
// bits from second byte
int bitsToGet = stride - bitsUnused;
if (bitsToGet >= 8) {
// 8 bits from second byte
wordOffset[j] |= (0xff & b[curByte + 1]) << bitsUnused;
// // DEBUG
// System.out.println(" second byte contributes "
// + itoh(
// (0xff & b[curByte + 1]) << bitsUnused
// ));
// // END
// bits from third byte
bitsToGet -= 8;
if (bitsToGet > 0) {
wordOffset[j] |=
((0xff & b[curByte + 2]) >> (8 - bitsToGet))
<< (stride - bitsToGet) ;
// // DEBUG
// System.out.println(" third byte contributes "
// + itoh(
// (((0xff & b[curByte + 2]) >> (8 - bitsToGet))
// << (stride - bitsToGet))
// ));
// // END
}
} else {
// all remaining bits are within second byte
wordOffset[j] |= ((b[curByte + 1] >> (8 - bitsToGet))
& UNMASK[bitsToGet])
<< bitsUnused;
// // DEBUG
// System.out.println(" second byte contributes "
// + itoh(
// ((b[curByte + 1] >> (8 - bitsToGet))
// & UNMASK[bitsToGet])
// << bitsUnused
// ));
// // END
}
}
// // DEBUG
// System.out.println (
// " wordOffset[" + j + "] = " + wordOffset[j]
// + ", " + itoh(wordOffset[j])
// );
// // END
curBit += stride;
}
}
}
/**
* Given a key, populate the word and bit offset arrays, each
* of which has k elements.
*
* @param key cryptographic key used in populating the arrays
*/
public void getOffsets (byte[] key) {
if (key == null) {
throw new IllegalArgumentException("null key");
}
if (key.length < 20) {
throw new IllegalArgumentException(
"key must be at least 20 bytes long");
}
b = key;
// // DEBUG
// System.out.println("KeySelector.getOffsets for "
// + BloomSHA1.keyToString(b));
// // END
bitSel.getBitSelectors();
wordSel.getWordSelectors();
}
// DEBUG METHODS ////////////////////////////////////////////////
String itoh(int i) {
return BloomSHA1.itoh(i);
}
String btoh(byte b) {
return BloomSHA1.btoh(b);
}
}