move DecayingBloomFilter, DecayingHashSet, and xlattice filters from core to router

2012-07-02 19:22:33 +00:00
parent 5eab417134
commit e2588a5379
9 changed files with 19 additions and 19 deletions
--- a/core/java/src/org/xlattice/crypto/filters/BloomSHA1.java
+++ b/core/java/src/org/xlattice/crypto/filters/BloomSHA1.java
@@ -1,343 +0,0 @@
-package org.xlattice.crypto.filters;
-
-import java.util.Arrays;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.LinkedBlockingQueue;
-
-/**
- * A Bloom filter for sets of SHA1 digests.  A Bloom filter uses a set
- * of k hash functions to determine set membership.  Each hash function
- * produces a value in the range 0..M-1.  The filter is of size M.  To
- * add a member to the set, apply each function to the new member and 
- * set the corresponding bit in the filter.  For M very large relative
- * to k, this will normally set k bits in the filter.  To check whether
- * x is a member of the set, apply each of the k hash functions to x
- * and check whether the corresponding bits are set in the filter.  If
- * any are not set, x is definitely not a member.  If all are set, x 
- * may be a member.  The probability of error (the false positive rate)
- * is f = (1 - e^(-kN/M))^k, where N is the number of set members.
- *
- * This class takes advantage of the fact that SHA1 digests are good-
- * quality pseudo-random numbers.  The k hash functions are the values
- * of distinct sets of bits taken from the 20-byte SHA1 hash.  The
- * number of bits in the filter, M, is constrained to be a power of 
- * 2; M == 2^m.  The number of bits in each hash function may not 
- * exceed floor(m/k).
- *
- * This class is designed to be thread-safe, but this has not been
- * exhaustively tested.
- *
- * @author <A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A>
- * 
- * BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice
- * app - http://xlattice.sourceforge.net/
- * 
- * minor tweaks by jrandom, exposing unsynchronized access and 
- * allowing larger M and K.  changes released into the public domain.
- * 
- * Note that this is used only by DecayingBloomFilter, which uses only
- * the unsynchronized locked_foo() methods.
- * Deprecated for use outside of the router; to be moved to router.jar.
- * 
- * As of 0.8.11, the locked_foo() methods are thread-safe, in that they work,
- * but there is a minor risk of false-negatives if two threads are
- * accessing the same bloom filter integer.
- */
-
-public class BloomSHA1 {
-    protected final int m;
-    protected final int k;
-    protected int count;
-   
-    protected final int[] filter;
-    protected final KeySelector ks;
-    
-    // convenience variables
-    protected final int filterBits;
-    protected final int filterWords;
-    
-    private final BlockingQueue<int[]> buf;
-
-/* (24,11) too big - see KeySelector
-
-    public static void main(String args[]) {
-        BloomSHA1 b = new BloomSHA1(24, 11);
-        for (int i = 0; i < 100; i++) {
-            byte v[] = new byte[32];
-            v[0] = (byte)i;
-            b.insert(v);
-        }
-    }
-*/
-    
-    
-    /**
-     * Creates a filter with 2^m bits and k 'hash functions', where
-     * each hash function is a portion of the 160-bit SHA1 hash.
-
-     * @param m determines number of bits in filter
-     * @param k number of hash functions
-     *
-     * See KeySelector for important restriction on max m and k
-     */
-    public BloomSHA1( int m, int k) {
-        // XXX need to devise more reasonable set of checks
-        //if ( m < 2 || m > 20) {
-        //    throw new IllegalArgumentException("m out of range");
-        //}
-        //if ( k < 1 || ( k * m > 160 )) {
-        //    throw new IllegalArgumentException( 
-        //        "too many hash functions for filter size");
-        //}
-        this.m = m;
-        this.k = k;
-        filterBits = 1 << m;
-        filterWords = (filterBits + 31)/32;     // round up 
-        filter = new int[filterWords];
-        ks = new KeySelector(m, k);
-        buf = new LinkedBlockingQueue(16);
-
-        // DEBUG
-        //System.out.println("Bloom constructor: m = " + m + ", k = " + k
-        //    + "\n    filterBits = " + filterBits
-        //    + ", filterWords = " + filterWords);
-        // END
-    }
-
-    /**
-     * Creates a filter of 2^m bits, with the number of 'hash functions'
-     * k defaulting to 8.
-     * @param m determines size of filter
-     */
-    public BloomSHA1 (int m) {
-        this(m, 8);
-    }
-
-    /**
-     * Creates a filter of 2^20 bits with k defaulting to 8.
-     */
-    public BloomSHA1 () {
-        this (20, 8);
-    }
-    /** Clear the filter, unsynchronized */
-    protected void doClear() {
-        Arrays.fill(filter, 0);
-        count = 0;
-    }
-    /** Synchronized version */
-    public void clear() {
-        synchronized (this) {
-            doClear();
-        }
-    }
-    /**
-     * Returns the number of keys which have been inserted.  This 
-     * class (BloomSHA1) does not guarantee uniqueness in any sense; if the 
-     * same key is added N times, the number of set members reported
-     * will increase by N.
-     * 
-     * @return number of set members 
-     */
-    public final int size() {
-        synchronized (this) {
-            return count;
-        }
-    }
-    /**
-     * @return number of bits in filter
-     */
-    public final int capacity () {
-        return filterBits;
-    }
-
-    /**
-     * Add a key to the set represented by the filter.   
-     *
-     * XXX This version does not maintain 4-bit counters, it is not
-     * a counting Bloom filter.
-     * 
-     * @param b byte array representing a key (SHA1 digest)
-     */
-    public void insert (byte[]b) { insert(b, 0, b.length); }
-
-    public void insert (byte[]b, int offset, int len) {
-        synchronized(this) {
-            locked_insert(b, offset, len);
-        }
-    }
-
-    public final void locked_insert(byte[]b) { locked_insert(b, 0, b.length); }
-
-    public final void locked_insert(byte[]b, int offset, int len) { 
-        int[] bitOffset = acquire();
-        int[] wordOffset = acquire();
-        ks.getOffsets(b, offset, len, bitOffset, wordOffset);
-        for (int i = 0; i < k; i++) {
-            filter[wordOffset[i]] |=  1 << bitOffset[i];
-        }
-        count++;
-        buf.offer(bitOffset);
-        buf.offer(wordOffset);
-    }
-    
-    /**
-     * Is a key in the filter.  Sets up the bit and word offset arrays.
-     * 
-     * @param b byte array representing a key (SHA1 digest)
-     * @return true if b is in the filter 
-     */
-    protected final boolean isMember(byte[] b) { return isMember(b, 0, b.length); }
-
-    protected final boolean isMember(byte[] b, int offset, int len) {
-        int[] bitOffset = acquire();
-        int[] wordOffset = acquire();
-        ks.getOffsets(b, offset, len, bitOffset, wordOffset);
-        for (int i = 0; i < k; i++) {
-            if (! ((filter[wordOffset[i]] & (1 << bitOffset[i])) != 0) ) {
-                buf.offer(bitOffset);
-                buf.offer(wordOffset);
-                return false;
-            }
-        }
-        buf.offer(bitOffset);
-        buf.offer(wordOffset);
-        return true;
-    }
-    
-    public final boolean locked_member(byte[]b) { return isMember(b); }
-    public final boolean locked_member(byte[]b, int offset, int len) { return isMember(b, offset, len); }
-    
-    /**
-     * Is a key in the filter.  External interface, internally synchronized.
-     * 
-     * @param b byte array representing a key (SHA1 digest)
-     * @return true if b is in the filter 
-     */
-    public final boolean member(byte[]b) { return member(b, 0, b.length); }
-    public final boolean member(byte[]b, int offset, int len) {
-        synchronized (this) {
-            return isMember(b, offset, len);
-        }
-    }
-
-    /**
-     * Get the bloom filter offsets for reuse.
-     * Caller should call release(rv) when done with it.
-     * @since 0.8.11
-     */
-    public FilterKey getFilterKey(byte[] b, int offset, int len) {
-        int[] bitOffset = acquire();
-        int[] wordOffset = acquire();
-        ks.getOffsets(b, offset, len, bitOffset, wordOffset);
-        return new FilterKey(bitOffset, wordOffset);
-    }
-
-    /**
-     * Add the key to the filter.
-     * @since 0.8.11
-     */
-    public void locked_insert(FilterKey fk) {
-        for (int i = 0; i < k; i++) {
-            filter[fk.wordOffset[i]] |=  1 << fk.bitOffset[i];
-        }
-        count++;
-    }
-
-
-    /**
-     * Is the key in the filter.
-     * @since 0.8.11
-     */
-    public boolean locked_member(FilterKey fk) {
-        for (int i = 0; i < k; i++) {
-            if (! ((filter[fk.wordOffset[i]] & (1 << fk.bitOffset[i])) != 0) )
-                return false;
-        }
-        return true;
-    }
-
-    /**
-     * @since 0.8.11
-     */
-    private int[] acquire() {
-        int[] rv = buf.poll();
-        if (rv != null)
-            return rv;
-        return new int[k];
-    }
-
-    /**
-     * @since 0.8.11
-     */
-    public void release(FilterKey fk) {
-        buf.offer(fk.bitOffset);
-        buf.offer(fk.wordOffset);
-    }
-
-    /**
-     * Store the (opaque) bloom filter offsets for reuse.
-     * @since 0.8.11
-     */
-    public static class FilterKey {
-
-        private final int[] bitOffset;
-        private final int[] wordOffset;
-
-        private FilterKey(int[] bitOffset, int[] wordOffset) {
-            this.bitOffset = bitOffset;
-            this.wordOffset = wordOffset;
-        }
-    }
-
-    /** 
-     * @param n number of set members
-     * @return approximate false positive rate
-     */
-    public final double falsePositives(int n) {
-        // (1 - e^(-kN/M))^k
-        return java.lang.Math.pow ( 
-                (1l - java.lang.Math.exp(0d- ((double)k) * (long)n / filterBits)), k);
-    }
-
-    public final double falsePositives() {
-        return falsePositives(count);
-    }
-
-/*****
-    // DEBUG METHODS
-    public static String keyToString(byte[] key) {
-        StringBuilder sb = new StringBuilder().append(key[0]);
-        for (int i = 1; i < key.length; i++) {
-            sb.append(".").append(Integer.toString(key[i], 16));
-        }
-        return sb.toString();
-    }
-*****/
-
-    /** convert 64-bit integer to hex String */
-/*****
-    public static String ltoh (long i) {
-        StringBuilder sb = new StringBuilder().append("#")
-                                .append(Long.toString(i, 16));
-        return sb.toString();
-    }
-*****/
-
-    /** convert 32-bit integer to String */
-/*****
-    public static String itoh (int i) {
-        StringBuilder sb = new StringBuilder().append("#")
-                                .append(Integer.toString(i, 16));
-        return sb.toString();
-    }
-*****/
-
-    /** convert single byte to String */
-/*****
-    public static String btoh (byte b) {
-        int i = 0xff & b;
-        return itoh(i);
-    }
-*****/
-}
-
--- a/core/java/src/org/xlattice/crypto/filters/KeySelector.java
+++ b/core/java/src/org/xlattice/crypto/filters/KeySelector.java
@@ -1,279 +0,0 @@
-package org.xlattice.crypto.filters;
-
-/**
- * Given a key, populates arrays determining word and bit offsets into
- * a Bloom filter.
- * 
- * @author <A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A>
- *
- * BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice
- * app - http://xlattice.sourceforge.net/
- * 
- * minor tweaks by jrandom, exposing unsynchronized access and 
- * allowing larger M and K.  changes released into the public domain.
- *
- * As of 0.8.11, bitoffset and wordoffset out parameters moved from fields
- * to selector arguments, to allow concurrency.
- * All methods are now thread-safe.
- */
-public class KeySelector {
-   
-    private final int m;
-    private final int k;
-    private final BitSelector  bitSel;
-    private final WordSelector wordSel;
-    
-    public interface BitSelector {
-        /**
-         *  @param bitOffset Out parameter of length k
-         *  @since 0.8.11 out parameter added
-         */
-        public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset);
-    }
-
-    public interface WordSelector {
-        /**
-         *  @param wordOffset Out parameter of length k
-         *  @since 0.8.11 out parameter added
-         */
-        public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset);
-    }
-
-    /** AND with byte to expose index-many bits */
-    public final static int[] UNMASK = { 
- // 0  1  2  3   4   5   6    7    8   9     10   11     12    13     14     15
-    0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767};
-    /** AND with byte to zero out index-many bits */
-    public final static int[] MASK   = {
-    ~0,~1,~3,~7,~15,~31,~63,~127,~255,~511,~1023,~2047,~4095,~8191,~16383,~32767};
-
-    public final static int TWO_UP_15 = 32 * 1024;
-
-    /** 
-     * Creates a key selector for a Bloom filter.  When a key is presented
-     * to the getOffsets() method, the k 'hash function' values are 
-     * extracted and used to populate bitOffset and wordOffset arrays which
-     * specify the k flags to be set or examined in the filter.  
-     *
-     * @param m    size of the filter as a power of 2
-     * @param k    number of 'hash functions'
-     *
-     * Note that if k and m are too big, the GenericWordSelector blows up -
-     * The max for 32-byte keys is m=23 and k=11.
-     * The precise restriction appears to be:
-     * ((5k + (k-1)(m-5)) / 8) + 2 < keySizeInBytes
-     *
-     * It isn't clear how to fix this.
-     */
-    public KeySelector (int m, int k) {
-        //if ( (m < 2) || (m > 20)|| (k < 1) 
-        //             || (bitOffset == null) || (wordOffset == null)) {
-        //    throw new IllegalArgumentException();
-        //}
-        this.m = m;
-        this.k = k;
-        bitSel  = new GenericBitSelector();
-        wordSel = new GenericWordSelector();
-    }
-    
-    /** 
-     * Extracts the k bit offsets from a key, suitable for general values 
-     * of m and k.
-     */
-    public class GenericBitSelector implements BitSelector {
-        /** Do the extraction */
-        public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset) {
-            int curBit = 8 * offset; 
-            int curByte;
-            for (int j = 0; j < k; j++) {
-                curByte = curBit / 8;
-                int bitsUnused = ((curByte + 1) * 8) - curBit;    // left in byte
-
-//              // DEBUG
-//              System.out.println (
-//                  "this byte = " + btoh(b[curByte])
-//                  + ", next byte = " + btoh(b[curByte + 1])
-//                  + "; curBit=" + curBit + ", curByte= " + curByte
-//                  + ", bitsUnused=" + bitsUnused);
-//              // END
-                if (bitsUnused > 5) {
-                    bitOffset[j] = ((0xff & b[curByte])
-                                        >> (bitsUnused - 5)) & UNMASK[5];
-//                  // DEBUG
-//                  System.out.println(
-//                      "    before shifting: " + btoh(b[curByte])
-//                  + "\n    after shifting:  " 
-//                          + itoh( (0xff & b[curByte]) >> (bitsUnused - 5))
-//                  + "\n    mask:            " + itoh(UNMASK[5]) );
-//                  // END
-                } else if (bitsUnused == 5) {
-                    bitOffset[j] = b[curByte] & UNMASK[5];
-                } else {
-                    bitOffset[j] = (b[curByte]          & UNMASK[bitsUnused])
-                              | (((0xff & b[curByte + 1]) >> 3) 
-                                                        &   MASK[bitsUnused]);
-//                  // DEBUG
-//                  System.out.println(
-//                    "    contribution from first byte:  "
-//                    + itoh(b[curByte] & UNMASK[bitsUnused])
-//                + "\n    second byte: " + btoh(b[curByte + 1])
-//                + "\n    shifted:     " + itoh((0xff & b[curByte + 1]) >> 3)
-//                + "\n    mask:        " + itoh(MASK[bitsUnused])
-//                + "\n    contribution from second byte: "
-//                    + itoh((0xff & b[curByte + 1] >> 3) & MASK[bitsUnused]));
-//                  // END
-                }
-//              // DEBUG
-//              System.out.println ("    bitOffset[j] = " + bitOffset[j]);
-//              // END
-                curBit += 5;
-            }
-        } 
-    }
-    /** 
-     * Extracts the k word offsets from a key.  Suitable for general
-     * values of m and k. See above for formula for max m and k.
-     */
-    public class GenericWordSelector implements WordSelector {
-        /** Extract the k offsets into the word offset array */
-        public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset) {
-            int stride = m - 5;
-            //assert true: stride<16;
-            int curBit = (k * 5) + (offset * 8); 
-            int curByte;
-            for (int j = 0; j < k; j++) {
-                curByte = curBit / 8;
-                int bitsUnused = ((curByte + 1) * 8) - curBit;    // left in byte
-
-//              // DEBUG
-//              System.out.println (
-//                  "curr 3 bytes: " + btoh(b[curByte]) 
-//                  + (curByte < 19 ?
-//                      " " + btoh(b[curByte + 1]) : "") 
-//                  + (curByte < 18 ?
-//                      " " + btoh(b[curByte + 2]) : "")
-//                  + "; curBit=" + curBit + ", curByte= " + curByte
-//                  + ", bitsUnused=" + bitsUnused);
-//              // END
-
-                if (bitsUnused > stride) {
-                    // the value is entirely within the current byte
-                    wordOffset[j] = ((0xff & b[curByte]) 
-                                        >> (bitsUnused - stride)) 
-                                                & UNMASK[stride];
-                } else if (bitsUnused == stride) {
-                    // the value fills the current byte
-                    wordOffset[j] = b[curByte] & UNMASK[stride];
-                } else {    // bitsUnused < stride
-                    // value occupies more than one byte
-                    // bits from first byte, right-aligned in result
-                    wordOffset[j] = b[curByte] & UNMASK[bitsUnused];
-//                  // DEBUG
-//                  System.out.println("    first byte contributes "
-//                          + itoh(wordOffset[j]));
-//                  // END
-                    // bits from second byte
-                    int bitsToGet = stride - bitsUnused;
-                    if (bitsToGet >= 8) {
-                        // 8 bits from second byte
-                        wordOffset[j] |= (0xff & b[curByte + 1]) << bitsUnused;
-//                      // DEBUG
-//                      System.out.println("    second byte contributes "
-//                          + itoh(
-//                          (0xff & b[curByte + 1]) << bitsUnused
-//                      ));
-//                      // END
-                        
-                        // bits from third byte
-                        bitsToGet -= 8;
-                        if (bitsToGet > 0) {
-                            // AIOOBE here if m and k too big (23,11 is the max)
-                            // for a 32-byte key - see above
-                            wordOffset[j] |= 
-                                ((0xff & b[curByte + 2]) >> (8 - bitsToGet))
-                                                    << (stride - bitsToGet) ;
-//                          // DEBUG
-//                          System.out.println("    third byte contributes " 
-//                              + itoh(
-//                              (((0xff & b[curByte + 2]) >> (8 - bitsToGet))
-//                                                  << (stride - bitsToGet))
-//                              ));
-//                          // END
-                        }
-                    } else {
-                        // all remaining bits are within second byte
-                        wordOffset[j] |= ((b[curByte + 1] >> (8 - bitsToGet))
-                                            & UNMASK[bitsToGet])
-                                                << bitsUnused;
-//                      // DEBUG
-//                      System.out.println("    second byte contributes "
-//                          + itoh(
-//                          ((b[curByte + 1] >> (8 - bitsToGet))
-//                              & UNMASK[bitsToGet])
-//                                      << bitsUnused
-//                          ));
-//                      // END
-                    }
-                }
-//              // DEBUG
-//              System.out.println (
-//                  "    wordOffset[" + j + "] = " + wordOffset[j]
-//                  + ", "                     + itoh(wordOffset[j])
-//              );
-//              // END
-                curBit += stride;
-            }
-        } 
-    }
-
-    /**
-     * Given a key, populate the word and bit offset arrays, each
-     * of which has k elements.
-     * 
-     * @param key cryptographic key used in populating the arrays
-     * @param bitOffset Out parameter of length k
-     * @param wordOffset Out parameter of length k
-     * @since 0.8.11 out parameters added
-     */
-    public void getOffsets (byte[] key, int[] bitOffset, int[] wordOffset) {
-        getOffsets(key, 0, key.length, bitOffset, wordOffset);
-    }
-
-    /**
-     * Given a key, populate the word and bit offset arrays, each
-     * of which has k elements.
-     * 
-     * @param key cryptographic key used in populating the arrays
-     * @param bitOffset Out parameter of length k
-     * @param wordOffset Out parameter of length k
-     * @since 0.8.11 out parameters added
-     */
-    public void getOffsets (byte[] key, int off, int len, int[] bitOffset, int[] wordOffset) {
-        // skip these checks for speed
-        //if (key == null) {
-        //    throw new IllegalArgumentException("null key");
-        //}
-        //if (len < 20) {
-        //    throw new IllegalArgumentException(
-        //        "key must be at least 20 bytes long");
-        //}
-//      // DEBUG
-//      System.out.println("KeySelector.getOffsets for " 
-//                                          + BloomSHA1.keyToString(b));
-//      // END
-        bitSel.getBitSelectors(key, off, len, bitOffset);
-        wordSel.getWordSelectors(key, off, len, wordOffset);
-    }
-
-/*****
-    // DEBUG METHODS ////////////////////////////////////////////////
-    String itoh(int i) {
-        return BloomSHA1.itoh(i);
-    }
-    String btoh(byte b) {
-        return BloomSHA1.btoh(b);
-    }
-*****/
-}
-
-