From 4a03b6fcb05ae50bfaa3fe70b137cdb8f457d2d1 Mon Sep 17 00:00:00 2001 From: zzz <zzz@mail.i2p> Date: Sat, 25 Aug 2018 16:35:41 +0000 Subject: [PATCH] i2psnark: Better comment deduping, fixes rating average --- .../src/org/klomp/snark/comments/Comment.java | 28 ++++++++++++------- .../org/klomp/snark/comments/CommentSet.java | 17 +++++++++-- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/apps/i2psnark/java/src/org/klomp/snark/comments/Comment.java b/apps/i2psnark/java/src/org/klomp/snark/comments/Comment.java index 86f9eee747..d9b01b37dc 100644 --- a/apps/i2psnark/java/src/org/klomp/snark/comments/Comment.java +++ b/apps/i2psnark/java/src/org/klomp/snark/comments/Comment.java @@ -10,7 +10,11 @@ import net.i2p.I2PAppContext; import net.i2p.data.DataHelper; /** - * Store comments + * Store a single comment and/or rating. + * Unmodifiable except for marking as hidden. + * Stores a one-second timestamp but designed so identical + * comments within a certain time frame (bucket) are equal. + * Don't store in a plain set - see equals(). * * @since 0.9.31 */ @@ -30,6 +34,7 @@ public class Comment implements Comparable<Comment> { private static final int MAX_TEXT_LEN = 512; private static final int BUCKET_SIZE = 10*60*1000; private static final long TIME_SHRINK = 1000L; + private static final int MAX_SKEW = (int) (BUCKET_SIZE / TIME_SHRINK); // 1/1/2005 private static final long TIME_OFFSET = 1104537600000L; @@ -181,26 +186,29 @@ public class Comment implements Comparable<Comment> { } } - + /** + * @return bucket number + */ @Override public int hashCode() { - return time / (BUCKET_SIZE / (int) TIME_SHRINK); + return time / MAX_SKEW; } /** - * Comments in the same 10-minute bucket and otherwise equal - * are considered equal. This will result in duplicates - * near the border. + * Comments within 10 minutes (not necessarily in same bucket) + * and otherwise equal are considered equal. + * Violates contract, as equal objects may have different hashcodes and + * be in adjacent buckets. */ @Override public boolean equals(Object o) { if (o == null) return false; if (!(o instanceof Comment)) return false; Comment c = (Comment) o; - return rating == c.rating && - eq(text, c.text) && - eq(name, c.name) && - hashCode() == c.hashCode(); + int tdiff = time - c.time; + if (tdiff > MAX_SKEW || tdiff < 0 - MAX_SKEW) + return false; + return equalsIgnoreTimestamp(c); } /** diff --git a/apps/i2psnark/java/src/org/klomp/snark/comments/CommentSet.java b/apps/i2psnark/java/src/org/klomp/snark/comments/CommentSet.java index af7794b2f4..bb0a95e475 100644 --- a/apps/i2psnark/java/src/org/klomp/snark/comments/CommentSet.java +++ b/apps/i2psnark/java/src/org/klomp/snark/comments/CommentSet.java @@ -58,7 +58,7 @@ public class CommentSet extends AbstractSet<Comment> { // Assume most comments are short or null. private static final int MAX_TOTAL_TEXT_LEN = MAX_SIZE * 16; - public CommentSet() { + private CommentSet() { super(); map = new HashMap<Integer, List<Comment>>(4); } @@ -80,7 +80,7 @@ public class CommentSet extends AbstractSet<Comment> { try { br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "UTF-8")); String line = null; - while ( (line = br.readLine()) != null) { + while ((line = br.readLine()) != null) { Comment c = Comment.fromPersistentString(line); if (c != null) add(c); @@ -129,7 +129,18 @@ public class CommentSet extends AbstractSet<Comment> { // If isMine and no text and rating changed, don't bother if (c.isMine() && c.getText() == null && c.getRating() == myRating) return false; - Integer hc = Integer.valueOf(c.hashCode()); + int hCode = c.hashCode(); + // check previous and next buckets + Integer phc = Integer.valueOf(hCode - 1); + List<Comment> plist = map.get(phc); + if (plist != null && plist.contains(c)) + return false; + Integer nhc = Integer.valueOf(hCode + 1); + List<Comment> nxlist = map.get(nhc); + if (nxlist != null && nxlist.contains(c)) + return false; + // check this bucket + Integer hc = Integer.valueOf(hCode); List<Comment> list = map.get(hc); if (list == null) { list = Collections.singletonList(c); -- GitLab