I2P Address: [http://git.idk.i2p]

Skip to content
Snippets Groups Projects
Commit 4a03b6fc authored by zzz's avatar zzz
Browse files

i2psnark: Better comment deduping, fixes rating average

parent 57ddc8ea
No related branches found
No related tags found
No related merge requests found
......@@ -10,7 +10,11 @@ import net.i2p.I2PAppContext;
import net.i2p.data.DataHelper;
/**
* Store comments
* Store a single comment and/or rating.
* Unmodifiable except for marking as hidden.
* Stores a one-second timestamp but designed so identical
* comments within a certain time frame (bucket) are equal.
* Don't store in a plain set - see equals().
*
* @since 0.9.31
*/
......@@ -30,6 +34,7 @@ public class Comment implements Comparable<Comment> {
private static final int MAX_TEXT_LEN = 512;
private static final int BUCKET_SIZE = 10*60*1000;
private static final long TIME_SHRINK = 1000L;
private static final int MAX_SKEW = (int) (BUCKET_SIZE / TIME_SHRINK);
// 1/1/2005
private static final long TIME_OFFSET = 1104537600000L;
......@@ -181,26 +186,29 @@ public class Comment implements Comparable<Comment> {
}
}
/**
* @return bucket number
*/
@Override
public int hashCode() {
return time / (BUCKET_SIZE / (int) TIME_SHRINK);
return time / MAX_SKEW;
}
/**
* Comments in the same 10-minute bucket and otherwise equal
* are considered equal. This will result in duplicates
* near the border.
* Comments within 10 minutes (not necessarily in same bucket)
* and otherwise equal are considered equal.
* Violates contract, as equal objects may have different hashcodes and
* be in adjacent buckets.
*/
@Override
public boolean equals(Object o) {
if (o == null) return false;
if (!(o instanceof Comment)) return false;
Comment c = (Comment) o;
return rating == c.rating &&
eq(text, c.text) &&
eq(name, c.name) &&
hashCode() == c.hashCode();
int tdiff = time - c.time;
if (tdiff > MAX_SKEW || tdiff < 0 - MAX_SKEW)
return false;
return equalsIgnoreTimestamp(c);
}
/**
......
......@@ -58,7 +58,7 @@ public class CommentSet extends AbstractSet<Comment> {
// Assume most comments are short or null.
private static final int MAX_TOTAL_TEXT_LEN = MAX_SIZE * 16;
public CommentSet() {
private CommentSet() {
super();
map = new HashMap<Integer, List<Comment>>(4);
}
......@@ -80,7 +80,7 @@ public class CommentSet extends AbstractSet<Comment> {
try {
br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file)), "UTF-8"));
String line = null;
while ( (line = br.readLine()) != null) {
while ((line = br.readLine()) != null) {
Comment c = Comment.fromPersistentString(line);
if (c != null)
add(c);
......@@ -129,7 +129,18 @@ public class CommentSet extends AbstractSet<Comment> {
// If isMine and no text and rating changed, don't bother
if (c.isMine() && c.getText() == null && c.getRating() == myRating)
return false;
Integer hc = Integer.valueOf(c.hashCode());
int hCode = c.hashCode();
// check previous and next buckets
Integer phc = Integer.valueOf(hCode - 1);
List<Comment> plist = map.get(phc);
if (plist != null && plist.contains(c))
return false;
Integer nhc = Integer.valueOf(hCode + 1);
List<Comment> nxlist = map.get(nhc);
if (nxlist != null && nxlist.contains(c))
return false;
// check this bucket
Integer hc = Integer.valueOf(hCode);
List<Comment> list = map.get(hc);
if (list == null) {
list = Collections.singletonList(c);
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment