From 63958df99b807d81d86ec2dedbb492eb1fea680c Mon Sep 17 00:00:00 2001 From: zzz Date: Sun, 4 Sep 2011 20:26:47 +0000 Subject: [PATCH] * NetDB: Try again to fix ISJ deadlock, thx devzero --- history.txt | 8 +- .../src/net/i2p/router/MessageSelector.java | 5 ++ .../src/net/i2p/router/RouterVersion.java | 2 +- .../kademlia/IterativeLookupJob.java | 87 +++++++++++++++++++ .../kademlia/IterativeLookupSelector.java | 63 ++------------ 5 files changed, 106 insertions(+), 59 deletions(-) create mode 100644 router/java/src/net/i2p/router/networkdb/kademlia/IterativeLookupJob.java diff --git a/history.txt b/history.txt index 1a66d9c3c8..5124d9b68e 100644 --- a/history.txt +++ b/history.txt @@ -1,14 +1,18 @@ +2011-09-04 zzz + * NetDB: Try again to fix ISJ deadlock, thx devzero + * Transport: Remove one global lock in OutboundMessageRegistry. + 2011-09-03 zzz * i2psnark: Fix "eject" button in certain browsers (ticket #511) * UDP Inbound: - - Hopefully fix race NPE, thx dream + - Hopefully fix race NPE, thx devzero - Catch some more fragment errors - Exception and log tweaks - Cleanups and javadocs 2011-09-02 zzz * Console: Cache user-agent processing - * NetDB: Hopefully fix ISJ deadlock, thx dream + * NetDB: Hopefully fix ISJ deadlock, thx devzero 2011-09-02 sponge * I2PSnark: Fix GUI html tag for adding a torrent, it was missing a space. diff --git a/router/java/src/net/i2p/router/MessageSelector.java b/router/java/src/net/i2p/router/MessageSelector.java index 49db05e6ea..38dfc898c8 100644 --- a/router/java/src/net/i2p/router/MessageSelector.java +++ b/router/java/src/net/i2p/router/MessageSelector.java @@ -22,6 +22,11 @@ public interface MessageSelector { * If this returns true, the job specified by OutNetMessage.getOnReplyJob() * will be run for every OutNetMessage associated with this selector * (by InNetMessagePool), after calling setMessage() for that ReplyJob. + * + * WARNING this is called from within OutboundMessageSelector.getOriginalMessages() + * inside a lock and can lead to deadlocks if the selector does too much in isMatch(). + * Until the lock is removed, take care to keep it simple. + * */ public boolean isMatch(I2NPMessage message); diff --git a/router/java/src/net/i2p/router/RouterVersion.java b/router/java/src/net/i2p/router/RouterVersion.java index 7ca1a3ec2c..5a58b19bc2 100644 --- a/router/java/src/net/i2p/router/RouterVersion.java +++ b/router/java/src/net/i2p/router/RouterVersion.java @@ -18,7 +18,7 @@ public class RouterVersion { /** deprecated */ public final static String ID = "Monotone"; public final static String VERSION = CoreVersion.VERSION; - public final static long BUILD = 10; + public final static long BUILD = 11; /** for example "-test" */ public final static String EXTRA = ""; diff --git a/router/java/src/net/i2p/router/networkdb/kademlia/IterativeLookupJob.java b/router/java/src/net/i2p/router/networkdb/kademlia/IterativeLookupJob.java new file mode 100644 index 0000000000..d4c77dbfdf --- /dev/null +++ b/router/java/src/net/i2p/router/networkdb/kademlia/IterativeLookupJob.java @@ -0,0 +1,87 @@ +package net.i2p.router.networkdb.kademlia; + +import net.i2p.data.Hash; +import net.i2p.data.RouterInfo; +import net.i2p.data.i2np.DatabaseSearchReplyMessage; +import net.i2p.router.JobImpl; +import net.i2p.router.RouterContext; + +/** + * Ask the peer who sent us the DSRM for the RouterInfos... + * + * ... but If we have the routerInfo already, try to refetch it from that router itself, + * (if the info is old or we don't think it is floodfill) + * which will help us establish that router as a good floodfill and speed our + * integration into the network. + * + * Very similar to SingleLookupJob. + * This was all in IterativeLookupSelector.isMatch() but it caused deadlocks + * with OutboundMessageRegistry.getOriginalMessages() + * at both _search.newPeerToTry() and _search.failed(). + * + * @since 0.8.9 + */ +class IterativeLookupJob extends JobImpl { + private final DatabaseSearchReplyMessage _dsrm; + private final IterativeSearchJob _search; + + public IterativeLookupJob(RouterContext ctx, DatabaseSearchReplyMessage dsrm, IterativeSearchJob search) { + super(ctx); + _dsrm = dsrm; + _search = search; + } + + public void runJob() { + // TODO - dsrm.getFromHash() can't be trusted - check against the list of + // those we sent the search to in _search ? + Hash from = _dsrm.getFromHash(); + + // Chase the hashes from the reply + // 255 max, see comments in SingleLookupJob + int limit = Math.min(_dsrm.getNumReplies(), SingleLookupJob.MAX_TO_FOLLOW); + int newPeers = 0; + int oldPeers = 0; + int invalidPeers = 0; + for (int i = 0; i < limit; i++) { + Hash peer = _dsrm.getReply(i); + if (peer.equals(getContext().routerHash())) { + // us + oldPeers++; + continue; + } + if (peer.equals(from)) { + // wtf + invalidPeers++; + continue; + } + RouterInfo ri = getContext().netDb().lookupRouterInfoLocally(peer); + if (ri == null) { + // get the RI from the peer that told us about it + getContext().jobQueue().addJob(new IterativeFollowupJob(getContext(), peer, from, _search)); + newPeers++; + } else if (ri.getPublished() < getContext().clock().now() - 60*60*1000 || + !FloodfillNetworkDatabaseFacade.isFloodfill(ri)) { + // get an updated RI from the (now ff?) peer + getContext().jobQueue().addJob(new IterativeFollowupJob(getContext(), peer, peer, _search)); + oldPeers++; + } else { + // add it to the sorted queue + // this will check if we have already tried it + // should we really add? if we know about it but skipped it, + // it was for some reason? + _search.newPeerToTry(peer); + oldPeers++; + } + } + long timeSent = _search.timeSent(from); + // assume 0 dup + if (timeSent > 0) { + getContext().profileManager().dbLookupReply(from, newPeers, oldPeers, invalidPeers, 0, + getContext().clock().now() - timeSent); + } + + _search.failed(_dsrm.getFromHash(), false); + } + + public String getName() { return "NetDb process DSRM"; } +} diff --git a/router/java/src/net/i2p/router/networkdb/kademlia/IterativeLookupSelector.java b/router/java/src/net/i2p/router/networkdb/kademlia/IterativeLookupSelector.java index c2b951edaa..c066b36feb 100644 --- a/router/java/src/net/i2p/router/networkdb/kademlia/IterativeLookupSelector.java +++ b/router/java/src/net/i2p/router/networkdb/kademlia/IterativeLookupSelector.java @@ -1,7 +1,6 @@ package net.i2p.router.networkdb.kademlia; import net.i2p.data.Hash; -import net.i2p.data.RouterInfo; import net.i2p.data.i2np.DatabaseSearchReplyMessage; import net.i2p.data.i2np.DatabaseStoreMessage; import net.i2p.data.i2np.I2NPMessage; @@ -10,8 +9,7 @@ import net.i2p.router.RouterContext; import net.i2p.util.Log; /** - * Slightly modified version of FloodOnlyLookupSelector, - * plus it incorporates the functions of SingleLookupJob inline. + * Slightly modified version of FloodOnlyLookupSelector. * Always follows the DSRM entries. * * @since 0.8.9 @@ -50,62 +48,15 @@ class IterativeLookupSelector implements MessageSelector { } else if (message instanceof DatabaseSearchReplyMessage) { DatabaseSearchReplyMessage dsrm = (DatabaseSearchReplyMessage)message; if (_search.getKey().equals(dsrm.getSearchKey())) { - - // TODO - dsrm.getFromHash() can't be trusted - check against the list of - // those we sent the search to in _search ? - Hash from = dsrm.getFromHash(); - - // Moved from FloodOnlyLookupMatchJob so it is called for all replies - // rather than just the last one // Got a netDb reply pointing us at other floodfills... - if (_log.shouldLog(Log.INFO)) - _log.info(_search.getJobId() + ": Processing DSRM via IterativeLookupJobs, apparently from " + from); - - // Chase the hashes from the reply - // 255 max, see comments in SingleLookupJob - int limit = Math.min(dsrm.getNumReplies(), SingleLookupJob.MAX_TO_FOLLOW); - int newPeers = 0; - int oldPeers = 0; - int invalidPeers = 0; - for (int i = 0; i < limit; i++) { - Hash peer = dsrm.getReply(i); - if (peer.equals(_context.routerHash())) { - // us - oldPeers++; - continue; - } - if (peer.equals(from)) { - // wtf - invalidPeers++; - continue; - } - RouterInfo ri = _context.netDb().lookupRouterInfoLocally(peer); - if (ri == null) { - // get the RI from the peer that told us about it - _context.jobQueue().addJob(new IterativeFollowupJob(_context, peer, from, _search)); - newPeers++; - } else if (ri.getPublished() < _context.clock().now() - 60*60*1000 || - !FloodfillNetworkDatabaseFacade.isFloodfill(ri)) { - // get an updated RI from the (now ff?) peer - _context.jobQueue().addJob(new IterativeFollowupJob(_context, peer, peer, _search)); - oldPeers++; - } else { - // add it to the sorted queue - // this will check if we have already tried it - // should we really add? if we know about it but skipped it, - // it was for some reason? - _search.newPeerToTry(peer); - oldPeers++; - } - } - long timeSent = _search.timeSent(from); - // assume 0 dup - if (timeSent > 0) { - _context.profileManager().dbLookupReply(from, newPeers, oldPeers, invalidPeers, 0, - _context.clock().now() - timeSent); + if (_log.shouldLog(Log.INFO)) { + Hash from = dsrm.getFromHash(); + _log.info(_search.getJobId() + ": Processing DSRM via IterativeLookupJob, apparently from " + from); } - _search.failed(dsrm.getFromHash(), false); + // was inline, now in IterativeLookupJob due to deadlocks + _context.jobQueue().addJob(new IterativeLookupJob(_context, dsrm, _search)); + // fall through, always return false, we do not wish the match job to be called } }