diff --git a/history.txt b/history.txt
index d57f196f5..fa09123f0 100644
--- a/history.txt
+++ b/history.txt
@@ -1,3 +1,14 @@
+2008-06-30 zzz
+    * configstats.jsp: Fix NPE when no stats checked (thanks nothome27!)
+    * i2psnark:
+      - Fix NPE caused by race (thanks echelon!)
+      - Add mastertracker, remove de-ebook
+    * NTCP:
+      - Try to fix 100% CPU, caused perhaps by JVM NIO bug...
+      - Fix failsafe stats
+    * PersistentDataStore: More leaseSet code cleanup
+    * SimpleTimer: Change congestion message from error to warn
+
 2008-06-24 zzz
     * FloodfillMonitorJob: Change range from 5-7 to 4-6
     * NTCP: Remove getIsInbound(), duplicate of isInbound()
diff --git a/router/java/src/net/i2p/router/RouterVersion.java b/router/java/src/net/i2p/router/RouterVersion.java
index c99ee2835..2c4b10706 100644
--- a/router/java/src/net/i2p/router/RouterVersion.java
+++ b/router/java/src/net/i2p/router/RouterVersion.java
@@ -17,7 +17,7 @@ import net.i2p.CoreVersion;
 public class RouterVersion {
     public final static String ID = "$Revision: 1.548 $ $Date: 2008-06-07 23:00:00 $";
     public final static String VERSION = "0.6.2";
-    public final static long BUILD = 6;
+    public final static long BUILD = 7;
     public static void main(String args[]) {
         System.out.println("I2P Router version: " + VERSION + "-" + BUILD);
         System.out.println("Router ID: " + RouterVersion.ID);
diff --git a/router/java/src/net/i2p/router/transport/ntcp/EventPumper.java b/router/java/src/net/i2p/router/transport/ntcp/EventPumper.java
index 4bfa2c1fb..f5915789a 100644
--- a/router/java/src/net/i2p/router/transport/ntcp/EventPumper.java
+++ b/router/java/src/net/i2p/router/transport/ntcp/EventPumper.java
@@ -8,6 +8,7 @@ import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
+import net.i2p.data.DataHelper;
 import net.i2p.data.RouterIdentity;
 import net.i2p.data.RouterInfo;
 import net.i2p.router.RouterContext;
@@ -124,6 +125,7 @@ public class EventPumper implements Runnable {
                 int failsafeWrites = 0;
                 int failsafeCloses = 0;
+                int failsafeInvalid = 0;
 
                 // pointless if we do this every 2 seconds?
                 long expireIdleWriteTime = 20*60*1000l; // + _context.random().nextLong(60*60*1000l);
                 for (Iterator iter = all.iterator(); iter.hasNext(); ) {
@@ -134,6 +136,33 @@
                         continue; // to the next con
                     NTCPConnection con = (NTCPConnection)att;
 
+                    /**
+                     * 100% CPU bug
+                     * http://forums.java.net/jive/thread.jspa?messageID=255525
+                     * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6595055
+                     *
+                     * The problem arises when a channel that was originally registered with the Selector for I/O
+                     * gets closed on the server side (due to an early client-side exit). But the server side can
+                     * only learn about such a channel when it does I/O (read/write) and thereby gets an IOException.
+                     * In this case (bug 6595055), there are times when the server side (selector) erroneously does
+                     * not know that the channel is already closed (peer reset), but continues the selection cycle
+                     * on a key set whose associated channel is already closed or invalid. Hence, the selector's
+                     * select(..) keeps spinning with a zero return without blocking for the timeout period.
+                     *
+                     * One fix is to have a provision in the application to check whether any of the Selector's
+                     * keys has a closed channel or an invalid registration due to channel closure.
+                     */
+                    if ((!key.isValid()) &&
+                        (!((SocketChannel)key.channel()).isConnectionPending()) &&
+                        con.getTimeSinceCreated() > 2 * NTCPTransport.ESTABLISH_TIMEOUT) {
+                        if (_log.shouldLog(Log.WARN))
+                            _log.warn("Invalid key " + con);
+                        // this will cancel the key, and it will then be removed from the keyset
+                        con.close();
+                        failsafeInvalid++;
+                        continue;
+                    }
+
                     if ( (con.getWriteBufCount() > 0) &&
                          ((key.interestOps() & SelectionKey.OP_WRITE) == 0) ) {
                         // the data queued to be sent has already passed through
@@ -152,11 +181,13 @@
                     } catch (CancelledKeyException cke) {
                         // cancelled while updating the interest ops. ah well
                     }
-                    if (failsafeWrites > 0)
-                        _context.statManager().addRateData("ntcp.failsafeWrites", failsafeWrites, 0);
-                    if (failsafeCloses > 0)
-                        _context.statManager().addRateData("ntcp.failsafeCloses", failsafeCloses, 0);
                 }
+                if (failsafeWrites > 0)
+                    _context.statManager().addRateData("ntcp.failsafeWrites", failsafeWrites, 0);
+                if (failsafeCloses > 0)
+                    _context.statManager().addRateData("ntcp.failsafeCloses", failsafeCloses, 0);
+                if (failsafeInvalid > 0)
+                    _context.statManager().addRateData("ntcp.failsafeInvalid", failsafeInvalid, 0);
             } catch (ClosedSelectorException cse) {
                 continue;
             }
diff --git a/router/java/src/net/i2p/router/transport/ntcp/NTCPConnection.java b/router/java/src/net/i2p/router/transport/ntcp/NTCPConnection.java
index 17aea239a..593db8693 100644
--- a/router/java/src/net/i2p/router/transport/ntcp/NTCPConnection.java
+++ b/router/java/src/net/i2p/router/transport/ntcp/NTCPConnection.java
@@ -1277,4 +1277,11 @@ public class NTCPConnection implements FIFOBandwidthLimiter.CompleteListener {
             }
         }
     }
+
+    public String toString() {
+        return "NTCP Connection to " +
+               (_remotePeer == null ? "unknown " : _remotePeer.calculateHash().toBase64().substring(0,6)) +
+               " inbound? " + _isInbound + " established? " + _established +
" + _established + + " created " + DataHelper.formatDuration(getTimeSinceCreated()) + " ago"; + } } diff --git a/router/java/src/net/i2p/router/transport/ntcp/NTCPTransport.java b/router/java/src/net/i2p/router/transport/ntcp/NTCPTransport.java index 7284e0945..8ff160d78 100644 --- a/router/java/src/net/i2p/router/transport/ntcp/NTCPTransport.java +++ b/router/java/src/net/i2p/router/transport/ntcp/NTCPTransport.java @@ -63,6 +63,7 @@ public class NTCPTransport extends TransportImpl { _context.statManager().createRateStat("ntcp.sendBacklogTime", "How long the head of the send queue has been waiting when we fail to add a new one to the queue (period is the number of messages queued)", "ntcp", new long[] { 60*1000, 10*60*1000 }); _context.statManager().createRateStat("ntcp.failsafeWrites", "How many times do we need to proactively add in an extra nio write to a peer at any given failsafe pass?", "ntcp", new long[] { 60*1000, 10*60*1000 }); _context.statManager().createRateStat("ntcp.failsafeCloses", "How many times do we need to proactively close an idle connection to a peer at any given failsafe pass?", "ntcp", new long[] { 60*1000, 10*60*1000 }); + _context.statManager().createRateStat("ntcp.failsafeInvalid", "How many times do we close a connection to a peer to work around a JVM bug?", "ntcp", new long[] { 60*1000, 10*60*1000 }); _context.statManager().createRateStat("ntcp.accept", "", "ntcp", new long[] { 60*1000, 10*60*1000 }); _context.statManager().createRateStat("ntcp.attemptShitlistedPeer", "", "ntcp", new long[] { 60*1000, 10*60*1000 }); _context.statManager().createRateStat("ntcp.attemptUnreachablePeer", "", "ntcp", new long[] { 60*1000, 10*60*1000 }); @@ -452,7 +453,7 @@ public class NTCPTransport extends TransportImpl { * how long from initial connection attempt (accept() or connect()) until * the con must be established to avoid premature close()ing */ - private static final int ESTABLISH_TIMEOUT = 10*1000; + public static final int ESTABLISH_TIMEOUT = 10*1000; /** add us to the establishment timeout process */ void establishing(NTCPConnection con) { synchronized (_establishing) {