Profiles: Count all build rejections in tunnel history

Previously did not count probabalistic or transient codes
Reduce 10 minute acceptance rate for these codes
(previously adjusted the averaged rate)
Make rates final in TunnelHistory
Static rates array to reduce object churn
Prep for removing 30m rates
Reduce now() calls, pull out of loops
This commit is contained in:
zzz
2023-02-01 16:23:45 -05:00
parent 652da9ebd5
commit 019a8b6655
5 changed files with 58 additions and 64 deletions

View File

@@ -34,9 +34,10 @@ class CapacityCalculator {
public static double calc(PeerProfile profile) {
double capacity;
RouterContext context = profile.getContext();
long now = context.clock().now();
TunnelHistory history = profile.getTunnelHistory();
if (tooOld(profile)) {
if (tooOld(profile, now)) {
capacity = 1;
} else {
RateStat acceptStat = profile.getTunnelCreateResponseTime();
@@ -51,22 +52,22 @@ class CapacityCalculator {
double capacity30m = estimateCapacity(acceptStat, rejectStat, failedStat, 30*60*1000);
double capacity60m = estimateCapacity(acceptStat, rejectStat, failedStat, 60*60*1000);
double capacity1d = estimateCapacity(acceptStat, rejectStat, failedStat, 24*60*60*1000);
// now take into account recent tunnel rejections
long cutoff = now - PeerManager.REORGANIZE_TIME_LONG;
if (history.getLastRejectedProbabalistic() > cutoff) {
capacity10m /= 2;
} else if (history.getLastRejectedTransient() > cutoff) {
// never happens
capacity10m /= 4;
}
capacity = capacity10m * periodWeight(10*60*1000) +
capacity30m * periodWeight(30*60*1000) +
capacity60m * periodWeight(60*60*1000) +
capacity1d * periodWeight(24*60*60*1000);
}
}
// now take into account non-rejection tunnel rejections (which haven't
// incremented the rejection counter, since they were only temporary)
RouterContext context = profile.getContext();
long now = context.clock().now();
if (history.getLastRejectedTransient() > now - 5*60*1000)
capacity = 1;
else if (history.getLastRejectedProbabalistic() > now - 5*60*1000)
capacity -= context.random().nextInt(5);
}
// boost new profiles
if (now - profile.getFirstHeardAbout() < 45*60*1000)
@@ -115,11 +116,8 @@ class CapacityCalculator {
* If we haven't heard from them in an hour, they aren't too useful.
*
*/
private static boolean tooOld(PeerProfile profile) {
if (profile.getIsActive(60*60*1000))
return false;
else
return true;
private static boolean tooOld(PeerProfile profile, long now) {
return !profile.getIsActive(60*60*1000, now);
}
/**

View File

@@ -54,7 +54,7 @@ class PeerManager {
* This must also be less than 10 minutes, which is the shortest
* Rate contained in the profile, as the Rates must be coalesced.
*/
private static final long REORGANIZE_TIME_LONG = 351*1000;
static final long REORGANIZE_TIME_LONG = 351*1000;
/** After first two hours of uptime ~= 246 */
static final int REORGANIZES_PER_DAY = (int) (24*60*60*1000L / REORGANIZE_TIME_LONG);
private static final long STORE_TIME = 19*60*60*1000;

View File

@@ -166,7 +166,17 @@ public class PeerProfile {
* 5 minutes)
*/
public boolean getIsActive() {
return getIsActive(5*60*1000);
return getIsActive(5*60*1000, _context.clock().now());
}
/**
* Is this peer active at the moment (sending/receiving messages within the last
* 5 minutes)
*
* @since 0.9.58
*/
public boolean getIsActive(long now) {
return getIsActive(5*60*1000, now);
}
/** @since 0.8.11 */
@@ -222,17 +232,11 @@ public class PeerProfile {
*
* @param period must be one of the periods in the RateStat constructors below
* (5*60*1000 or 60*60*1000)
*
* @since 0.9.58
*/
public boolean getIsActive(long period) {
//if ( (getSendSuccessSize().getRate(period).getCurrentEventCount() > 0) ||
// (getSendSuccessSize().getRate(period).getLastEventCount() > 0) ||
// (getReceiveSize().getRate(period).getCurrentEventCount() > 0) ||
// (getReceiveSize().getRate(period).getLastEventCount() > 0) ||
// _context.commSystem().isEstablished(_peer) )
// return true;
//else
// return false;
long before = _context.clock().now() - period;
public boolean getIsActive(long period, long now) {
long before = now - period;
return getLastHeardFrom() < before ||
getLastSendSuccessful() < before ||
isEstablished();
@@ -542,12 +546,8 @@ public class PeerProfile {
*/
public synchronized void expandProfile() {
String group = (null == _peer ? "profileUnknown" : _peer.toBase64().substring(0,6));
//if (_sendSuccessSize == null)
// _sendSuccessSize = new RateStat("sendSuccessSize", "How large successfully sent messages are", group, new long[] { 5*60*1000l, 60*60*1000l });
//if (_receiveSize == null)
// _receiveSize = new RateStat("receiveSize", "How large received messages are", group, new long[] { 5*60*1000l, 60*60*1000l } );
if (_tunnelCreateResponseTime == null)
_tunnelCreateResponseTime = new RateStat("tunnelCreateResponseTime", "how long it takes to get a tunnel create response from the peer (in milliseconds)", group, new long[] { 10*60*1000l, 30*60*1000l, 60*60*1000l, 24*60*60*1000 } );
_tunnelCreateResponseTime = new RateStat("tunnelCreateResponseTime", "how long it takes to get a tunnel create response from the peer (ms)", group, TunnelHistory.RATES);
if (ENABLE_TUNNEL_TEST_RESPONSE_TIME && _tunnelTestResponseTime == null)
_tunnelTestResponseTime = new RateStat("tunnelTestResponseTime", "how long it takes to successfully test a tunnel this peer participates in (in milliseconds)", group, new long[] { 10*60*1000l, 30*60*1000l, 60*60*1000l, 3*60*60*1000l, 24*60*60*1000 } );

View File

@@ -902,6 +902,7 @@ public class ProfileOrganizer {
if (numToPromote > 0) {
if (_log.shouldLog(Log.INFO))
_log.info("Need to explicitly promote " + numToPromote + " peers to the fast group");
long now = _context.clock().now();
for (PeerProfile cur : _strictCapacityOrder) {
if ( (!_fastPeers.containsKey(cur.getPeer())) && (!cur.getIsFailing()) ) {
if (!isSelectable(cur.getPeer())) {
@@ -910,7 +911,7 @@ public class ProfileOrganizer {
// _log.info("skip unknown peer from fast promotion: " + cur.getPeer().toBase64());
continue;
}
if (!cur.getIsActive()) {
if (!cur.getIsActive(now)) {
// skip inactive
// if (_log.shouldLog(Log.INFO))
// _log.info("skip inactive peer from fast promotion: " + cur.getPeer().toBase64());
@@ -991,8 +992,9 @@ public class ProfileOrganizer {
*/
private void locked_unfailAsNecessary() {
int notFailingActive = 0;
long now = _context.clock().now();
for (PeerProfile peer : _notFailingPeers.values()) {
if (peer.getIsActive())
if (peer.getIsActive(now))
notFailingActive++;
if (notFailingActive >= MIN_NOT_FAILING_ACTIVE) {
// we've got enough, no need to try further
@@ -1005,7 +1007,7 @@ public class ProfileOrganizer {
if (needToUnfail > 0) {
int unfailed = 0;
for (PeerProfile best : _strictCapacityOrder) {
if ( (best.getIsActive()) && (best.getIsFailing()) ) {
if ( (best.getIsActive(now)) && (best.getIsFailing()) ) {
if (_log.shouldLog(Log.WARN))
_log.warn("All peers were failing, so we have overridden the failing flag for one of the most reliable active peers (" + best.getPeer().toBase64() + ")");
best.setIsFailing(false);
@@ -1035,11 +1037,12 @@ public class ProfileOrganizer {
double totalCapacity = 0;
double totalIntegration = 0;
Set<PeerProfile> reordered = new TreeSet<PeerProfile>(_comp);
long now = _context.clock().now();
for (PeerProfile profile : allPeers) {
if (_us.equals(profile.getPeer())) continue;
// only take into account active peers that aren't failing
if (profile.getIsFailing() || (!profile.getIsActive()))
if (profile.getIsFailing() || (!profile.getIsActive(now)))
continue;
// dont bother trying to make sense of things below the baseline
@@ -1557,9 +1560,10 @@ public class ProfileOrganizer {
organizer.reorganize();
DecimalFormat fmt = new DecimalFormat("0000.0");
long now = ctx.clock().now();
for (Hash peer : organizer.selectAllPeers()) {
PeerProfile profile = organizer.getProfile(peer);
if (!profile.getIsActive()) {
if (!profile.getIsActive(now)) {
System.out.println("Peer " + peer.toBase64().substring(0,4)
+ " [" + (organizer.isFast(peer) ? "IF+R" :
organizer.isHighCapacity(peer) ? "IR " :
@@ -1567,7 +1571,7 @@ public class ProfileOrganizer {
+ " Speed:\t" + fmt.format(profile.getSpeedValue())
+ " Capacity:\t" + fmt.format(profile.getCapacityValue())
+ " Integration:\t" + fmt.format(profile.getIntegrationValue())
+ " Active?\t" + profile.getIsActive()
+ " Active?\t" + profile.getIsActive(now)
+ " Failing?\t" + profile.getIsFailing());
} else {
System.out.println("Peer " + peer.toBase64().substring(0,4)
@@ -1577,7 +1581,7 @@ public class ProfileOrganizer {
+ " Speed:\t" + fmt.format(profile.getSpeedValue())
+ " Capacity:\t" + fmt.format(profile.getCapacityValue())
+ " Integration:\t" + fmt.format(profile.getIntegrationValue())
+ " Active?\t" + profile.getIsActive()
+ " Active?\t" + profile.getIsActive(now)
+ " Failing?\t" + profile.getIsFailing());
}
}

View File

@@ -26,9 +26,10 @@ public class TunnelHistory {
private volatile long _lastRejectedProbabalistic;
private final AtomicLong _lifetimeFailed = new AtomicLong();
private volatile long _lastFailed;
private RateStat _rejectRate;
private RateStat _failRate;
private final RateStat _rejectRate;
private final RateStat _failRate;
private final String _statGroup;
static final long[] RATES = new long[] { 10*60*1000l, 30*60*1000l, 60*60*1000l, 24*60*60*1000l };
/** probabalistic tunnel rejection due to a flood of requests - infrequent */
public static final int TUNNEL_REJECT_PROBABALISTIC_REJECT = 10;
@@ -43,12 +44,8 @@ public class TunnelHistory {
_context = context;
_log = context.logManager().getLog(TunnelHistory.class);
_statGroup = statGroup;
createRates(statGroup);
}
private void createRates(String statGroup) {
_rejectRate = new RateStat("tunnelHistory.rejectRate", "How often does this peer reject a tunnel request?", statGroup, new long[] { 10*60*1000l, 30*60*1000l, 60*60*1000l, 24*60*60*1000l });
_failRate = new RateStat("tunnelHistory.failRate", "How often do tunnels this peer accepts fail?", statGroup, new long[] { 10*60*1000l, 30*60*1000l, 60*60*1000l, 24*60*60*1000l });
_rejectRate = new RateStat("tunnelHistory.rejectRate", "How often does this peer reject a tunnel request?", statGroup, RATES);
_failRate = new RateStat("tunnelHistory.failRate", "How often do tunnels this peer accepts fail?", statGroup, RATES);
}
/** total tunnels the peer has agreed to participate in */
@@ -85,19 +82,19 @@ public class TunnelHistory {
*/
public void incrementRejected(int severity) {
_lifetimeRejected.incrementAndGet();
long now = _context.clock().now();
if (severity >= TUNNEL_REJECT_CRIT) {
_lastRejectedCritical = _context.clock().now();
_rejectRate.addData(1);
_lastRejectedCritical = now;
} else if (severity >= TUNNEL_REJECT_BANDWIDTH) {
_lastRejectedBandwidth = _context.clock().now();
_rejectRate.addData(1);
_lastRejectedBandwidth = now;
} else if (severity >= TUNNEL_REJECT_TRANSIENT_OVERLOAD) {
_lastRejectedTransient = _context.clock().now();
// dont increment the reject rate in this case
_lastRejectedTransient = now;
} else if (severity >= TUNNEL_REJECT_PROBABALISTIC_REJECT) {
_lastRejectedProbabalistic = _context.clock().now();
// dont increment the reject rate in this case
_lastRejectedProbabalistic = now;
}
// a rejection is always a rejection, don't factor based on severity,
// which could impact our ability to avoid a congested peer
_rejectRate.addData(1);
}
/**
@@ -194,14 +191,9 @@ public class TunnelHistory {
_lifetimeRejected.set(getLong(props, "tunnels.lifetimeRejected"));
try {
_rejectRate.load(props, "tunnelHistory.rejectRate", true);
if (_log.shouldLog(Log.DEBUG))
_log.debug("Loading tunnelHistory.rejectRate");
_failRate.load(props, "tunnelHistory.failRate", true);
if (_log.shouldLog(Log.DEBUG))
_log.debug("Loading tunnelHistory.failRate");
} catch (IllegalArgumentException iae) {
_log.warn("TunnelHistory rates are corrupt, resetting", iae);
createRates(_statGroup);
_log.warn("TunnelHistory rates are corrupt", iae);
}
}