ACCUMULO-2198 Concurrent randomwalk: add teardown, fix server balance check
The Concurrent randomwalk test had been using a test node property to remember the last time servers were unbalanced, but this property was not getting cleaned up between runs. Therefore, if a new Concurrent test was started some time later, it would pick up the old timestamp property from the last run. This commit removes the property during test teardown, and also moves the tracking from a node property to test state. In addition, the test logic would reset the timestamp every time servers were found unbalanced, provided the 15-minute allowance hadn't expired, so the allowance kept being pushed forward. This commit fixes that issue as well, which could lead to more (correct) reports of unbalanced servers. Lastly, the test in 1.5.x requires three unbalanced-server checks to fail before failing the test. This commit backports that requirement to 1.4.x. The timestamp reset and three-check fixes were added to 1.5.x in commit 0ee7e5a8. Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/cd4eac0d Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/cd4eac0d Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/cd4eac0d Branch: refs/heads/master Commit: cd4eac0d7e2820321db9fc9cdfc8dc89f7dd53d2 Parents: 91be551 Author: Bill Havanki <bhava...@cloudera.com> Authored: Thu Jan 16 09:00:34 2014 -0500 Committer: Bill Havanki <bhava...@cloudera.com> Committed: Thu Jan 16 14:35:39 2014 -0500 ---------------------------------------------------------------------- .../accumulo/server/test/randomwalk/State.java | 4 +++ .../randomwalk/concurrent/CheckBalance.java | 31 +++++++++++++++----- .../concurrent/ConcurrentFixture.java | 5 +++- 3 files changed, 31 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/cd4eac0d/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/State.java 
---------------------------------------------------------------------- diff --git a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/State.java b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/State.java index f9bd84e..5a53340 100644 --- a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/State.java +++ b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/State.java @@ -64,6 +64,10 @@ public class State { public void set(String key, Object value) { stateMap.put(key, value); } + + public void remove(String key) { + stateMap.remove(key); + } public Object get(String key) { if (stateMap.containsKey(key) == false) { http://git-wip-us.apache.org/repos/asf/accumulo/blob/cd4eac0d/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/CheckBalance.java ---------------------------------------------------------------------- diff --git a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/CheckBalance.java b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/CheckBalance.java index beb8327..d00e2b4 100644 --- a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/CheckBalance.java +++ b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/CheckBalance.java @@ -33,13 +33,15 @@ import org.apache.accumulo.server.test.randomwalk.Test; */ public class CheckBalance extends Test { - private static final String LAST_UNBALANCED_TIME = "lastUnbalancedTime"; + static final String LAST_UNBALANCED_TIME = "lastUnbalancedTime"; + static final String UNBALANCED_COUNT = "unbalancedCount"; /* (non-Javadoc) * @see org.apache.accumulo.server.test.randomwalk.Node#visit(org.apache.accumulo.server.test.randomwalk.State, java.util.Properties) */ @Override public void visit(State state, Properties props) throws Exception { + log.debug("checking balance"); Map<String,Long> counts = new 
HashMap<String,Long>(); Scanner scanner = state.getConnector().createScanner(Constants.METADATA_TABLE_NAME, Constants.NO_AUTHS); scanner.fetchColumnFamily(Constants.METADATA_CURRENT_LOCATION_COLUMN_FAMILY); @@ -57,25 +59,38 @@ public class CheckBalance extends Test { final double average = total / counts.size(); // Check for even # of tablets on each node + double maxDifference = Math.max(1, average / 5); + String unbalancedLocation = null; + long lastCount = 0L; boolean balanced = true; for (Entry<String,Long> entry : counts.entrySet()) { - if (Math.abs(entry.getValue().longValue() - average) > Math.max(1, average / 5)) { + lastCount = entry.getValue().longValue(); + if (Math.abs(lastCount - average) > maxDifference) { balanced = false; + unbalancedLocation = entry.getKey(); break; } } // It is expected that the number of tablets will be uneven for short // periods of time. Don't complain unless we've seen it only unbalanced - // over a 15 minute period. + // over a 15 minute period and it's been at least three checks. if (!balanced) { - String last = props.getProperty(LAST_UNBALANCED_TIME); - if (last != null && System.currentTimeMillis() - Long.parseLong(last) > 15 * 60 * 1000) { - throw new Exception("servers are unbalanced!"); + Long last = state.getLong(LAST_UNBALANCED_TIME); + if (last != null && System.currentTimeMillis() - last > 15 * 60 * 1000) { + Integer count = state.getInteger(UNBALANCED_COUNT); + if (count == null) + count = Integer.valueOf(0); + if (count > 3) + throw new Exception("servers are unbalanced! 
location " + unbalancedLocation + " count " + lastCount + " too far from average " + average); + count++; + state.set(UNBALANCED_COUNT, count); + } else if (last == null) { + state.set(LAST_UNBALANCED_TIME, System.currentTimeMillis()); } - props.setProperty(LAST_UNBALANCED_TIME, Long.toString(System.currentTimeMillis())); } else { - props.remove(LAST_UNBALANCED_TIME); + state.remove(LAST_UNBALANCED_TIME); + state.remove(UNBALANCED_COUNT); } } http://git-wip-us.apache.org/repos/asf/accumulo/blob/cd4eac0d/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/ConcurrentFixture.java ---------------------------------------------------------------------- diff --git a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/ConcurrentFixture.java b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/ConcurrentFixture.java index 62fac56..3606d57 100644 --- a/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/ConcurrentFixture.java +++ b/src/server/src/main/java/org/apache/accumulo/server/test/randomwalk/concurrent/ConcurrentFixture.java @@ -31,6 +31,9 @@ public class ConcurrentFixture extends Fixture { public void setUp(State state) throws Exception {} @Override - public void tearDown(State state) throws Exception {} + public void tearDown(State state) throws Exception { + state.remove(CheckBalance.LAST_UNBALANCED_TIME); + state.remove(CheckBalance.UNBALANCED_COUNT); + } }