Repository: accumulo Updated Branches: refs/heads/master 6ce4d9698 -> 981bf5326
ACCUMULO-2913 Add some timeouts to tests missing them, turn up logging on TSBRI to debug an otherwise silent security exception Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/981bf532 Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/981bf532 Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/981bf532 Branch: refs/heads/master Commit: 981bf53268dd0b34d35f1bdc61ed5453b8ae3fe5 Parents: 6ce4d96 Author: Josh Elser <els...@apache.org> Authored: Mon Jun 16 09:45:54 2014 -0700 Committer: Josh Elser <els...@apache.org> Committed: Mon Jun 16 09:45:54 2014 -0700 ---------------------------------------------------------------------- .../test/replication/ReplicationTest.java | 359 +++++++++---------- test/src/test/resources/log4j.properties | 1 - 2 files changed, 176 insertions(+), 184 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/accumulo/blob/981bf532/test/src/test/java/org/apache/accumulo/test/replication/ReplicationTest.java ---------------------------------------------------------------------- diff --git a/test/src/test/java/org/apache/accumulo/test/replication/ReplicationTest.java b/test/src/test/java/org/apache/accumulo/test/replication/ReplicationTest.java index cac7d94..4727a86 100644 --- a/test/src/test/java/org/apache/accumulo/test/replication/ReplicationTest.java +++ b/test/src/test/java/org/apache/accumulo/test/replication/ReplicationTest.java @@ -126,7 +126,7 @@ public class ReplicationTest extends ConfigurableMacIT { return logs; } - @Test + @Test(timeout = 1000 * 60 * 5) public void correctRecordsCompleteFile() throws Exception { Connector conn = getConnector(); String table = "table1"; @@ -163,7 +163,8 @@ public class ReplicationTest extends ConfigurableMacIT { Thread.sleep(1000); } - Assert.assertTrue("'root' user could not read the replication table", conn.securityOperations().hasTablePermission("root", ReplicationTable.NAME, TablePermission.READ)); + Assert.assertTrue("'root' user could not read the replication table", + conn.securityOperations().hasTablePermission("root", ReplicationTable.NAME, TablePermission.READ)); Set<String> replRows = Sets.newHashSet(); Scanner scanner; @@ -206,7 +207,7 @@ public class ReplicationTest extends ConfigurableMacIT { Assert.assertEquals(replRows, wals); } - @Test + @Test(timeout = 1000 * 60 * 5) public void noRecordsWithoutReplication() throws Exception { Connector conn = getConnector(); List<String> tables = new ArrayList<>(); @@ -256,13 +257,13 @@ public class ReplicationTest extends ConfigurableMacIT { Assert.assertFalse(conn.tableOperations().exists(ReplicationTable.NAME)); } - @Test + @Test(timeout = 1000 * 60 * 5) public void twoEntriesForTwoTables() throws Exception { Connector conn = getConnector(); String table1 = "table1", table2 = "table2"; // replication shouldn't exist when we begin - Assert.assertFalse(conn.tableOperations().exists(ReplicationTable.NAME)); + Assert.assertFalse("Replication table already existed at the beginning of the test", conn.tableOperations().exists(ReplicationTable.NAME)); // Create two tables conn.tableOperations().create(table1); @@ -296,7 +297,7 @@ public class ReplicationTest extends ConfigurableMacIT { int attempts = 5; do { if (!exists) { - UtilWaitThread.sleep(200); + UtilWaitThread.sleep(1000); exists = conn.tableOperations().exists(ReplicationTable.NAME); attempts--; } @@ -369,8 +370,8 @@ public class ReplicationTest extends ConfigurableMacIT { Assert.assertEquals("Expected to find 2 records, but actually found " + records, 2, records.size()); for (Entry<Key,Value> metadata : records) { - Assert.assertTrue("Expected record to be in metadata but wasn't " + metadata.getKey().toStringNoTruncate() + ", tableIds remaining " + tableIdsForMetadata, - tableIdsForMetadata.remove(metadata.getKey().getColumnQualifier().toString())); + Assert.assertTrue("Expected record to be in metadata but wasn't " + metadata.getKey().toStringNoTruncate() + ", tableIds remaining " + + tableIdsForMetadata, tableIdsForMetadata.remove(metadata.getKey().getColumnQualifier().toString())); } Assert.assertTrue("Expected that we had removed all metadata entries " + tableIdsForMetadata, tableIdsForMetadata.isEmpty()); @@ -760,7 +761,7 @@ public class ReplicationTest extends ConfigurableMacIT { Assert.fail("We had a file that was referenced but didn't get closed"); } - @Test + @Test(timeout = 1000 * 60 * 5) public void singleTableWithSingleTarget() throws Exception { // We want to kill the GC so it doesn't come along and close Status records and mess up the comparisons // against expected Status messages. @@ -948,7 +949,7 @@ public class ReplicationTest extends ConfigurableMacIT { } } - @Test + @Test(timeout = 1000 * 60 * 5) public void correctClusterNameInWorkEntry() throws Exception { Connector conn = getConnector(); String table1 = "table1"; @@ -1256,212 +1257,204 @@ public class ReplicationTest extends ConfigurableMacIT { // replication shouldn't exist when we begin Assert.assertFalse(conn.tableOperations().exists(ReplicationTable.NAME)); - // ReplicationTablesPrinterThread thread = new ReplicationTablesPrinterThread(conn, System.out); - // thread.start(); - - try { - // Create two tables - conn.tableOperations().create(table1); + // Create two tables + conn.tableOperations().create(table1); - int attempts = 5; - while (attempts > 0) { - try { - // Enable replication on table1 - conn.tableOperations().setProperty(table1, Property.TABLE_REPLICATION.getKey(), "true"); - // Replicate table1 to cluster1 in the table with id of '4' - conn.tableOperations().setProperty(table1, Property.TABLE_REPLICATION_TARGET.getKey() + "cluster1", "4"); - // Use the MockReplicaSystem impl and sleep for 5seconds - conn.instanceOperations().setProperty(Property.REPLICATION_PEERS.getKey() + "cluster1", - ReplicaSystemFactory.getPeerConfigurationValue(MockReplicaSystem.class, "1000")); - attempts = 0; - } catch (Exception e) { - attempts--; - if (attempts <= 0) { - throw e; - } - UtilWaitThread.sleep(500); + int attempts = 5; + while (attempts > 0) { + try { + // Enable replication on table1 + conn.tableOperations().setProperty(table1, Property.TABLE_REPLICATION.getKey(), "true"); + // Replicate table1 to cluster1 in the table with id of '4' + conn.tableOperations().setProperty(table1, Property.TABLE_REPLICATION_TARGET.getKey() + "cluster1", "4"); + // Use the MockReplicaSystem impl and sleep for 5seconds + conn.instanceOperations().setProperty(Property.REPLICATION_PEERS.getKey() + "cluster1", + ReplicaSystemFactory.getPeerConfigurationValue(MockReplicaSystem.class, "1000")); + attempts = 0; + } catch (Exception e) { + attempts--; + if (attempts <= 0) { + throw e; } + UtilWaitThread.sleep(500); } + } - String tableId = conn.tableOperations().tableIdMap().get(table1); - Assert.assertNotNull("Could not determine table id for " + table1, tableId); + String tableId = conn.tableOperations().tableIdMap().get(table1); + Assert.assertNotNull("Could not determine table id for " + table1, tableId); - // Write some data to table1 - BatchWriter bw = conn.createBatchWriter(table1, new BatchWriterConfig()); - for (int rows = 0; rows < 2000; rows++) { - Mutation m = new Mutation(Integer.toString(rows)); - for (int cols = 0; cols < 50; cols++) { - String value = Integer.toString(cols); - m.put(value, "", value); - } - bw.addMutation(m); + // Write some data to table1 + BatchWriter bw = conn.createBatchWriter(table1, new BatchWriterConfig()); + for (int rows = 0; rows < 2000; rows++) { + Mutation m = new Mutation(Integer.toString(rows)); + for (int cols = 0; cols < 50; cols++) { + String value = Integer.toString(cols); + m.put(value, "", value); } + bw.addMutation(m); + } - bw.close(); - - // Make sure the replication table exists at this point - boolean exists = conn.tableOperations().exists(ReplicationTable.NAME); - attempts = 10; - do { - if (!exists) { - UtilWaitThread.sleep(1000); - exists = conn.tableOperations().exists(ReplicationTable.NAME); - attempts--; - } - } while (!exists && attempts > 0); - Assert.assertTrue("Replication table did not exist", exists); + bw.close(); - // Grant ourselves the write permission for later - conn.securityOperations().grantTablePermission("root", ReplicationTable.NAME, TablePermission.WRITE); + // Make sure the replication table exists at this point + boolean exists = conn.tableOperations().exists(ReplicationTable.NAME); + attempts = 10; + do { + if (!exists) { + UtilWaitThread.sleep(1000); + exists = conn.tableOperations().exists(ReplicationTable.NAME); + attempts--; + } + } while (!exists && attempts > 0); + Assert.assertTrue("Replication table did not exist", exists); - // Find the WorkSection record that will be created for that data we ingested - boolean notFound = true; - Scanner s; - for (int i = 0; i < 10 && notFound; i++) { - try { - s = ReplicationTable.getScanner(conn); - WorkSection.limit(s); - Entry<Key,Value> e = Iterables.getOnlyElement(s); - Text expectedColqual = new ReplicationTarget("cluster1", "4", tableId).toText(); - Assert.assertEquals(expectedColqual, e.getKey().getColumnQualifier()); - notFound = false; - } catch (NoSuchElementException e) { - - } catch (IllegalArgumentException e) { - // Somehow we got more than one element. Log what they were - s = ReplicationTable.getScanner(conn); - for (Entry<Key,Value> content : s) { - log.info(content.getKey().toStringNoTruncate() + " => " + content.getValue()); - } - Assert.fail("Found more than one work section entry"); - } catch (RuntimeException e) { - // Catch a propagation issue, fail if it's not what we expect - Throwable cause = e.getCause(); - if (cause instanceof AccumuloSecurityException) { - AccumuloSecurityException sec = (AccumuloSecurityException) cause; - switch (sec.getSecurityErrorCode()) { - case PERMISSION_DENIED: - // retry -- the grant didn't happen yet - log.warn("Sleeping because permission was denied"); - break; - default: - throw e; - } - } else { - throw e; - } - } + // Grant ourselves the write permission for later + conn.securityOperations().grantTablePermission("root", ReplicationTable.NAME, TablePermission.WRITE); - Thread.sleep(2000); - } + // Find the WorkSection record that will be created for that data we ingested + boolean notFound = true; + Scanner s; + for (int i = 0; i < 10 && notFound; i++) { + try { + s = ReplicationTable.getScanner(conn); + WorkSection.limit(s); + Entry<Key,Value> e = Iterables.getOnlyElement(s); + Text expectedColqual = new ReplicationTarget("cluster1", "4", tableId).toText(); + Assert.assertEquals(expectedColqual, e.getKey().getColumnQualifier()); + notFound = false; + } catch (NoSuchElementException e) { - if (notFound) { + } catch (IllegalArgumentException e) { + // Somehow we got more than one element. Log what they were s = ReplicationTable.getScanner(conn); for (Entry<Key,Value> content : s) { log.info(content.getKey().toStringNoTruncate() + " => " + content.getValue()); } - Assert.assertFalse("Did not find the work entry for the status entry", notFound); + Assert.fail("Found more than one work section entry"); + } catch (RuntimeException e) { + // Catch a propagation issue, fail if it's not what we expect + Throwable cause = e.getCause(); + if (cause instanceof AccumuloSecurityException) { + AccumuloSecurityException sec = (AccumuloSecurityException) cause; + switch (sec.getSecurityErrorCode()) { + case PERMISSION_DENIED: + // retry -- the grant didn't happen yet + log.warn("Sleeping because permission was denied"); + break; + default: + throw e; + } + } else { + throw e; + } } - /** - * By this point, we should have data ingested into a table, with at least one WAL as a candidate for replication. Compacting the table should close all - * open WALs, which should ensure all records we're going to replicate have entries in the replication table, and nothing will exist in the metadata table - * anymore - */ - - log.info("Killing tserver"); - // Kill the tserver(s) and restart them - // to ensure that the WALs we previously observed all move to closed. - for (ProcessReference proc : cluster.getProcesses().get(ServerType.TABLET_SERVER)) { - cluster.killProcess(ServerType.TABLET_SERVER, proc); + Thread.sleep(2000); + } + + if (notFound) { + s = ReplicationTable.getScanner(conn); + for (Entry<Key,Value> content : s) { + log.info(content.getKey().toStringNoTruncate() + " => " + content.getValue()); } + Assert.assertFalse("Did not find the work entry for the status entry", notFound); + } + + /** + * By this point, we should have data ingested into a table, with at least one WAL as a candidate for replication. Compacting the table should close all + * open WALs, which should ensure all records we're going to replicate have entries in the replication table, and nothing will exist in the metadata table + * anymore + */ - log.info("Starting tserver"); - cluster.exec(TabletServer.class); + log.info("Killing tserver"); + // Kill the tserver(s) and restart them + // to ensure that the WALs we previously observed all move to closed. + for (ProcessReference proc : cluster.getProcesses().get(ServerType.TABLET_SERVER)) { + cluster.killProcess(ServerType.TABLET_SERVER, proc); + } - log.info("Waiting to read tables"); + log.info("Starting tserver"); + cluster.exec(TabletServer.class); - // Make sure we can read all the tables (recovery complete) - for (String table : new String[] {MetadataTable.NAME, table1}) { - s = conn.createScanner(table, new Authorizations()); - for (@SuppressWarnings("unused") - Entry<Key,Value> entry : s) {} - } + log.info("Waiting to read tables"); - log.info("Checking for replication entries in replication"); - // Then we need to get those records over to the replication table - boolean foundResults = false; - for (int i = 0; i < 5; i++) { - s = ReplicationTable.getScanner(conn); - int count = 0; - for (Entry<Key,Value> entry : s) { - count++; - log.info("{}={}", entry.getKey().toStringNoTruncate(), entry.getValue()); - } - if (count > 0) { - foundResults = true; - break; - } - Thread.sleep(1000); + // Make sure we can read all the tables (recovery complete) + for (String table : new String[] {MetadataTable.NAME, table1}) { + s = conn.createScanner(table, new Authorizations()); + for (@SuppressWarnings("unused") + Entry<Key,Value> entry : s) {} + } + + log.info("Checking for replication entries in replication"); + // Then we need to get those records over to the replication table + boolean foundResults = false; + for (int i = 0; i < 5; i++) { + s = ReplicationTable.getScanner(conn); + int count = 0; + for (Entry<Key,Value> entry : s) { + count++; + log.info("{}={}", entry.getKey().toStringNoTruncate(), entry.getValue()); + } + if (count > 0) { + foundResults = true; + break; } + Thread.sleep(1000); + } - Assert.assertTrue("Did not find any replication entries in the replication table", foundResults); + Assert.assertTrue("Did not find any replication entries in the replication table", foundResults); - getCluster().exec(SimpleGarbageCollector.class); + getCluster().exec(SimpleGarbageCollector.class); - // Wait for a bit since the GC has to run (should be running after a one second delay) - Thread.sleep(5000); + // Wait for a bit since the GC has to run (should be running after a one second delay) + Thread.sleep(5000); - // We expect no records in the metadata table after compaction. We have to poll - // because we have to wait for the StatusMaker's next iteration which will clean - // up the dangling *closed* records after we create the record in the replication table. - // We need the GC to close the file (CloseWriteAheadLogReferences) before we can remove the record - log.info("Checking metadata table for replication entries"); - foundResults = true; - for (int i = 0; i < 5; i++) { - s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY); - s.setRange(ReplicationSection.getRange()); - long size = 0; - for (Entry<Key,Value> e : s) { - size++; - log.info("{}={}", e.getKey().toStringNoTruncate(), ProtobufUtil.toString(Status.parseFrom(e.getValue().get()))); - } - if (size == 0) { - foundResults = false; - break; - } - Thread.sleep(1000); - log.info(""); + // We expect no records in the metadata table after compaction. We have to poll + // because we have to wait for the StatusMaker's next iteration which will clean + // up the dangling *closed* records after we create the record in the replication table. + // We need the GC to close the file (CloseWriteAheadLogReferences) before we can remove the record + log.info("Checking metadata table for replication entries"); + foundResults = true; + for (int i = 0; i < 5; i++) { + s = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY); + s.setRange(ReplicationSection.getRange()); + long size = 0; + for (Entry<Key,Value> e : s) { + size++; + log.info("{}={}", e.getKey().toStringNoTruncate(), ProtobufUtil.toString(Status.parseFrom(e.getValue().get()))); + } + if (size == 0) { + foundResults = false; + break; } + Thread.sleep(1000); + log.info(""); + } - Assert.assertFalse("Replication status messages were not cleaned up from metadata table", foundResults); + Assert.assertFalse("Replication status messages were not cleaned up from metadata table", foundResults); - /** - * After we close out and subsequently delete the metadata record, this will propagate to the replication table, which will cause those records to be - * deleted after replication occurs - */ + /** + * After we close out and subsequently delete the metadata record, this will propagate to the replication table, which will cause those records to be + * deleted after replication occurs + */ - int recordsFound = 0; - for (int i = 0; i < 10; i++) { - s = ReplicationTable.getScanner(conn); - recordsFound = 0; - for (Entry<Key,Value> entry : s) { - recordsFound++; - log.info(entry.getKey().toStringNoTruncate() + " " + ProtobufUtil.toString(Status.parseFrom(entry.getValue().get()))); - } - - if (0 == recordsFound) { - break; - } else { - Thread.sleep(1000); - log.info(""); - } + int recordsFound = 0; + for (int i = 0; i < 10; i++) { + s = ReplicationTable.getScanner(conn); + recordsFound = 0; + for (Entry<Key,Value> entry : s) { + recordsFound++; + log.info(entry.getKey().toStringNoTruncate() + " " + ProtobufUtil.toString(Status.parseFrom(entry.getValue().get()))); } - Assert.assertEquals("Found unexpected replication records in the replication table", 0, recordsFound); - } finally { - // thread.interrupt(); - // thread.join(5000); + if (0 == recordsFound) { + break; + } else { + Thread.sleep(1000); + log.info(""); + } } + + Assert.assertEquals("Found unexpected replication records in the replication table", 0, recordsFound); } } http://git-wip-us.apache.org/repos/asf/accumulo/blob/981bf532/test/src/test/resources/log4j.properties ---------------------------------------------------------------------- diff --git a/test/src/test/resources/log4j.properties b/test/src/test/resources/log4j.properties index 031b685..a960c35 100644 --- a/test/src/test/resources/log4j.properties +++ b/test/src/test/resources/log4j.properties @@ -33,7 +33,6 @@ log4j.logger.org.apache.zookeeper.ClientCnxn=FATAL log4j.logger.org.apache.zookeeper.ZooKeeper=WARN log4j.logger.org.apache.accumulo.core.file.rfile.bcfile=INFO log4j.logger.org.apache.accumulo.server.util.ReplicationTableUtil=TRACE -log4j.logger.org.apache.accumulo.core.client.impl.TabletServerBatchReaderIterator=INFO log4j.logger.org.apache.accumulo.core.client.impl.ThriftScanner=INFO log4j.logger.org.apache.accumulo.fate.zookeeper.DistributedReadWriteLock=WARN log4j.logger.org.mortbay.log=WARN