This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
commit ea5afb9d85e342834554504cdf541a43dcc79a84 Merge: 9ffc450b0a bdec3a7c3a Author: Keith Turner <ktur...@apache.org> AuthorDate: Thu Feb 22 19:00:28 2024 -0500 Merge branch '2.1' .../server/manager/state/MetaDataStateStore.java | 3 +- .../manager/state/TabletStateChangeIterator.java | 3 + .../accumulo/test/manager/SuspendedTabletsIT.java | 249 +++++++++++++-------- 3 files changed, 157 insertions(+), 98 deletions(-) diff --cc test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java index ec99094346,764bc1dfc9..8ebb75378b --- a/test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java +++ b/test/src/main/java/org/apache/accumulo/test/manager/SuspendedTabletsIT.java @@@ -333,6 -300,95 +303,93 @@@ public class SuspendedTabletsIT extend throws Exception; } + private class ShutdownTserverKiller implements TServerKiller { + + @Override + public void eliminateTabletServers(ClientContext ctx, TabletLocations locs, int count) + throws Exception { - + Set<TServerInstance> tserverSet = new HashSet<>(); + Set<TServerInstance> metadataServerSet = new HashSet<>(); + - TabletLocator tl = TabletLocator.getLocator(ctx, MetadataTable.ID); ++ TabletLocator tl = TabletLocator.getLocator(ctx, AccumuloTable.METADATA.tableId()); + for (TabletLocationState tls : locs.locationStates.values()) { + if (tls.current != null) { + // add to set of all servers + tserverSet.add(tls.current.getServerInstance()); + + // get server that the current tablets metadata is on + TabletLocator.TabletLocation tab = + tl.locateTablet(ctx, tls.extent.toMetaRow(), false, false); + // add it to the set of servers with metadata - metadataServerSet - .add(new TServerInstance(tab.tablet_location, Long.valueOf(tab.tablet_session, 16))); ++ metadataServerSet.add(new TServerInstance(tab.getTserverLocation(), ++ Long.valueOf(tab.getTserverSession(), 16))); + } + } + + // remove servers with metadata on them from the list of servers to be shutdown + assertEquals(1, metadataServerSet.size(), "Expecting a single tServer in metadataServerSet"); + tserverSet.removeAll(metadataServerSet); + + assertEquals(TSERVERS - 1, tserverSet.size(), + "Expecting " + (TSERVERS - 1) + " tServers in shutdown-list"); + + List<TServerInstance> tserversList = new ArrayList<>(tserverSet); - Collections.shuffle(tserversList, random); ++ Collections.shuffle(tserversList, RANDOM.get()); + + for (int i1 = 0; i1 < count; ++i1) { + final String tserverName = tserversList.get(i1).getHostPortSession(); + ThriftClientTypes.MANAGER.executeVoid(ctx, client -> { + log.info("Sending shutdown command to {} via ManagerClientService", tserverName); + client.shutdownTabletServer(null, ctx.rpcCreds(), tserverName, false); + }); + } + + log.info("Waiting for tserver process{} to die", count == 1 ? "" : "es"); + for (int i2 = 0; i2 < 10; ++i2) { + List<ProcessReference> deadProcs = new ArrayList<>(); + for (ProcessReference pr1 : getCluster().getProcesses().get(ServerType.TABLET_SERVER)) { + Process p = pr1.getProcess(); + if (!p.isAlive()) { + deadProcs.add(pr1); + } + } + for (ProcessReference pr2 : deadProcs) { + log.info("Process {} is dead, informing cluster control about this", pr2.getProcess()); + getCluster().getClusterControl().killProcess(ServerType.TABLET_SERVER, pr2); + --count; + } + if (count == 0) { + return; + } else { + Thread.sleep(SECONDS.toMillis(2)); + } + } + throw new IllegalStateException("Tablet servers didn't die!"); - + } + } + + private class CrashTserverKiller implements TServerKiller { + + @Override + public void eliminateTabletServers(ClientContext ctx, TabletLocations locs, int count) + throws Exception { + // Exclude the tablet server hosting the metadata table from the list and only + // kill tablet servers that are not hosting the metadata table. + List<ProcessReference> procs = getCluster().getProcesses().get(ServerType.TABLET_SERVER) + .stream().filter(p -> !metadataTserverProcess.equals(p)).collect(Collectors.toList()); - Collections.shuffle(procs, random); ++ Collections.shuffle(procs, RANDOM.get()); + assertEquals(TSERVERS - 1, procs.size(), "Not enough tservers exist"); + assertTrue(procs.size() >= count, "Attempting to kill more tservers (" + count + + ") than exist in the cluster (" + procs.size() + ")"); + + for (int i = 0; i < count; ++i) { + ProcessReference pr = procs.get(i); + log.info("Crashing {}", pr.getProcess()); + getCluster().killProcess(ServerType.TABLET_SERVER, pr); + } + } + } + private static final AtomicInteger threadCounter = new AtomicInteger(0); @BeforeAll