This is an automated email from the ASF dual-hosted git repository. dlmarion pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push: new 80b8fbcd16 Added calls to ZooZap when shutting down groups of servers (#5321) 80b8fbcd16 is described below commit 80b8fbcd16ea552cd6c76245f07da8f1a1d4dd4d Author: Dave Marion <dlmar...@apache.org> AuthorDate: Wed Feb 19 18:46:08 2025 -0500 Added calls to ZooZap when shutting down groups of servers (#5321) Modified accumulo-cluster to wait for all spawned ssh commands to finish and then call ZooZap when shutting down all Compactor, ScanServer, and TabletServer processes for a resource group. Modified ZooZap so that the code is consistent for Compactors, ScanServers, and TabletServers such that for those server types it performs a recursive delete at the resource group in the ZK path. Closes #5178 --- assemble/bin/accumulo-cluster | 45 +++++++++++++++++----- .../org/apache/accumulo/server/util/ZooZap.java | 42 ++++++++------------ 2 files changed, 53 insertions(+), 34 deletions(-) diff --git a/assemble/bin/accumulo-cluster b/assemble/bin/accumulo-cluster index 3d65733213..7341aa99dc 100755 --- a/assemble/bin/accumulo-cluster +++ b/assemble/bin/accumulo-cluster @@ -360,6 +360,17 @@ function parse_config() { } +function ssh_wait() { + #shellcheck disable=SC2009 + count=$(ps -ef | grep ssh | grep -c "accumulo-service") + while ((count > 0)); do + echo "waiting on $count ssh commands to complete" + sleep 2 + #shellcheck disable=SC2009 + count=$(ps -ef | grep ssh | grep -c "accumulo-service") + done +} + function execute_command() { control_cmd=$1 host=$2 @@ -431,12 +442,9 @@ function control_services() { for group in $tserver_groups; do G="TSERVER_HOSTS_$group" for tserver in ${!G}; do + debug "Stopping tservers on $addr via admin command" if echo "$tserver" | grep -q "$addr"; then - if ! isDebug; then - "$accumulo_cmd" admin stop "$addr" - else - debug "Stopping tservers on $addr via admin command" - fi + debugOrRun "$accumulo_cmd" admin stop "$addr" fi done done @@ -471,6 +479,13 @@ function control_services() { fi fi done + if [[ $ARG_LOCAL == 0 && ($operation == "stop" || $operation == "kill") ]]; then + # If the prior commands were executed via ssh, then we need to wait for them + # to complete before zapping the nodes in ZooKeeper + ssh_wait + echo "Cleaning tablet server entries from zookeeper for resource group $group" + debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -verbose -tservers -group "$group" + fi echo "done" done fi @@ -520,6 +535,13 @@ function control_services() { execute_command "$operation" "$sserver" sserver "$group" "-o" "sserver.group=$group" fi done + if [[ $ARG_LOCAL == 0 && ($operation == "stop" || $operation == "kill") ]]; then + # If the prior commands were executed via ssh, then we need to wait for them + # to complete before zapping the nodes in ZooKeeper + ssh_wait + echo "Cleaning scan server entries from zookeeper for resource group $group" + debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -verbose -sservers -group "$group" + fi done fi @@ -538,14 +560,19 @@ function control_services() { execute_command "$operation" "$compactor" compactor "$group" "-o" "compactor.group=$group" fi done + if [[ $ARG_LOCAL == 0 && ($operation == "stop" || $operation == "kill") ]]; then + # If the prior commands were executed via ssh, then we need to wait for them + # to complete before zapping the nodes in ZooKeeper + ssh_wait + echo "Cleaning compactor entries from zookeeper for resource group $group" + debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -verbose -compactors -group "$group" + fi done fi if [[ $ARG_LOCAL == 0 && $ARG_ALL == 1 && ($operation == "stop" || $operation == "kill") ]]; then - if ! isDebug; then - echo "Cleaning all server entries in ZooKeeper" - "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -manager -tservers -compactors -sservers - fi + debug "Cleaning all server entries in ZooKeeper" + debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -verbose -manager -tservers -compactors -sservers fi } diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java b/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java index c97892a4c0..cc4d723591 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java +++ b/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java @@ -27,7 +27,6 @@ import org.apache.accumulo.core.conf.Property; import org.apache.accumulo.core.conf.SiteConfiguration; import org.apache.accumulo.core.fate.zookeeper.ZooReaderWriter; import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeMissingPolicy; -import org.apache.accumulo.core.lock.ServiceLock; import org.apache.accumulo.core.lock.ServiceLockPaths.AddressSelector; import org.apache.accumulo.core.lock.ServiceLockPaths.ResourceGroupPredicate; import org.apache.accumulo.core.lock.ServiceLockPaths.ServiceLockPath; @@ -103,7 +102,7 @@ public class ZooZap implements KeywordExecutable { Opts opts = new Opts(); opts.parseArgs(keyword(), args); - if (!opts.zapManager && !opts.zapTservers) { + if (!opts.zapManager && !opts.zapTservers && !opts.zapCompactors && !opts.zapScanServers) { new JCommander(opts).usage(); return; } @@ -136,21 +135,12 @@ public class ZooZap implements KeywordExecutable { try { Set<ServiceLockPath> tserverLockPaths = context.getServerPaths().getTabletServer(rgp, AddressSelector.all(), false); - for (ServiceLockPath tserverPath : tserverLockPaths) { - - message("Deleting " + tserverPath + " from zookeeper", opts); - - if (opts.zapManager) { - zrw.recursiveDelete(tserverPath.toString(), NodeMissingPolicy.SKIP); - } else { - if (!zrw.getChildren(tserverPath.toString()).isEmpty()) { - try { - ServiceLock.deleteLock(zrw, tserverPath); - } catch (RuntimeException e) { - message("Did not delete " + tserverPath, opts); - } - } - } + Set<String> tserverResourceGroupPaths = new HashSet<>(); + tserverLockPaths.forEach(p -> tserverResourceGroupPaths + .add(p.toString().substring(0, p.toString().lastIndexOf('/')))); + for (String group : tserverResourceGroupPaths) { + message("Deleting tserver " + group + " from zookeeper", opts); + zrw.recursiveDelete(group.toString(), NodeMissingPolicy.SKIP); } } catch (KeeperException | InterruptedException e) { log.error("{}", e.getMessage(), e); @@ -165,7 +155,7 @@ public class ZooZap implements KeywordExecutable { .add(p.toString().substring(0, p.toString().lastIndexOf('/')))); try { for (String group : compactorResourceGroupPaths) { - message("Deleting " + group + " from zookeeper", opts); + message("Deleting compactor " + group + " from zookeeper", opts); zrw.recursiveDelete(group, NodeMissingPolicy.SKIP); } } catch (KeeperException | InterruptedException e) { @@ -175,14 +165,16 @@ public class ZooZap implements KeywordExecutable { } if (opts.zapScanServers) { + Set<ServiceLockPath> sserverLockPaths = + context.getServerPaths().getScanServer(rgp, AddressSelector.all(), false); + Set<String> sserverResourceGroupPaths = new HashSet<>(); + sserverLockPaths.forEach(p -> sserverResourceGroupPaths + .add(p.toString().substring(0, p.toString().lastIndexOf('/')))); + try { - Set<ServiceLockPath> sserverLockPaths = - context.getServerPaths().getScanServer(rgp, AddressSelector.all(), false); - for (ServiceLockPath sserverPath : sserverLockPaths) { - message("Deleting " + sserverPath + " from zookeeper", opts); - if (!zrw.getChildren(sserverPath.toString()).isEmpty()) { - ServiceLock.deleteLock(zrw, sserverPath); - } + for (String group : sserverResourceGroupPaths) { + message("Deleting sserver " + group + " from zookeeper", opts); + zrw.recursiveDelete(group, NodeMissingPolicy.SKIP); } } catch (KeeperException | InterruptedException e) { log.error("{}", e.getMessage(), e);