This is an automated email from the ASF dual-hosted git repository.
dlmarion pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push:
new 80b8fbcd16 Added calls to ZooZap when shutting down groups of servers (#5321)
80b8fbcd16 is described below
commit 80b8fbcd16ea552cd6c76245f07da8f1a1d4dd4d
Author: Dave Marion <[email protected]>
AuthorDate: Wed Feb 19 18:46:08 2025 -0500
Added calls to ZooZap when shutting down groups of servers (#5321)
Modified accumulo-cluster to wait for all spawned ssh commands
to finish and then call ZooZap when shutting down all Compactor,
ScanServer, and TabletServer processes for a resource group.
Modified ZooZap so that the code is consistent for Compactors,
ScanServers, and TabletServers such that for those server types
it performs a recursive delete at the resource group in the ZK
path.
Closes #5178
---
assemble/bin/accumulo-cluster | 45 +++++++++++++++++-----
.../org/apache/accumulo/server/util/ZooZap.java | 42 ++++++++------------
2 files changed, 53 insertions(+), 34 deletions(-)
diff --git a/assemble/bin/accumulo-cluster b/assemble/bin/accumulo-cluster
index 3d65733213..7341aa99dc 100755
--- a/assemble/bin/accumulo-cluster
+++ b/assemble/bin/accumulo-cluster
@@ -360,6 +360,17 @@ function parse_config() {
}
+function ssh_wait() {
+ #shellcheck disable=SC2009
+ count=$(ps -ef | grep ssh | grep -c "accumulo-service")
+ while ((count > 0)); do
+ echo "waiting on $count ssh commands to complete"
+ sleep 2
+ #shellcheck disable=SC2009
+ count=$(ps -ef | grep ssh | grep -c "accumulo-service")
+ done
+}
+
function execute_command() {
control_cmd=$1
host=$2
@@ -431,12 +442,9 @@ function control_services() {
for group in $tserver_groups; do
G="TSERVER_HOSTS_$group"
for tserver in ${!G}; do
+ debug "Stopping tservers on $addr via admin command"
if echo "$tserver" | grep -q "$addr"; then
- if ! isDebug; then
- "$accumulo_cmd" admin stop "$addr"
- else
- debug "Stopping tservers on $addr via admin command"
- fi
+ debugOrRun "$accumulo_cmd" admin stop "$addr"
fi
done
done
@@ -471,6 +479,13 @@ function control_services() {
fi
fi
done
+ if [[ $ARG_LOCAL == 0 && ($operation == "stop" || $operation == "kill") ]]; then
+ # If the prior commands were executed via ssh, then we need to wait for them
+ # to complete before zapping the nodes in ZooKeeper
+ ssh_wait
+ echo "Cleaning tablet server entries from zookeeper for resource group $group"
+ debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -verbose -tservers -group "$group"
+ fi
echo "done"
done
fi
@@ -520,6 +535,13 @@ function control_services() {
execute_command "$operation" "$sserver" sserver "$group" "-o" "sserver.group=$group"
fi
done
+ if [[ $ARG_LOCAL == 0 && ($operation == "stop" || $operation == "kill") ]]; then
+ # If the prior commands were executed via ssh, then we need to wait for them
+ # to complete before zapping the nodes in ZooKeeper
+ ssh_wait
+ echo "Cleaning scan server entries from zookeeper for resource group $group"
+ debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -verbose -sservers -group "$group"
+ fi
done
fi
@@ -538,14 +560,19 @@ function control_services() {
execute_command "$operation" "$compactor" compactor "$group" "-o" "compactor.group=$group"
fi
done
+ if [[ $ARG_LOCAL == 0 && ($operation == "stop" || $operation == "kill") ]]; then
+ # If the prior commands were executed via ssh, then we need to wait for them
+ # to complete before zapping the nodes in ZooKeeper
+ ssh_wait
+ echo "Cleaning compactor entries from zookeeper for resource group $group"
+ debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -verbose -compactors -group "$group"
+ fi
done
fi
if [[ $ARG_LOCAL == 0 && $ARG_ALL == 1 && ($operation == "stop" || $operation == "kill") ]]; then
- if ! isDebug; then
- echo "Cleaning all server entries in ZooKeeper"
- "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -manager -tservers -compactors -sservers
- fi
+ debug "Cleaning all server entries in ZooKeeper"
+ debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -verbose -manager -tservers -compactors -sservers
fi
}
diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java b/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java
index c97892a4c0..cc4d723591 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java
@@ -27,7 +27,6 @@ import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.conf.SiteConfiguration;
import org.apache.accumulo.core.fate.zookeeper.ZooReaderWriter;
import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeMissingPolicy;
-import org.apache.accumulo.core.lock.ServiceLock;
import org.apache.accumulo.core.lock.ServiceLockPaths.AddressSelector;
import org.apache.accumulo.core.lock.ServiceLockPaths.ResourceGroupPredicate;
import org.apache.accumulo.core.lock.ServiceLockPaths.ServiceLockPath;
@@ -103,7 +102,7 @@ public class ZooZap implements KeywordExecutable {
Opts opts = new Opts();
opts.parseArgs(keyword(), args);
- if (!opts.zapManager && !opts.zapTservers) {
+ if (!opts.zapManager && !opts.zapTservers && !opts.zapCompactors && !opts.zapScanServers) {
new JCommander(opts).usage();
return;
}
@@ -136,21 +135,12 @@ public class ZooZap implements KeywordExecutable {
try {
Set<ServiceLockPath> tserverLockPaths =
context.getServerPaths().getTabletServer(rgp, AddressSelector.all(), false);
- for (ServiceLockPath tserverPath : tserverLockPaths) {
-
- message("Deleting " + tserverPath + " from zookeeper", opts);
-
- if (opts.zapManager) {
- zrw.recursiveDelete(tserverPath.toString(), NodeMissingPolicy.SKIP);
- } else {
- if (!zrw.getChildren(tserverPath.toString()).isEmpty()) {
- try {
- ServiceLock.deleteLock(zrw, tserverPath);
- } catch (RuntimeException e) {
- message("Did not delete " + tserverPath, opts);
- }
- }
- }
+ Set<String> tserverResourceGroupPaths = new HashSet<>();
+ tserverLockPaths.forEach(p -> tserverResourceGroupPaths
+ .add(p.toString().substring(0, p.toString().lastIndexOf('/'))));
+ for (String group : tserverResourceGroupPaths) {
+ message("Deleting tserver " + group + " from zookeeper", opts);
+ zrw.recursiveDelete(group.toString(), NodeMissingPolicy.SKIP);
}
} catch (KeeperException | InterruptedException e) {
log.error("{}", e.getMessage(), e);
@@ -165,7 +155,7 @@ public class ZooZap implements KeywordExecutable {
.add(p.toString().substring(0, p.toString().lastIndexOf('/'))));
try {
for (String group : compactorResourceGroupPaths) {
- message("Deleting " + group + " from zookeeper", opts);
+ message("Deleting compactor " + group + " from zookeeper", opts);
zrw.recursiveDelete(group, NodeMissingPolicy.SKIP);
}
} catch (KeeperException | InterruptedException e) {
@@ -175,14 +165,16 @@ public class ZooZap implements KeywordExecutable {
}
if (opts.zapScanServers) {
+ Set<ServiceLockPath> sserverLockPaths =
+ context.getServerPaths().getScanServer(rgp, AddressSelector.all(), false);
+ Set<String> sserverResourceGroupPaths = new HashSet<>();
+ sserverLockPaths.forEach(p -> sserverResourceGroupPaths
+ .add(p.toString().substring(0, p.toString().lastIndexOf('/'))));
+
try {
- Set<ServiceLockPath> sserverLockPaths =
- context.getServerPaths().getScanServer(rgp, AddressSelector.all(), false);
- for (ServiceLockPath sserverPath : sserverLockPaths) {
- message("Deleting " + sserverPath + " from zookeeper", opts);
- if (!zrw.getChildren(sserverPath.toString()).isEmpty()) {
- ServiceLock.deleteLock(zrw, sserverPath);
- }
+ for (String group : sserverResourceGroupPaths) {
+ message("Deleting sserver " + group + " from zookeeper", opts);
+ zrw.recursiveDelete(group, NodeMissingPolicy.SKIP);
}
} catch (KeeperException | InterruptedException e) {
log.error("{}", e.getMessage(), e);