This is an automated email from the ASF dual-hosted git repository.

dlmarion pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new 80b8fbcd16 Added calls to ZooZap when shutting down groups of servers (#5321)
80b8fbcd16 is described below

commit 80b8fbcd16ea552cd6c76245f07da8f1a1d4dd4d
Author: Dave Marion <dlmar...@apache.org>
AuthorDate: Wed Feb 19 18:46:08 2025 -0500

    Added calls to ZooZap when shutting down groups of servers (#5321)
    
    Modified accumulo-cluster to wait for all spawned ssh commands
    to finish and then call ZooZap when shutting down all Compactor,
    ScanServer, and TabletServer processes for a resource group.
    Modified ZooZap so that the code is consistent for Compactors,
    ScanServers, and TabletServers such that for those server types
    it performs a recursive delete at the resource group in the ZK
    path.
    
    Closes #5178
---
 assemble/bin/accumulo-cluster                      | 45 +++++++++++++++++-----
 .../org/apache/accumulo/server/util/ZooZap.java    | 42 ++++++++------------
 2 files changed, 53 insertions(+), 34 deletions(-)

diff --git a/assemble/bin/accumulo-cluster b/assemble/bin/accumulo-cluster
index 3d65733213..7341aa99dc 100755
--- a/assemble/bin/accumulo-cluster
+++ b/assemble/bin/accumulo-cluster
@@ -360,6 +360,17 @@ function parse_config() {
 
 }
 
+function ssh_wait() {
+  # Poll every 2 seconds until no ssh process mentioning accumulo-service is listed by ps
+  while true; do
+    #shellcheck disable=SC2009
+    count=$(ps -ef | grep ssh | grep -c "accumulo-service")
+    ((count > 0)) || break
+    echo "waiting on $count ssh commands to complete"
+    sleep 2
+  done
+}
+
 function execute_command() {
   control_cmd=$1
   host=$2
@@ -431,12 +442,9 @@ function control_services() {
       for group in $tserver_groups; do
         G="TSERVER_HOSTS_$group"
         for tserver in ${!G}; do
+          debug "Stopping tservers on $addr via admin command"
           if echo "$tserver" | grep -q "$addr"; then
-            if ! isDebug; then
-              "$accumulo_cmd" admin stop "$addr"
-            else
-              debug "Stopping tservers on $addr via admin command"
-            fi
+            debugOrRun "$accumulo_cmd" admin stop "$addr"
           fi
         done
       done
@@ -471,6 +479,13 @@ function control_services() {
           fi
         fi
       done
+      if [[ $ARG_LOCAL == 0 && ($operation == "stop" || $operation == "kill") 
]]; then
+        # If the prior commands were executed via ssh, then we need to wait 
for them
+        # to complete before zapping the nodes in ZooKeeper
+        ssh_wait
+        echo "Cleaning tablet server entries from zookeeper for resource group 
$group"
+        debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap 
-verbose -tservers -group "$group"
+      fi
       echo "done"
     done
   fi
@@ -520,6 +535,13 @@ function control_services() {
           execute_command "$operation" "$sserver" sserver "$group" "-o" 
"sserver.group=$group"
         fi
       done
+      if [[ $ARG_LOCAL == 0 && ($operation == "stop" || $operation == "kill") 
]]; then
+        # If the prior commands were executed via ssh, then we need to wait 
for them
+        # to complete before zapping the nodes in ZooKeeper
+        ssh_wait
+        echo "Cleaning scan server entries from zookeeper for resource group 
$group"
+        debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap 
-verbose -sservers -group "$group"
+      fi
     done
   fi
 
@@ -538,14 +560,19 @@ function control_services() {
           execute_command "$operation" "$compactor" compactor "$group" "-o" 
"compactor.group=$group"
         fi
       done
+      if [[ $ARG_LOCAL == 0 && ($operation == "stop" || $operation == "kill") 
]]; then
+        # If the prior commands were executed via ssh, then we need to wait 
for them
+        # to complete before zapping the nodes in ZooKeeper
+        ssh_wait
+        echo "Cleaning compactor entries from zookeeper for resource group 
$group"
+        debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap 
-verbose -compactors -group "$group"
+      fi
     done
   fi
 
   if [[ $ARG_LOCAL == 0 && $ARG_ALL == 1 && ($operation == "stop" || 
$operation == "kill") ]]; then
-    if ! isDebug; then
-      echo "Cleaning all server entries in ZooKeeper"
-      "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -manager 
-tservers -compactors -sservers
-    fi
+    debug "Cleaning all server entries in ZooKeeper"
+    debugOrRun "$accumulo_cmd" org.apache.accumulo.server.util.ZooZap -verbose 
-manager -tservers -compactors -sservers
   fi
 
 }
diff --git a/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java b/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java
index c97892a4c0..cc4d723591 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/util/ZooZap.java
@@ -27,7 +27,6 @@ import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.conf.SiteConfiguration;
 import org.apache.accumulo.core.fate.zookeeper.ZooReaderWriter;
 import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeMissingPolicy;
-import org.apache.accumulo.core.lock.ServiceLock;
 import org.apache.accumulo.core.lock.ServiceLockPaths.AddressSelector;
 import org.apache.accumulo.core.lock.ServiceLockPaths.ResourceGroupPredicate;
 import org.apache.accumulo.core.lock.ServiceLockPaths.ServiceLockPath;
@@ -103,7 +102,7 @@ public class ZooZap implements KeywordExecutable {
     Opts opts = new Opts();
     opts.parseArgs(keyword(), args);
 
-    if (!opts.zapManager && !opts.zapTservers) {
+    if (!opts.zapManager && !opts.zapTservers && !opts.zapCompactors && 
!opts.zapScanServers) {
       new JCommander(opts).usage();
       return;
     }
@@ -136,21 +135,12 @@ public class ZooZap implements KeywordExecutable {
           try {
             Set<ServiceLockPath> tserverLockPaths =
                 context.getServerPaths().getTabletServer(rgp, 
AddressSelector.all(), false);
-            for (ServiceLockPath tserverPath : tserverLockPaths) {
-
-              message("Deleting " + tserverPath + " from zookeeper", opts);
-
-              if (opts.zapManager) {
-                zrw.recursiveDelete(tserverPath.toString(), 
NodeMissingPolicy.SKIP);
-              } else {
-                if (!zrw.getChildren(tserverPath.toString()).isEmpty()) {
-                  try {
-                    ServiceLock.deleteLock(zrw, tserverPath);
-                  } catch (RuntimeException e) {
-                    message("Did not delete " + tserverPath, opts);
-                  }
-                }
-              }
+            Set<String> tserverResourceGroupPaths = new HashSet<>();
+            tserverLockPaths.forEach(p -> tserverResourceGroupPaths
+                .add(p.toString().substring(0, 
p.toString().lastIndexOf('/'))));
+            for (String group : tserverResourceGroupPaths) {
+              message("Deleting tserver " + group + " from zookeeper", opts);
+              zrw.recursiveDelete(group.toString(), NodeMissingPolicy.SKIP);
             }
           } catch (KeeperException | InterruptedException e) {
             log.error("{}", e.getMessage(), e);
@@ -165,7 +155,7 @@ public class ZooZap implements KeywordExecutable {
               .add(p.toString().substring(0, p.toString().lastIndexOf('/'))));
           try {
             for (String group : compactorResourceGroupPaths) {
-              message("Deleting " + group + " from zookeeper", opts);
+              message("Deleting compactor " + group + " from zookeeper", opts);
               zrw.recursiveDelete(group, NodeMissingPolicy.SKIP);
             }
           } catch (KeeperException | InterruptedException e) {
@@ -175,14 +165,16 @@ public class ZooZap implements KeywordExecutable {
         }
 
         if (opts.zapScanServers) {
+          Set<ServiceLockPath> sserverLockPaths =
+              context.getServerPaths().getScanServer(rgp, 
AddressSelector.all(), false);
+          Set<String> sserverResourceGroupPaths = new HashSet<>();
+          sserverLockPaths.forEach(p -> sserverResourceGroupPaths
+              .add(p.toString().substring(0, p.toString().lastIndexOf('/'))));
+
           try {
-            Set<ServiceLockPath> sserverLockPaths =
-                context.getServerPaths().getScanServer(rgp, 
AddressSelector.all(), false);
-            for (ServiceLockPath sserverPath : sserverLockPaths) {
-              message("Deleting " + sserverPath + " from zookeeper", opts);
-              if (!zrw.getChildren(sserverPath.toString()).isEmpty()) {
-                ServiceLock.deleteLock(zrw, sserverPath);
-              }
+            for (String group : sserverResourceGroupPaths) {
+              message("Deleting sserver " + group + " from zookeeper", opts);
+              zrw.recursiveDelete(group, NodeMissingPolicy.SKIP);
             }
           } catch (KeeperException | InterruptedException e) {
             log.error("{}", e.getMessage(), e);

Reply via email to