This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new a037b2aa92 fixes race condition in bulk dir reservation and adds logging (#5939)
a037b2aa92 is described below

commit a037b2aa9237d6e494480d49e7bd932c73ecce65
Author: Keith Turner <[email protected]>
AuthorDate: Mon Sep 29 14:14:06 2025 -0400

    fixes race condition in bulk dir reservation and adds logging (#5939)
---
 .../org/apache/accumulo/core/fate/zookeeper/ZooReservation.java  | 9 ++++++---
 .../main/java/org/apache/accumulo/manager/tableOps/Utils.java    | 3 +++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/core/src/main/java/org/apache/accumulo/core/fate/zookeeper/ZooReservation.java b/core/src/main/java/org/apache/accumulo/core/fate/zookeeper/ZooReservation.java
index 88d7cbedef..42968a2805 100644
--- a/core/src/main/java/org/apache/accumulo/core/fate/zookeeper/ZooReservation.java
+++ b/core/src/main/java/org/apache/accumulo/core/fate/zookeeper/ZooReservation.java
@@ -22,10 +22,10 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 
 import org.apache.accumulo.core.fate.FateId;
 import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeExistsPolicy;
-import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeMissingPolicy;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.KeeperException.NoNodeException;
 import org.apache.zookeeper.KeeperException.NodeExistsException;
+import org.apache.zookeeper.data.Stat;
 import org.slf4j.LoggerFactory;
 
 public class ZooReservation {
@@ -59,9 +59,10 @@ public class ZooReservation {
   public static void release(ZooReaderWriter zk, String path, FateId fateId)
       throws KeeperException, InterruptedException {
     byte[] zooData;
+    Stat stat = new Stat();
 
     try {
-      zooData = zk.getData(path);
+      zooData = zk.getData(path, stat);
     } catch (NoNodeException e) {
       // Just logging a warning, if data is gone then our work here is done.
       LoggerFactory.getLogger(ZooReservation.class).debug("Node does not exist 
{}", path);
@@ -76,7 +77,9 @@ public class ZooReservation {
           + " with data mismatch " + fateId + " " + zooDataStr);
     }
 
-    zk.recursiveDelete(path, NodeMissingPolicy.SKIP);
+    // Only delete the node if the version is the same. It should be the same as this holds the
+    // reservation, so for it to change at this point would probably indicate a bug.
+    zk.deleteStrict(path, stat.getVersion());
   }
 
 }
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java
index b0e5df03ed..f365bfc425 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java
@@ -268,8 +268,10 @@ public class Utils {
 
     if (ZooReservation.attempt(zk, Constants.ZHDFS_RESERVATIONS + "/"
         + Base64.getEncoder().encodeToString(directory.getBytes(UTF_8)), 
fateId, "")) {
+      log.trace("{} reserved bulk dir {}", fateId, directory);
       return 0;
     } else {
+      log.trace("{} could not reserve bulk dir {} that is already reserved", 
fateId, directory);
       return 50;
     }
   }
@@ -280,6 +282,7 @@ public class Utils {
         Constants.ZHDFS_RESERVATIONS + "/"
             + Base64.getEncoder().encodeToString(directory.getBytes(UTF_8)),
         fateId);
+    log.trace("{} unreserved bulk dir {}", fateId, directory);
   }
 
   private static DistributedLock getLock(ServerContext context, AbstractId<?> 
id, FateId fateId,

Reply via email to