This is an automated email from the ASF dual-hosted git repository.
kturner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push:
new a037b2aa92 fixes race condition in bulk dir reservation and adds
logging (#5939)
a037b2aa92 is described below
commit a037b2aa9237d6e494480d49e7bd932c73ecce65
Author: Keith Turner <[email protected]>
AuthorDate: Mon Sep 29 14:14:06 2025 -0400
fixes race condition in bulk dir reservation and adds logging (#5939)
---
.../org/apache/accumulo/core/fate/zookeeper/ZooReservation.java | 9 ++++++---
.../main/java/org/apache/accumulo/manager/tableOps/Utils.java | 3 +++
2 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/core/src/main/java/org/apache/accumulo/core/fate/zookeeper/ZooReservation.java b/core/src/main/java/org/apache/accumulo/core/fate/zookeeper/ZooReservation.java
index 88d7cbedef..42968a2805 100644
--- a/core/src/main/java/org/apache/accumulo/core/fate/zookeeper/ZooReservation.java
+++ b/core/src/main/java/org/apache/accumulo/core/fate/zookeeper/ZooReservation.java
@@ -22,10 +22,10 @@ import static java.nio.charset.StandardCharsets.UTF_8;
import org.apache.accumulo.core.fate.FateId;
import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeExistsPolicy;
-import org.apache.accumulo.core.fate.zookeeper.ZooUtil.NodeMissingPolicy;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.KeeperException.NodeExistsException;
+import org.apache.zookeeper.data.Stat;
import org.slf4j.LoggerFactory;
public class ZooReservation {
@@ -59,9 +59,10 @@ public class ZooReservation {
public static void release(ZooReaderWriter zk, String path, FateId fateId)
throws KeeperException, InterruptedException {
byte[] zooData;
+ Stat stat = new Stat();
try {
- zooData = zk.getData(path);
+ zooData = zk.getData(path, stat);
} catch (NoNodeException e) {
// Just logging a warning, if data is gone then our work here is done.
LoggerFactory.getLogger(ZooReservation.class).debug("Node does not exist {}", path);
@@ -76,7 +77,9 @@ public class ZooReservation {
+ " with data mismatch " + fateId + " " + zooDataStr);
}
- zk.recursiveDelete(path, NodeMissingPolicy.SKIP);
+ // Only delete the node if the version is the same. It should be the same as this holds the
+ // reservation, so for it to change at this point would probably indicate a bug.
+ zk.deleteStrict(path, stat.getVersion());
}
}
diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java
index b0e5df03ed..f365bfc425 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/tableOps/Utils.java
@@ -268,8 +268,10 @@ public class Utils {
if (ZooReservation.attempt(zk, Constants.ZHDFS_RESERVATIONS + "/"
+ Base64.getEncoder().encodeToString(directory.getBytes(UTF_8)),
fateId, "")) {
+ log.trace("{} reserved bulk dir {}", fateId, directory);
return 0;
} else {
+ log.trace("{} could not reserve bulk dir {} that is already reserved", fateId, directory);
return 50;
}
}
@@ -280,6 +282,7 @@ public class Utils {
Constants.ZHDFS_RESERVATIONS + "/"
+ Base64.getEncoder().encodeToString(directory.getBytes(UTF_8)),
fateId);
+ log.trace("{} unreserved bulk dir {}", fateId, directory);
}
private static DistributedLock getLock(ServerContext context, AbstractId<?> id, FateId fateId,