This is an automated email from the ASF dual-hosted git repository.
dlmarion pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push:
new e510e84a66 Halt TabletServer on walog write and no TabletServer lock (#5170)
e510e84a66 is described below
commit e510e84a66f3169fd00ee93deed4b0ff24069cd3
Author: Dave Marion <[email protected]>
AuthorDate: Mon Dec 23 08:26:57 2024 -0500
Halt TabletServer on walog write and no TabletServer lock (#5170)
Closes #5146
---
.../org/apache/accumulo/tserver/log/TabletServerLogger.java | 12 +++++++++++-
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
index 6757d276ee..a124c634f3 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
@@ -39,6 +39,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.accumulo.core.client.Durability;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.dataImpl.KeyExtent;
+import org.apache.accumulo.core.fate.zookeeper.ServiceLock;
import org.apache.accumulo.core.protobuf.ProtobufUtil;
import org.apache.accumulo.core.util.Halt;
import org.apache.accumulo.core.util.Retry;
@@ -388,6 +389,7 @@ public class TabletServerLogger {
boolean success = false;
while (!success) {
+ boolean sawWriteFailure = false;
try {
// get a reference to the loggers that no other thread can touch
AtomicInteger currentId = new AtomicInteger(-1);
@@ -442,7 +444,7 @@ public class TabletServerLogger {
writeRetry.logRetry(log, "Logs closed while writing", ex);
} catch (Exception t) {
writeRetry.logRetry(log, "Failed to write to WAL", t);
-
+ sawWriteFailure = true;
try {
// Backoff
writeRetry.waitForNextAttempt(log, "write to WAL");
@@ -458,6 +460,14 @@ public class TabletServerLogger {
// the logs haven't changed.
final int finalCurrent = currentLogId;
if (!success) {
+ final ServiceLock tabletServerLock = tserver.getLock();
+ if (sawWriteFailure) {
+ log.info("WAL write failure, validating server lock in ZooKeeper");
+ if (tabletServerLock == null || !tabletServerLock.verifyLockAtSource()) {
+ Halt.halt("Writing to WAL has failed and TabletServer lock does not exist", -1);
+ }
+ }
+
testLockAndRun(logIdLock, new TestCallWithWriteLock() {
@Override