This is an automated email from the ASF dual-hosted git repository.

dlmarion pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new e510e84a66 Halt TabletServer on walog write and no TabletServer lock (#5170)
e510e84a66 is described below

commit e510e84a66f3169fd00ee93deed4b0ff24069cd3
Author: Dave Marion <dlmar...@apache.org>
AuthorDate: Mon Dec 23 08:26:57 2024 -0500

    Halt TabletServer on walog write and no TabletServer lock (#5170)
    
    Closes #5146
---
 .../org/apache/accumulo/tserver/log/TabletServerLogger.java  | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
index 6757d276ee..a124c634f3 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
@@ -39,6 +39,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 import org.apache.accumulo.core.client.Durability;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.dataImpl.KeyExtent;
+import org.apache.accumulo.core.fate.zookeeper.ServiceLock;
 import org.apache.accumulo.core.protobuf.ProtobufUtil;
 import org.apache.accumulo.core.util.Halt;
 import org.apache.accumulo.core.util.Retry;
@@ -388,6 +389,7 @@ public class TabletServerLogger {
 
     boolean success = false;
     while (!success) {
+      boolean sawWriteFailure = false;
       try {
         // get a reference to the loggers that no other thread can touch
         AtomicInteger currentId = new AtomicInteger(-1);
@@ -442,7 +444,7 @@ public class TabletServerLogger {
         writeRetry.logRetry(log, "Logs closed while writing", ex);
       } catch (Exception t) {
         writeRetry.logRetry(log, "Failed to write to WAL", t);
-
+        sawWriteFailure = true;
         try {
           // Backoff
           writeRetry.waitForNextAttempt(log, "write to WAL");
@@ -458,6 +460,14 @@ public class TabletServerLogger {
       // the logs haven't changed.
       final int finalCurrent = currentLogId;
       if (!success) {
+        final ServiceLock tabletServerLock = tserver.getLock();
+        if (sawWriteFailure) {
+          log.info("WAL write failure, validating server lock in ZooKeeper");
+          if (tabletServerLock == null || !tabletServerLock.verifyLockAtSource()) {
+            Halt.halt("Writing to WAL has failed and TabletServer lock does not exist", -1);
+          }
+        }
+
         testLockAndRun(logIdLock, new TestCallWithWriteLock() {
 
           @Override

Reply via email to