This is an automated email from the ASF dual-hosted git repository. dlmarion pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push:
     new e510e84a66 Halt TabletServer on walog write and no TabletServer lock (#5170)
e510e84a66 is described below

commit e510e84a66f3169fd00ee93deed4b0ff24069cd3
Author: Dave Marion <dlmar...@apache.org>
AuthorDate: Mon Dec 23 08:26:57 2024 -0500

    Halt TabletServer on walog write and no TabletServer lock (#5170)

    Closes #5146
---
 .../org/apache/accumulo/tserver/log/TabletServerLogger.java | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
index 6757d276ee..a124c634f3 100644
--- a/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
+++ b/server/tserver/src/main/java/org/apache/accumulo/tserver/log/TabletServerLogger.java
@@ -39,6 +39,7 @@ import java.util.concurrent.locks.ReentrantReadWriteLock;
 import org.apache.accumulo.core.client.Durability;
 import org.apache.accumulo.core.data.Mutation;
 import org.apache.accumulo.core.dataImpl.KeyExtent;
+import org.apache.accumulo.core.fate.zookeeper.ServiceLock;
 import org.apache.accumulo.core.protobuf.ProtobufUtil;
 import org.apache.accumulo.core.util.Halt;
 import org.apache.accumulo.core.util.Retry;
@@ -388,6 +389,7 @@ public class TabletServerLogger {
 
     boolean success = false;
     while (!success) {
+      boolean sawWriteFailure = false;
       try {
         // get a reference to the loggers that no other thread can touch
         AtomicInteger currentId = new AtomicInteger(-1);
@@ -442,7 +444,7 @@ public class TabletServerLogger {
         writeRetry.logRetry(log, "Logs closed while writing", ex);
       } catch (Exception t) {
         writeRetry.logRetry(log, "Failed to write to WAL", t);
-
+        sawWriteFailure = true;
         try {
           // Backoff
           writeRetry.waitForNextAttempt(log, "write to WAL");
@@ -458,6 +460,14 @@ public class TabletServerLogger {
       // the logs haven't changed.
       final int finalCurrent = currentLogId;
       if (!success) {
+        final ServiceLock tabletServerLock = tserver.getLock();
+        if (sawWriteFailure) {
+          log.info("WAL write failure, validating server lock in ZooKeeper");
+          if (tabletServerLock == null || !tabletServerLock.verifyLockAtSource()) {
+            Halt.halt("Writing to WAL has failed and TabletServer lock does not exist", -1);
+          }
+        }
+
         testLockAndRun(logIdLock, new TestCallWithWriteLock() {
           @Override