This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch 2.1
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/2.1 by this push:
     new 61ca7d8070 Handles RPC errors when requesting tablet unload (#4497)
61ca7d8070 is described below

commit 61ca7d8070f2eedabf987f5fb471b475babe7da5
Author: Keith Turner <ktur...@apache.org>
AuthorDate: Wed May 1 18:55:51 2024 -0400

    Handles RPC errors when requesting tablet unload (#4497)
    
    The tablet server group watcher loop will not make progress when it encounters
    an RPC error on a single tablet server.  It should continue communicating with
    the tablet servers it can in order to make progress in its loop that assigns
    and unassigns tablets.
---
 .../accumulo/manager/TabletGroupWatcher.java       | 35 ++++++++++++++--------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
index 81744441aa..216526d328 100644
--- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
+++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java
@@ -344,12 +344,17 @@ abstract class TabletGroupWatcher extends 
AccumuloDaemonThread {
                 TServerConnection client =
                     
manager.tserverSet.getConnection(location.getServerInstance());
                 if (client != null) {
-                  Manager.log.trace("[{}] Requesting TabletServer {} unload {} 
{}", store.name(),
-                      location.getServerInstance(), tls.extent, 
goal.howUnload());
-                  client.unloadTablet(manager.managerLock, tls.extent, 
goal.howUnload(),
-                      manager.getSteadyTime());
-                  unloaded++;
-                  totalUnloaded++;
+                  try {
+                    Manager.log.trace("[{}] Requesting TabletServer {} unload 
{} {}", store.name(),
+                        location.getServerInstance(), tls.extent, 
goal.howUnload());
+                    client.unloadTablet(manager.managerLock, tls.extent, 
goal.howUnload(),
+                        manager.getSteadyTime());
+                    unloaded++;
+                    totalUnloaded++;
+                  } catch (TException tException) {
+                    Manager.log.warn("[{}] Failed to request tablet unload {} 
{} {}", store.name(),
+                        location.getServerInstance(), tls.extent, 
goal.howUnload(), tException);
+                  }
                 } else {
                   Manager.log.warn("Could not connect to server {}", location);
                 }
@@ -1036,13 +1041,19 @@ abstract class TabletGroupWatcher extends 
AccumuloDaemonThread {
     }
     tLists.assignments.addAll(tLists.assigned);
     for (Assignment a : tLists.assignments) {
-      TServerConnection client = manager.tserverSet.getConnection(a.server);
-      if (client != null) {
-        client.assignTablet(manager.managerLock, a.tablet);
-      } else {
-        Manager.log.warn("Could not connect to server {}", a.server);
+      try {
+        TServerConnection client = manager.tserverSet.getConnection(a.server);
+        if (client != null) {
+          client.assignTablet(manager.managerLock, a.tablet);
+          manager.assignedTablet(a.tablet);
+        } else {
+          Manager.log.warn("Could not connect to server {} for assignment of 
{}", a.server,
+              a.tablet);
+        }
+      } catch (TException tException) {
+        Manager.log.warn("Could not connect to server {} for assignment of 
{}", a.server, a.tablet,
+            tException);
       }
-      manager.assignedTablet(a.tablet);
     }
   }
 

Reply via email to