This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch 2.1 in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/2.1 by this push: new 61ca7d8070 Handles RPC errors when requesting tablet unload (#4497) 61ca7d8070 is described below commit 61ca7d8070f2eedabf987f5fb471b475babe7da5 Author: Keith Turner <ktur...@apache.org> AuthorDate: Wed May 1 18:55:51 2024 -0400 Handles RPC errors when requesting tablet unload (#4497) The tablet server group watcher loop will not make progress when it encounters an RPC error on a single tablet server. It should continue communicating with the tablet servers it can in order to make progress in its loop that assigns and unassigns tablets. --- .../accumulo/manager/TabletGroupWatcher.java | 35 ++++++++++++++-------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java index 81744441aa..216526d328 100644 --- a/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java +++ b/server/manager/src/main/java/org/apache/accumulo/manager/TabletGroupWatcher.java @@ -344,12 +344,17 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { TServerConnection client = manager.tserverSet.getConnection(location.getServerInstance()); if (client != null) { - Manager.log.trace("[{}] Requesting TabletServer {} unload {} {}", store.name(), - location.getServerInstance(), tls.extent, goal.howUnload()); - client.unloadTablet(manager.managerLock, tls.extent, goal.howUnload(), - manager.getSteadyTime()); - unloaded++; - totalUnloaded++; + try { + Manager.log.trace("[{}] Requesting TabletServer {} unload {} {}", store.name(), + location.getServerInstance(), tls.extent, goal.howUnload()); + client.unloadTablet(manager.managerLock, tls.extent, goal.howUnload(), + manager.getSteadyTime()); + unloaded++; + totalUnloaded++; + } catch (TException tException) { + Manager.log.warn("[{}] Failed to request tablet unload {} {} {}", 
store.name(), + location.getServerInstance(), tls.extent, goal.howUnload(), tException); + } } else { Manager.log.warn("Could not connect to server {}", location); } @@ -1036,13 +1041,19 @@ abstract class TabletGroupWatcher extends AccumuloDaemonThread { } tLists.assignments.addAll(tLists.assigned); for (Assignment a : tLists.assignments) { - TServerConnection client = manager.tserverSet.getConnection(a.server); - if (client != null) { - client.assignTablet(manager.managerLock, a.tablet); - } else { - Manager.log.warn("Could not connect to server {}", a.server); + try { + TServerConnection client = manager.tserverSet.getConnection(a.server); + if (client != null) { + client.assignTablet(manager.managerLock, a.tablet); + manager.assignedTablet(a.tablet); + } else { + Manager.log.warn("Could not connect to server {} for assignment of {}", a.server, + a.tablet); + } + } catch (TException tException) { + Manager.log.warn("Could not connect to server {} for assignment of {}", a.server, a.tablet, + tException); } - manager.assignedTablet(a.tablet); } }