This is an automated email from the ASF dual-hosted git repository.

kturner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/accumulo.git


The following commit(s) were added to refs/heads/main by this push:
     new e37cae1193 adds trace logging of tablet state computations (#5290)
e37cae1193 is described below

commit e37cae11939cd76c6623453a7f3771586ed02916
Author: Keith Turner <ktur...@apache.org>
AuthorDate: Sat Feb 1 16:15:50 2025 -0500

    adds trace logging of tablet state computations (#5290)
    
    Adds trace level logging of the tablet state and tablet goal state. This
    logging will help understand why the manager is making the decisions it
    does regarding a particular tablet.  For example, if the manager is not
    assigning a tablet that one would expect to be assigned, turning on this
    logging could help.  Because the manager uses an iterator to filter
    tablet metadata in the tablet server, one may need to look in the logs of
    the manager and of the tablet servers serving the root and metadata
    tables to see it.
    
    Manually tested this by enabling the trace level logging and running some
    ITs.  Found a bug with calling TabletMetadata.getExtent() in this testing
    and corrected that.
---
 .../apache/accumulo/core/metadata/TabletState.java | 26 +++++++++---
 .../server/manager/state/TabletGoalState.java      | 49 ++++++++++++++++------
 2 files changed, 57 insertions(+), 18 deletions(-)

diff --git 
a/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java 
b/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java
index ba182514d1..59defc4830 100644
--- a/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java
+++ b/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java
@@ -20,11 +20,16 @@ package org.apache.accumulo.core.metadata;
 
 import java.util.Set;
 
+import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public enum TabletState {
   UNASSIGNED, ASSIGNED, HOSTED, ASSIGNED_TO_DEAD_SERVER, SUSPENDED;
 
+  private static final Logger log = LoggerFactory.getLogger(TabletState.class);
+
   public static TabletState compute(TabletMetadata tm, Set<TServerInstance> 
liveTServers) {
     TabletMetadata.Location current = null;
     TabletMetadata.Location future = null;
@@ -34,18 +39,27 @@ public enum TabletState {
       future = tm.getLocation();
     }
     if (future != null) {
-      return liveTServers.contains(future.getServerInstance()) ? 
TabletState.ASSIGNED
-          : TabletState.ASSIGNED_TO_DEAD_SERVER;
+      return trace(liveTServers.contains(future.getServerInstance()) ? 
TabletState.ASSIGNED
+          : TabletState.ASSIGNED_TO_DEAD_SERVER, tm);
     } else if (current != null) {
       if (liveTServers.contains(current.getServerInstance())) {
-        return TabletState.HOSTED;
+        return trace(TabletState.HOSTED, tm);
       } else {
-        return TabletState.ASSIGNED_TO_DEAD_SERVER;
+        return trace(TabletState.ASSIGNED_TO_DEAD_SERVER, tm);
       }
     } else if (tm.getSuspend() != null) {
-      return TabletState.SUSPENDED;
+      return trace(TabletState.SUSPENDED, tm);
     } else {
-      return TabletState.UNASSIGNED;
+      return trace(TabletState.UNASSIGNED, tm);
+    }
+  }
+
+  private static TabletState trace(TabletState tabletState, TabletMetadata tm) 
{
+    if (log.isTraceEnabled()) {
+      // The prev row column for the table may not have been fetched so can 
not call tm.getExtent()
+      log.trace("Computed state of {} for {}", tabletState,
+          TabletsSection.encodeRow(tm.getTableId(), tm.getEndRow()));
     }
+    return tabletState;
   }
 }
diff --git 
a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java
 
b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java
index ca9b2ada8c..8c6452e21b 100644
--- 
a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java
+++ 
b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java
@@ -18,6 +18,8 @@
  */
 package org.apache.accumulo.server.manager.state;
 
+import java.util.function.Supplier;
+
 import org.apache.accumulo.core.data.TabletId;
 import org.apache.accumulo.core.dataImpl.KeyExtent;
 import org.apache.accumulo.core.dataImpl.TabletIdImpl;
@@ -25,6 +27,7 @@ import 
org.apache.accumulo.core.manager.balancer.TabletServerIdImpl;
 import org.apache.accumulo.core.metadata.TServerInstance;
 import org.apache.accumulo.core.metadata.TabletState;
 import org.apache.accumulo.core.metadata.schema.Ample;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
 import org.apache.accumulo.core.metadata.schema.TabletMetadata;
 import org.apache.accumulo.core.metadata.schema.TabletOperationType;
 import org.apache.accumulo.core.spi.balancer.TabletBalancer;
@@ -62,7 +65,7 @@ public enum TabletGoalState {
 
     // Always follow through with assignments
     if (currentState == TabletState.ASSIGNED) {
-      return HOSTED;
+      return trace(HOSTED, tm, "tablet is in assigned state");
     }
 
     KeyExtent extent = tm.getExtent();
@@ -75,7 +78,7 @@ public enum TabletGoalState {
       if (!params.isParentLevelUpgraded()) {
         // The place where this tablet stores its metadata was not upgraded, 
so do not assign this
         // tablet yet.
-        return UNASSIGNED;
+        return trace(UNASSIGNED, tm, "parent level not upgraded");
       }
 
       // When an operation id is set tablets need to be unassigned unless 
there are still wals. When
@@ -83,11 +86,11 @@ public enum TabletGoalState {
       // tablets do not need to recover wals.
       if (tm.getOperationId() != null && (tm.getLogs().isEmpty()
           || tm.getOperationId().getType() == TabletOperationType.DELETING)) {
-        return TabletGoalState.UNASSIGNED;
+        return trace(UNASSIGNED, tm, () -> "operation id " + 
tm.getOperationId() + " is set");
       }
 
       if (!params.isTableOnline(tm.getTableId())) {
-        return UNASSIGNED;
+        return trace(UNASSIGNED, tm, "table is not online");
       }
 
       // Only want to override the HOSTED goal for tablet availability if 
there are no walog
@@ -98,10 +101,11 @@ public enum TabletGoalState {
       if (tm.getLogs().isEmpty()) {
         switch (tm.getTabletAvailability()) {
           case UNHOSTED:
-            return UNASSIGNED;
+            return trace(UNASSIGNED, tm, "tablet availability is UNHOSTED");
           case ONDEMAND:
             if (!tm.getHostingRequested()) {
-              return UNASSIGNED;
+              return trace(UNASSIGNED, tm,
+                  "tablet availability is ONDEMAND and no hosting requested");
             }
             break;
           default:
@@ -111,7 +115,7 @@ public enum TabletGoalState {
 
       TServerInstance dest = params.getMigrations().get(extent);
       if (dest != null && tm.hasCurrent() && 
!dest.equals(tm.getLocation().getServerInstance())) {
-        return UNASSIGNED;
+        return trace(UNASSIGNED, tm, () -> "tablet has a migration to " + 
dest);
       }
 
       if (currentState == TabletState.HOSTED && balancer != null) {
@@ -143,7 +147,7 @@ public enum TabletGoalState {
           });
 
           if (reassign) {
-            return UNASSIGNED;
+            return trace(UNASSIGNED, tm, "the balancer requested 
reassignment");
           }
         } else {
           log.warn("Could not find resource group for tserver {}, did not 
consult balancer to"
@@ -155,19 +159,21 @@ public enum TabletGoalState {
 
       if (params.getVolumeReplacements().size() > 0
           && VolumeUtil.needsVolumeReplacement(params.getVolumeReplacements(), 
tm)) {
-        return UNASSIGNED;
+        return trace(UNASSIGNED, tm, "tablet has volumes needing replacement");
       }
 
       if (tm.hasCurrent()
           && 
params.getServersToShutdown().contains(tm.getLocation().getServerInstance())) {
         if (params.canSuspendTablets()) {
-          return SUSPENDED;
+          return trace(SUSPENDED, tm,
+              () -> "tablet is assigned to " + tm.getLocation() + " that is 
being shutdown");
         } else {
-          return UNASSIGNED;
+          return trace(UNASSIGNED, tm,
+              () -> "tablet is assigned to " + tm.getLocation() + " that is 
being shutdown");
         }
       }
     }
-    return systemGoalState;
+    return trace(systemGoalState, tm, "it's the system goal state");
   }
 
   private static TabletGoalState getSystemGoalState(TabletMetadata tm,
@@ -194,4 +200,23 @@ public enum TabletGoalState {
         throw new IllegalStateException("Unknown Manager State");
     }
   }
+
+  private static TabletGoalState trace(TabletGoalState tabletGoalState, 
TabletMetadata tm,
+      String reason) {
+    if (log.isTraceEnabled()) {
+      // The prev row column for the table may not have been fetched so can 
not call tm.getExtent()
+      log.trace("Computed goal state of {} for {} because {}", tabletGoalState,
+          TabletsSection.encodeRow(tm.getTableId(), tm.getEndRow()), reason);
+    }
+    return tabletGoalState;
+  }
+
+  private static TabletGoalState trace(TabletGoalState tabletGoalState, 
TabletMetadata tm,
+      Supplier<String> reason) {
+    if (log.isTraceEnabled()) {
+      log.trace("Computed goal state of {} for {} because {}", tabletGoalState,
+          TabletsSection.encodeRow(tm.getTableId(), tm.getEndRow()), 
reason.get());
+    }
+    return tabletGoalState;
+  }
 }

Reply via email to