This is an automated email from the ASF dual-hosted git repository. kturner pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/accumulo.git
The following commit(s) were added to refs/heads/main by this push: new e37cae1193 adds trace logging of tablet state computations (#5290) e37cae1193 is described below commit e37cae11939cd76c6623453a7f3771586ed02916 Author: Keith Turner <ktur...@apache.org> AuthorDate: Sat Feb 1 16:15:50 2025 -0500 adds trace logging of tablet state computations (#5290) Adds trace level logging of the tablet state and tablet goal state. This logging will help in understanding why the manager is making the decisions it does regarding a particular tablet. For example, if the manager is not assigning a tablet that one would expect to be assigned, turning on this logging could help. Because the manager uses an iterator to filter tablet metadata in the tablet server, you may need to look in the logs of the manager and of the tablet servers serving the root and metadata tables to see it. Manually tested this by enabling the trace level logging and running some ITs. Found a bug with calling TabletMetadata.getExtent() in this testing and corrected that. 
--- .../apache/accumulo/core/metadata/TabletState.java | 26 +++++++++--- .../server/manager/state/TabletGoalState.java | 49 ++++++++++++++++------ 2 files changed, 57 insertions(+), 18 deletions(-) diff --git a/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java b/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java index ba182514d1..59defc4830 100644 --- a/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java +++ b/core/src/main/java/org/apache/accumulo/core/metadata/TabletState.java @@ -20,11 +20,16 @@ package org.apache.accumulo.core.metadata; import java.util.Set; +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection; import org.apache.accumulo.core.metadata.schema.TabletMetadata; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public enum TabletState { UNASSIGNED, ASSIGNED, HOSTED, ASSIGNED_TO_DEAD_SERVER, SUSPENDED; + private static final Logger log = LoggerFactory.getLogger(TabletState.class); + public static TabletState compute(TabletMetadata tm, Set<TServerInstance> liveTServers) { TabletMetadata.Location current = null; TabletMetadata.Location future = null; @@ -34,18 +39,27 @@ public enum TabletState { future = tm.getLocation(); } if (future != null) { - return liveTServers.contains(future.getServerInstance()) ? TabletState.ASSIGNED - : TabletState.ASSIGNED_TO_DEAD_SERVER; + return trace(liveTServers.contains(future.getServerInstance()) ? 
TabletState.ASSIGNED + : TabletState.ASSIGNED_TO_DEAD_SERVER, tm); } else if (current != null) { if (liveTServers.contains(current.getServerInstance())) { - return TabletState.HOSTED; + return trace(TabletState.HOSTED, tm); } else { - return TabletState.ASSIGNED_TO_DEAD_SERVER; + return trace(TabletState.ASSIGNED_TO_DEAD_SERVER, tm); } } else if (tm.getSuspend() != null) { - return TabletState.SUSPENDED; + return trace(TabletState.SUSPENDED, tm); } else { - return TabletState.UNASSIGNED; + return trace(TabletState.UNASSIGNED, tm); + } + } + + private static TabletState trace(TabletState tabletState, TabletMetadata tm) { + if (log.isTraceEnabled()) { + // The prev row column for the table may not have been fetched so can not call tm.getExtent() + log.trace("Computed state of {} for {}", tabletState, + TabletsSection.encodeRow(tm.getTableId(), tm.getEndRow())); } + return tabletState; } } diff --git a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java index ca9b2ada8c..8c6452e21b 100644 --- a/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java +++ b/server/base/src/main/java/org/apache/accumulo/server/manager/state/TabletGoalState.java @@ -18,6 +18,8 @@ */ package org.apache.accumulo.server.manager.state; +import java.util.function.Supplier; + import org.apache.accumulo.core.data.TabletId; import org.apache.accumulo.core.dataImpl.KeyExtent; import org.apache.accumulo.core.dataImpl.TabletIdImpl; @@ -25,6 +27,7 @@ import org.apache.accumulo.core.manager.balancer.TabletServerIdImpl; import org.apache.accumulo.core.metadata.TServerInstance; import org.apache.accumulo.core.metadata.TabletState; import org.apache.accumulo.core.metadata.schema.Ample; +import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection; import org.apache.accumulo.core.metadata.schema.TabletMetadata; import 
org.apache.accumulo.core.metadata.schema.TabletOperationType; import org.apache.accumulo.core.spi.balancer.TabletBalancer; @@ -62,7 +65,7 @@ public enum TabletGoalState { // Always follow through with assignments if (currentState == TabletState.ASSIGNED) { - return HOSTED; + return trace(HOSTED, tm, "tablet is in assigned state"); } KeyExtent extent = tm.getExtent(); @@ -75,7 +78,7 @@ public enum TabletGoalState { if (!params.isParentLevelUpgraded()) { // The place where this tablet stores its metadata was not upgraded, so do not assign this // tablet yet. - return UNASSIGNED; + return trace(UNASSIGNED, tm, "parent level not upgraded"); } // When an operation id is set tablets need to be unassigned unless there are still wals. When @@ -83,11 +86,11 @@ public enum TabletGoalState { // tablets do not need to recover wals. if (tm.getOperationId() != null && (tm.getLogs().isEmpty() || tm.getOperationId().getType() == TabletOperationType.DELETING)) { - return TabletGoalState.UNASSIGNED; + return trace(UNASSIGNED, tm, () -> "operation id " + tm.getOperationId() + " is set"); } if (!params.isTableOnline(tm.getTableId())) { - return UNASSIGNED; + return trace(UNASSIGNED, tm, "table is not online"); } // Only want to override the HOSTED goal for tablet availability if there are no walog @@ -98,10 +101,11 @@ public enum TabletGoalState { if (tm.getLogs().isEmpty()) { switch (tm.getTabletAvailability()) { case UNHOSTED: - return UNASSIGNED; + return trace(UNASSIGNED, tm, "tablet availability is UNHOSTED"); case ONDEMAND: if (!tm.getHostingRequested()) { - return UNASSIGNED; + return trace(UNASSIGNED, tm, + "tablet availability is ONDEMAND and no hosting requested"); } break; default: @@ -111,7 +115,7 @@ public enum TabletGoalState { TServerInstance dest = params.getMigrations().get(extent); if (dest != null && tm.hasCurrent() && !dest.equals(tm.getLocation().getServerInstance())) { - return UNASSIGNED; + return trace(UNASSIGNED, tm, () -> "tablet has a migration to " + dest); 
} if (currentState == TabletState.HOSTED && balancer != null) { @@ -143,7 +147,7 @@ public enum TabletGoalState { }); if (reassign) { - return UNASSIGNED; + return trace(UNASSIGNED, tm, "the balancer requested reassignment"); } } else { log.warn("Could not find resource group for tserver {}, did not consult balancer to" @@ -155,19 +159,21 @@ public enum TabletGoalState { if (params.getVolumeReplacements().size() > 0 && VolumeUtil.needsVolumeReplacement(params.getVolumeReplacements(), tm)) { - return UNASSIGNED; + return trace(UNASSIGNED, tm, "tablet has volumes needing replacement"); } if (tm.hasCurrent() && params.getServersToShutdown().contains(tm.getLocation().getServerInstance())) { if (params.canSuspendTablets()) { - return SUSPENDED; + return trace(SUSPENDED, tm, + () -> "tablet is assigned to " + tm.getLocation() + " that is being shutdown"); } else { - return UNASSIGNED; + return trace(UNASSIGNED, tm, + () -> "tablet is assigned to " + tm.getLocation() + " that is being shutdown"); } } } - return systemGoalState; + return trace(systemGoalState, tm, "it's the system goal state"); } private static TabletGoalState getSystemGoalState(TabletMetadata tm, @@ -194,4 +200,23 @@ public enum TabletGoalState { throw new IllegalStateException("Unknown Manager State"); } } + + private static TabletGoalState trace(TabletGoalState tabletGoalState, TabletMetadata tm, + String reason) { + if (log.isTraceEnabled()) { + // The prev row column for the table may not have been fetched so can not call tm.getExtent() + log.trace("Computed goal state of {} for {} because {}", tabletGoalState, + TabletsSection.encodeRow(tm.getTableId(), tm.getEndRow()), reason); + } + return tabletGoalState; + } + + private static TabletGoalState trace(TabletGoalState tabletGoalState, TabletMetadata tm, + Supplier<String> reason) { + if (log.isTraceEnabled()) { + log.trace("Computed goal state of {} for {} because {}", tabletGoalState, + TabletsSection.encodeRow(tm.getTableId(), tm.getEndRow()), 
reason.get()); + } + return tabletGoalState; + } }