Apache9 commented on code in PR #7375:
URL: https://github.com/apache/hbase/pull/7375#discussion_r2567844775
##########
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionStates.java:
##########
@@ -109,14 +91,12 @@ public RegionStates() {
public void clear() {
regionsMap.clear();
encodedRegionsMap.clear();
- regionInTransition.clear();
regionOffline.clear();
serverMap.clear();
}
public boolean isRegionInRegionStates(final RegionInfo hri) {
Review Comment:
The method name is a bit confusing. Where do we use it? Is it safe to remove
one condition in the implementation?
##########
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/RegionInTransitionTracker.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.master.assignment;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ConcurrentSkipListMap;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.TableState;
+import org.apache.hadoop.hbase.master.RegionState;
+import org.apache.hadoop.hbase.master.TableStateManager;
+import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Tracks regions that are currently in transition (RIT) - those not yet in
their terminal state.
+ */
+@InterfaceAudience.Private
+public class RegionInTransitionTracker {
+ private static final Logger LOG =
LoggerFactory.getLogger(RegionInTransitionTracker.class);
+
+ private final List<RegionState.State> DISABLE_TABLE_REGION_STATE =
+ List.of(RegionState.State.OFFLINE, RegionState.State.CLOSED);
+
+ private final List<RegionState.State> ENABLE_TABLE_REGION_STATE =
List.of(RegionState.State.OPEN);
+
+ private final ConcurrentSkipListMap<RegionInfo, RegionStateNode>
regionInTransition =
+ new ConcurrentSkipListMap<>(RegionInfo.COMPARATOR);
+
+ private TableStateManager tableStateManager;
+
+ public boolean isRegionInTransition(final RegionInfo regionInfo) {
+ return regionInTransition.containsKey(regionInfo);
+ }
+
+ /**
+ * Handles a region whose hosting RegionServer has crashed. When a
RegionServer fails, all regions
+ * it was hosting are automatically added to the RIT list since they need to
be reassigned to
+ * other servers.
+ */
+ public void regionCrashed(RegionStateNode regionStateNode) {
+ if (regionStateNode.getRegionInfo().getReplicaId() !=
RegionInfo.DEFAULT_REPLICA_ID) {
+ return;
+ }
+
+ if (addRegionInTransition(regionStateNode)) {
+ LOG.debug("{} added to RIT list because hosting region server is crashed
",
+ regionStateNode.getRegionInfo().getEncodedName());
+ }
+ }
+
+ /**
+ * Processes a region state change and updates the RIT tracking accordingly.
This is the core
+ * method that determines whether a region should be added to or removed
from the RIT list based
+ * on its current state and the table's enabled/disabled status. This method
should be called
+ * whenever a region state changes get stored to hbase:meta Note: Only
default replicas (replica
+ * ID 0) are tracked. Read replicas are ignored.
+ * @param regionStateNode the region state node with the current state
information
+ */
+ public void handleRegionStateNodeOperation(RegionStateNode regionStateNode) {
+ // only consider default replica for availability
+ if (regionStateNode.getRegionInfo().getReplicaId() !=
RegionInfo.DEFAULT_REPLICA_ID) {
+ return;
+ }
+
+ RegionState.State currentState = regionStateNode.getState();
+ boolean tableEnabled = isTableEnabled(regionStateNode.getTable());
+ List<RegionState.State> terminalStates =
+ tableEnabled ? ENABLE_TABLE_REGION_STATE : DISABLE_TABLE_REGION_STATE;
+
+ // if region is merged or split it should not be in RIT list
+ if (
+ currentState == RegionState.State.SPLIT || currentState ==
RegionState.State.MERGED
+ || regionStateNode.getRegionInfo().isSplit()
+ ) {
+ if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
+ LOG.debug("Removed {} from RIT list as it is split or merged",
+ regionStateNode.getRegionInfo().getEncodedName());
+ }
+ } else if (!terminalStates.contains(currentState)) {
+ if (addRegionInTransition(regionStateNode)) {
+ LOG.debug("{} added to RIT list because it is in-between state, region
state : {} ",
+ regionStateNode.getRegionInfo().getEncodedName(), currentState);
+ }
+ } else {
+ if (removeRegionInTransition(regionStateNode.getRegionInfo())) {
+ LOG.debug("Removed {} from RIT list as reached to terminal state {}",
+ regionStateNode.getRegionInfo().getEncodedName(), currentState);
+ }
+ }
+ }
+
+ private boolean isTableEnabled(TableName tableName) {
+ if (tableStateManager != null) {
+ return tableStateManager.isTableState(tableName,
TableState.State.ENABLED,
+ TableState.State.ENABLING);
+ }
+ // AssignmentManager calls setTableStateManager once hbase:meta is
confirmed online, if it is
+ // still null it means confirmation is still pending. One should not
access TableStateManger
+ // till the time.
+ if (TableName.isMetaTableName(tableName)) {
Review Comment:
Maybe here we could add an `assert TableName.isMetaTableName(tableName);`,
and then `return true`?
Anyway, throwing a RuntimeException is also acceptable, since assertions may be
disabled in production environments.
Better to add something like "CODE-BUG" to tell developers that they may have
written the code wrong.
##########
hbase-server/src/main/resources/hbase-webapps/master/deadRegionServers.jsp:
##########
@@ -72,7 +72,7 @@
<tr>
<th></th>
<td><%= deadServerName %></td>
- <td><%= deadServerUtil.getTimeOfDeath(deadServerName) %></td>
+ <td><%= new Date(deadServerUtil.getTimeOfDeath(deadServerName))
%></td>
Review Comment:
It's a pity, but let's keep the old behavior for now... We can change this later.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]