This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new cef8b2ad320 HDDS-13953. Optionally allow skipping ReadIndex for Leader (#9319)
cef8b2ad320 is described below

commit cef8b2ad3207ee5f67675faaea3111f2e5bc57b4
Author: Symious <[email protected]>
AuthorDate: Wed Dec 3 19:47:22 2025 +0800

    HDDS-13953. Optionally allow skipping ReadIndex for Leader (#9319)
---
 .../java/org/apache/hadoop/ozone/om/OmConfig.java  | 18 ++++++
 .../hadoop/ozone/shell/TestOzoneShellHA.java       | 11 +++-
 .../shell/TestOzoneShellHAWithFollowerRead.java    | 71 ++++++++++++++++++++++
 .../java/org/apache/hadoop/ozone/om/OMMetrics.java | 19 ++++++
 .../hadoop/ozone/om/execution/OMExecutionFlow.java | 28 +++++----
 ...OzoneManagerProtocolServerSideTranslatorPB.java | 25 ++++++--
 6 files changed, 152 insertions(+), 20 deletions(-)

diff --git 
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OmConfig.java 
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OmConfig.java
index 8eb3bec7a45..8f762a7dbd6 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OmConfig.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/OmConfig.java
@@ -141,6 +141,15 @@ public class OmConfig extends ReconfigurableConfig {
   )
   private boolean listAllVolumesAllowed = Defaults.LIST_ALL_VOLUMES_ALLOWED;
 
+  @Config(key = "ozone.om.allow.leader.skip.linearizable.read",
+      defaultValue = "false",
+      type = ConfigType.BOOLEAN,
+      tags = {ConfigTag.OM, ConfigTag.PERFORMANCE, ConfigTag.HA},
+      description = "Allow leader to handle requests directly, no need to check the leadership " +
+          "for every request."
+  )
+  private boolean allowLeaderSkipLinearizableRead;
+
   public long getRatisBasedFinalizationTimeout() {
     return ratisBasedFinalizationTimeout;
   }
@@ -173,6 +182,14 @@ public long getMaxListSize() {
     return maxListSize;
   }
 
+  public boolean isAllowLeaderSkipLinearizableRead() {
+    return allowLeaderSkipLinearizableRead;
+  }
+
+  public void setAllowLeaderSkipLinearizableRead(boolean newValue) {
+    allowLeaderSkipLinearizableRead = newValue;
+  }
+
   public void setMaxListSize(long newValue) {
     maxListSize = newValue;
     validate();
@@ -248,6 +265,7 @@ public void setFrom(OmConfig other) {
     maxUserVolumeCount = other.maxUserVolumeCount;
     userDefaultRights = other.userDefaultRights;
     groupDefaultRights = other.groupDefaultRights;
+    allowLeaderSkipLinearizableRead = other.allowLeaderSkipLinearizableRead;
 
     validate();
   }
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java
index a193bc817e8..4f111017a98 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHA.java
@@ -2197,8 +2197,11 @@ public void testClientBucketLayoutValidation() {
   @Test
   public void testListAllKeys()
       throws Exception {
-    String volumeName = "vollst";
-    // Create volume vollst
+    testListAllKeysInternal("vollst");
+  }
+
+  protected void testListAllKeysInternal(String volumeName) throws Exception {
+    // Create volume
     String[] args = new String[] {
         "volume", "create", "o3://" + omServiceId +
           OZONE_URI_DELIMITER + volumeName};
@@ -2482,4 +2485,8 @@ private static String getKeyProviderURI(MiniKMS kms) {
     return KMSClientProvider.SCHEME_NAME + "://" +
         kms.getKMSUrl().toExternalForm().replace("://", "@");
   }
+
+  protected MiniOzoneHAClusterImpl getCluster() {
+    return cluster;
+  }
 }
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHAWithFollowerRead.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHAWithFollowerRead.java
new file mode 100644
index 00000000000..2abf1e020c1
--- /dev/null
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestOzoneShellHAWithFollowerRead.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.shell;
+
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.OzoneConfigKeys;
+import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServerConfig;
+import org.apache.ratis.server.RaftServerConfigKeys;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Tests the Ozone sh shell commands with follower read enabled, i.e. with
+ * OM reads served through Ratis linearizable read. Inspired by TestS3Shell.
+ */
+public class TestOzoneShellHAWithFollowerRead extends TestOzoneShellHA {
+
+  @BeforeAll
+  @Override
+  public void init() throws Exception {
+    OzoneConfiguration conf = new OzoneConfiguration();
+
+    OzoneManagerRatisServerConfig omHAConfig =
+        conf.getObject(OzoneManagerRatisServerConfig.class);
+    omHAConfig.setReadOption(RaftServerConfigKeys.Read.Option.LINEARIZABLE.name());
+
+    conf.setFromObject(omHAConfig);
+    conf.setBoolean(OzoneConfigKeys.OZONE_HBASE_ENHANCEMENTS_ALLOWED, true);
+    conf.setBoolean("ozone.client.hbase.enhancements.allowed", true);
+    conf.setBoolean("ozone.om.ha.raft.server.read.leader.lease.enabled", true);
+    conf.setBoolean("ozone.om.allow.leader.skip.linearizable.read", true);
+    conf.setBoolean(OzoneConfigKeys.OZONE_FS_HSYNC_ENABLED, true);
+    startKMS();
+    startCluster(conf);
+  }
+
+  @Test
+  public void testAllowLeaderSkipLinearizableRead() throws Exception {
+    super.testListAllKeysInternal("skipvol1");
+    long lastMetrics = getCluster().getOMLeader().getMetrics().getNumLeaderSkipLinearizableRead();
+    Assertions.assertTrue(lastMetrics > 0);
+    // Turn the skip off on the leader: the counter must stay put. Always restore the old config.
+    OzoneConfiguration oldConf = getCluster().getConf();
+    OzoneConfiguration newConf = new OzoneConfiguration(oldConf);
+    newConf.setBoolean("ozone.om.allow.leader.skip.linearizable.read", false);
+    getCluster().getOMLeader().setConfiguration(newConf);
+    try {
+      super.testListAllKeysInternal("skipvol2");
+      long curMetrics = getCluster().getOMLeader().getMetrics().getNumLeaderSkipLinearizableRead();
+      Assertions.assertEquals(lastMetrics, curMetrics);
+    } finally {
+      getCluster().getOMLeader().setConfiguration(oldConf);
+    }
+  }
+}
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
index 1fb49e5e7fd..a5001bbf626 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMMetrics.java
@@ -107,6 +107,9 @@ public class OMMetrics implements OmMetadataReaderMetrics {
   private @Metric MutableCounterLong numPutObjectTagging;
   private @Metric MutableCounterLong numDeleteObjectTagging;
 
+  private @Metric MutableCounterLong numLinearizableRead;
+  private @Metric MutableCounterLong numLeaderSkipLinearizableRead;
+
   // Failure Metrics
   private @Metric MutableCounterLong numVolumeCreateFails;
   private @Metric MutableCounterLong numVolumeUpdateFails;
@@ -959,6 +962,22 @@ public void incNumDeleteObjectTaggingFails() {
     numDeleteObjectTaggingFails.incr();
   }
 
+  public void incNumLinearizableRead() {
+    numLinearizableRead.incr();
+  }
+
+  public long getNumLinearizableRead() {
+    return numLinearizableRead.value();
+  }
+
+  public void incNumLeaderSkipLinearizableRead() {
+    numLeaderSkipLinearizableRead.incr();
+  }
+
+  public long getNumLeaderSkipLinearizableRead() {
+    return numLeaderSkipLinearizableRead.value();
+  }
+
   @VisibleForTesting
   public long getNumVolumeCreates() {
     return numVolumeCreates.value();
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
index 497cd565caa..de45d7f5e46 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
@@ -58,23 +58,27 @@ private OMResponse submitExecutionToRatis(OMRequest 
request, boolean isWrite) th
     // 1. create client request and preExecute
     OMClientRequest omClientRequest = null;
     final OMRequest requestToSubmit;
-    try {
-      omClientRequest = OzoneManagerRatisUtils.createClientRequest(request, 
ozoneManager);
-      assert (omClientRequest != null);
-      final OMClientRequest finalOmClientRequest = omClientRequest;
-      requestToSubmit = captureLatencyNs(perfMetrics.getPreExecuteLatencyNs(),
-          () -> finalOmClientRequest.preExecute(ozoneManager));
-    } catch (IOException ex) {
-      if (omClientRequest != null) {
-        OMAuditLogger.log(omClientRequest.getAuditBuilder());
-        omClientRequest.handleRequestFailure(ozoneManager);
+    if (isWrite) {
+      try {
+        omClientRequest = OzoneManagerRatisUtils.createClientRequest(request, 
ozoneManager);
+        assert (omClientRequest != null);
+        final OMClientRequest finalOmClientRequest = omClientRequest;
+        requestToSubmit = 
captureLatencyNs(perfMetrics.getPreExecuteLatencyNs(),
+            () -> finalOmClientRequest.preExecute(ozoneManager));
+      } catch (IOException ex) {
+        if (omClientRequest != null) {
+          OMAuditLogger.log(omClientRequest.getAuditBuilder());
+          omClientRequest.handleRequestFailure(ozoneManager);
+        }
+        return OzoneManagerRatisUtils.createErrorResponse(request, ex);
       }
-      return OzoneManagerRatisUtils.createErrorResponse(request, ex);
+    } else {
+      requestToSubmit = request;
     }
 
     // 2. submit request to ratis
     OMResponse response = 
ozoneManager.getOmRatisServer().submitRequest(requestToSubmit, isWrite);
-    if (!response.getSuccess()) {
+    if (!response.getSuccess() && omClientRequest != null) {
       omClientRequest.handleRequestFailure(ozoneManager);
     }
     return response;
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
index f8c762c8b36..2c68ee9bb16 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
@@ -184,14 +184,27 @@ public OMRequest getLastRequestToSubmit() {
 
   private OMResponse submitReadRequestToOM(OMRequest request)
       throws ServiceException {
-    // Read from leader or followers using linearizable read
-    if (omRatisServer.isLinearizableRead()) {
+    // Get current OM's role
+    RaftServerStatus raftServerStatus = omRatisServer.getLeaderStatus();
+    // === 1. Follower linearizable read ===
+    if (raftServerStatus == NOT_LEADER && omRatisServer.isLinearizableRead()) {
+      ozoneManager.getMetrics().incNumLinearizableRead();
       return ozoneManager.getOmExecutionFlow().submit(request, false);
     }
-    // Check if this OM is the leader.
-    RaftServerStatus raftServerStatus = omRatisServer.getLeaderStatus();
-    if (raftServerStatus == LEADER_AND_READY ||
-        request.getCmdType().equals(PrepareStatus)) {
+    // === 2. Leader local read (skip ReadIndex if allowed) ===
+    if (raftServerStatus == LEADER_AND_READY || 
request.getCmdType().equals(PrepareStatus)) {
+      if (ozoneManager.getConfig().isAllowLeaderSkipLinearizableRead()) {
+        ozoneManager.getMetrics().incNumLeaderSkipLinearizableRead();
+        // leader directly serves local committed data
+        return handler.handleReadRequest(request);
+      }
+      // otherwise use linearizable path when enabled
+      if (omRatisServer.isLinearizableRead()) {
+        ozoneManager.getMetrics().incNumLinearizableRead();
+        return ozoneManager.getOmExecutionFlow().submit(request, false);
+      }
+
+      // fallback to local read
       return handler.handleReadRequest(request);
     } else {
       throw createLeaderErrorException(raftServerStatus);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to