This is an automated email from the ASF dual-hosted git repository.

ivandika pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new a5658eebf7f HDDS-9279. Basic implementation of OM Follower read (#9222)
a5658eebf7f is described below

commit a5658eebf7fd87380d1d5eab301350bafcfe3a3e
Author: Symious <[email protected]>
AuthorDate: Tue Nov 18 10:35:28 2025 +0800

    HDDS-9279. Basic implementation of OM Follower read (#9222)
---
 .../ozone/om/ha/OMFailoverProxyProviderBase.java   |  4 +-
 .../apache/hadoop/ozone/freon/FollowerReader.java  | 96 ++++++++++++++++++++++
 .../hadoop/ozone/om/TestOMRatisSnapshots.java      |  1 +
 .../snapshot/TestSnapshotBackgroundServices.java   |  1 +
 .../hadoop/ozone/om/execution/OMExecutionFlow.java |  8 +-
 .../ozone/om/ratis/OzoneManagerRatisServer.java    | 20 +++--
 .../om/ratis/OzoneManagerRatisServerConfig.java    | 37 +++++++++
 ...OzoneManagerProtocolServerSideTranslatorPB.java |  6 +-
 .../hadoop/ozone/om/failover/TestOMFailovers.java  |  2 +-
 9 files changed, 162 insertions(+), 13 deletions(-)

diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java
index 356d3fb1eff..28c5597196e 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProviderBase.java
@@ -24,6 +24,7 @@
 import java.io.IOException;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -445,7 +446,8 @@ protected synchronized void setOmProxies(Map<String,
   }
 
   protected synchronized void setOmNodeIDList(List<String> omNodeIDList) {
-    this.omNodeIDList = omNodeIDList;
+    Collections.shuffle(omNodeIDList);
+    this.omNodeIDList = Collections.unmodifiableList(omNodeIDList);
   }
 
   protected synchronized List<String> getOmNodeIDList() {
diff --git a/hadoop-ozone/freon/src/main/java/org/apache/hadoop/ozone/freon/FollowerReader.java b/hadoop-ozone/freon/src/main/java/org/apache/hadoop/ozone/freon/FollowerReader.java
new file mode 100644
index 00000000000..ee5939e4c5e
--- /dev/null
+++ b/hadoop-ozone/freon/src/main/java/org/apache/hadoop/ozone/freon/FollowerReader.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.freon;
+
+import com.codahale.metrics.Timer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Callable;
+import org.apache.hadoop.hdds.cli.HddsVersionProvider;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.kohsuke.MetaInfServices;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import picocli.CommandLine;
+import picocli.CommandLine.Command;
+
+/**
+ * Data generator tool test om performance.
+ */
+@Command(name = "fr",
+    aliases = "follower-reader",
+    description = "Read the same keySize from multiple threads.",
+    versionProvider = HddsVersionProvider.class,
+    mixinStandardHelpOptions = true,
+    showDefaultValues = true)
+@MetaInfServices(FreonSubcommand.class)
+public class FollowerReader extends BaseFreonGenerator
+    implements Callable<Void> {
+
+  private static final Logger LOG =
+      LoggerFactory.getLogger(FollowerReader.class);
+
+  @CommandLine.Option(names = {"-v", "--volume"},
+      description = "Name of the bucket which contains the test data. Will be"
+          + " created if missing.",
+      defaultValue = "vol1")
+  private String volumeName;
+
+  @CommandLine.Option(names = {"-b", "--bucket"},
+      description = "Name of the bucket which contains the test data.",
+      defaultValue = "bucket1")
+  private String bucketName;
+
+  @CommandLine.Option(names = {"-k", "--key"},
+      description = "Name of the key which contains the test data.",
+      defaultValue = "key1")
+  private String keyName;
+
+  private String omServiceID = null;
+
+  private Timer timer;
+
+  private final List<OzoneClient> rpcClients = new ArrayList<>();
+
+  @Override
+  public Void call() throws Exception {
+    init();
+    OzoneConfiguration ozoneConfiguration = createOzoneConfiguration();
+
+    for (int i = 0; i < getThreadNo(); i++) {
+      OzoneClient rpcClient = createOzoneClient(omServiceID, ozoneConfiguration);
+      rpcClients.add(rpcClient);
+    }
+
+    timer = getMetrics().timer("follower-read");
+
+    runTests(this::readKeySize);
+    return null;
+  }
+
+  private void readKeySize(long counter) throws Exception {
+    int clientIdx = (int) (counter % rpcClients.size());
+    timer.time(() -> {
+      long unused = rpcClients.get(clientIdx).getObjectStore().getVolume(volumeName)
+          .getBucket(bucketName).getKey(keyName).getDataSize();
+      return null;
+    });
+  }
+
+}
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
index 051addfd8ff..8fb519bd8f2 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
@@ -151,6 +151,7 @@ public void init(TestInfo testInfo) throws Exception {
         conf.getObject(OzoneManagerRatisServerConfig.class);
     omRatisConf.setLogAppenderWaitTimeMin(10);
     conf.setFromObject(omRatisConf);
+    conf.set("ozone.om.client.rpc.timeout", "1m");
 
     cluster = MiniOzoneCluster.newHABuilder(conf)
         .setOMServiceId("om-service-test1")
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java
index 635002aa098..ad42cc35845 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/snapshot/TestSnapshotBackgroundServices.java
@@ -111,6 +111,7 @@ public void init(TestInfo testInfo) throws Exception {
     OzoneManagerRatisServerConfig omRatisConf = conf.getObject(OzoneManagerRatisServerConfig.class);
     omRatisConf.setLogAppenderWaitTimeMin(10);
     conf.setFromObject(omRatisConf);
+    conf.set("ozone.om.client.rpc.timeout", "1m");
     conf.setInt(OMConfigKeys.OZONE_OM_RATIS_LOG_PURGE_GAP, LOG_PURGE_GAP);
     conf.setStorageSize(OMConfigKeys.OZONE_OM_RATIS_SEGMENT_SIZE_KEY, 16, StorageUnit.KB);
     conf.setStorageSize(OMConfigKeys.OZONE_OM_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY, 16, StorageUnit.KB);
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
index 4ce714ab3dc..497cd565caa 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/execution/OMExecutionFlow.java
@@ -49,12 +49,12 @@ public OMExecutionFlow(OzoneManager om) {
    * @return OMResponse the response of execution
    * @throws ServiceException the exception on execution
    */
-  public OMResponse submit(OMRequest omRequest) throws ServiceException {
+  public OMResponse submit(OMRequest omRequest, boolean isWrite) throws ServiceException {
     // TODO: currently have only execution after ratis submission, but with new flow can have switch later
-    return submitExecutionToRatis(omRequest);
+    return submitExecutionToRatis(omRequest, isWrite);
   }
 
-  private OMResponse submitExecutionToRatis(OMRequest request) throws ServiceException {
+  private OMResponse submitExecutionToRatis(OMRequest request, boolean isWrite) throws ServiceException {
     // 1. create client request and preExecute
     OMClientRequest omClientRequest = null;
     final OMRequest requestToSubmit;
@@ -73,7 +73,7 @@ private OMResponse submitExecutionToRatis(OMRequest request) throws ServiceExcep
     }
 
     // 2. submit request to ratis
-    OMResponse response = ozoneManager.getOmRatisServer().submitRequest(requestToSubmit);
+    OMResponse response = ozoneManager.getOmRatisServer().submitRequest(requestToSubmit, isWrite);
     if (!response.getSuccess()) {
       omClientRequest.handleRequestFailure(ozoneManager);
     }
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
index e3beae915d8..917ae342818 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServer.java
@@ -92,6 +92,7 @@
 import org.apache.ratis.rpc.SupportedRpcType;
 import org.apache.ratis.server.RaftServer;
 import org.apache.ratis.server.RaftServerConfigKeys;
+import org.apache.ratis.server.RaftServerConfigKeys.Read;
 import org.apache.ratis.server.RetryCache;
 import org.apache.ratis.server.protocol.TermIndex;
 import org.apache.ratis.server.storage.RaftStorage;
@@ -124,6 +125,7 @@ public final class OzoneManagerRatisServer {
 
   private final ClientId clientId = ClientId.randomId();
   private static final AtomicLong CALL_ID_COUNTER = new AtomicLong();
+  private final Read.Option readOption;
 
   private static long nextCallId() {
     return CALL_ID_COUNTER.getAndIncrement() & Long.MAX_VALUE;
@@ -171,6 +173,8 @@ private OzoneManagerRatisServer(ConfigurationSource conf, OzoneManager om,
     }
     this.omStateMachine = getStateMachine(conf);
 
+    this.readOption = RaftServerConfigKeys.Read.option(serverProperties);
+
     Parameters parameters = createServerTlsParameters(secConfig, certClient);
     this.server = RaftServer.newBuilder()
         .setServerId(this.raftPeerId)
@@ -239,11 +243,11 @@ public static OzoneManagerRatisServer newOMRatisServer(
    * @return OMResponse - response returned to the client.
    * @throws ServiceException
    */
-  public OMResponse submitRequest(OMRequest omRequest) throws ServiceException {
+  public OMResponse submitRequest(OMRequest omRequest, boolean isWrite) throws ServiceException {
     // In prepare mode, only prepare and cancel requests are allowed to go
     // through.
     if (ozoneManager.getPrepareState().requestAllowed(omRequest.getCmdType())) {
-      RaftClientRequest raftClientRequest = createRaftRequest(omRequest);
+      RaftClientRequest raftClientRequest = createRaftRequest(omRequest, isWrite);
       RaftClientReply raftClientReply = submitRequestToRatis(raftClientRequest);
       return createOmResponse(omRequest, raftClientReply);
     } else {
@@ -277,10 +281,10 @@ private RaftClientReply submitRequestToRatis(
         () -> submitRequestToRatisImpl(raftClientRequest));
   }
 
-  private RaftClientRequest createRaftRequest(OMRequest omRequest) {
+  private RaftClientRequest createRaftRequest(OMRequest omRequest, boolean isWrite) {
     return captureLatencyNs(
         perfMetrics.getCreateRatisRequestLatencyNs(),
-        () -> createRaftRequestImpl(omRequest));
+        () -> createRaftRequestImpl(omRequest, isWrite));
   }
 
   /**
@@ -500,7 +504,7 @@ public void removeRaftPeer(OMNodeDetails omNodeDetails) {
    * @return RaftClientRequest - Raft Client request which is submitted to
    * ratis server.
    */
-  private RaftClientRequest createRaftRequestImpl(OMRequest omRequest) {
+  private RaftClientRequest createRaftRequestImpl(OMRequest omRequest, boolean isWrite) {
     return RaftClientRequest.newBuilder()
         .setClientId(getClientId())
         .setServerId(server.getId())
@@ -509,7 +513,7 @@ private RaftClientRequest createRaftRequestImpl(OMRequest omRequest) {
         .setMessage(
             Message.valueOf(
                 OMRatisHelper.convertRequestToByteString(omRequest)))
-        .setType(RaftClientRequest.writeRequestType())
+        .setType(isWrite ? RaftClientRequest.writeRequestType() : RaftClientRequest.readRequestType())
         .build();
   }
 
@@ -647,6 +651,10 @@ public RaftServer.Division getServerDivision() {
     return serverDivision.get();
   }
 
+  public boolean isLinearizableRead() {
+    return readOption == Read.Option.LINEARIZABLE;
+  }
+
   /**
    * Initializes and returns OzoneManager StateMachine.
    */
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServerConfig.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServerConfig.java
index a3703dfe63b..2624fc8b2b8 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServerConfig.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/ratis/OzoneManagerRatisServerConfig.java
@@ -52,6 +52,27 @@ public class OzoneManagerRatisServerConfig {
   )
   private long retryCacheTimeout = Duration.ofSeconds(300).toMillis();
 
+  @Config(key = "read.option",
+      defaultValue = "DEFAULT",
+      type = ConfigType.STRING,
+      tags = {OZONE, OM, RATIS, PERFORMANCE},
+      description = "Select the Ratis server read option." +
+          " Possible values are: " +
+          "   DEFAULT      - Directly query statemachine (non-linearizable). " +
+          "     Only the leader can serve read requests. " +
+          "   LINEARIZABLE - Use ReadIndex (see Raft Paper section 6.4) to maintain linearizability. " +
+          " Both the leader and the followers can serve read requests."
+  )
+  private String readOption;
+
+  @Config(key = "read.leader.lease.enabled",
+      defaultValue = "false",
+      type = ConfigType.BOOLEAN,
+      tags = {OZONE, OM, RATIS, PERFORMANCE},
+      description = "If we enabled the leader lease on Ratis Leader."
+  )
+  private boolean readLeaderLeaseEnabled;
+
   public long getLogAppenderWaitTimeMin() {
     return logAppenderWaitTimeMin;
   }
@@ -67,4 +88,20 @@ public long getRetryCacheTimeout() {
   public void setRetryCacheTimeout(Duration duration) {
     this.retryCacheTimeout = duration.toMillis();
   }
+
+  public String getReadOption() {
+    return readOption;
+  }
+
+  public void setReadOption(String option) {
+    this.readOption = option;
+  }
+
+  public boolean isReadLeaderLeaseEnabled() {
+    return readLeaderLeaseEnabled;
+  }
+
+  public void setReadLeaderLeaseEnabled(boolean readLeaderLeaseEnabled) {
+    this.readLeaderLeaseEnabled = readLeaderLeaseEnabled;
+  }
 }
diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
index 251e81e83ed..69e4e6d8f1a 100644
--- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
+++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OzoneManagerProtocolServerSideTranslatorPB.java
@@ -171,7 +171,7 @@ private OMResponse internalProcessRequest(OMRequest request) throws ServiceExcep
       }
 
       this.lastRequestToSubmit = request;
-      return ozoneManager.getOmExecutionFlow().submit(request);
+      return ozoneManager.getOmExecutionFlow().submit(request, true);
     } finally {
       OzoneManager.setS3Auth(null);
     }
@@ -184,6 +184,10 @@ public OMRequest getLastRequestToSubmit() {
 
   private OMResponse submitReadRequestToOM(OMRequest request)
       throws ServiceException {
+    // Read from leader or followers using linearizable read
+    if (omRatisServer.isLinearizableRead()) {
+      return ozoneManager.getOmExecutionFlow().submit(request, false);
+    }
     // Check if this OM is the leader.
     RaftServerStatus raftServerStatus = omRatisServer.getLeaderStatus();
     if (raftServerStatus == LEADER_AND_READY ||
diff --git a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java
index 4e2b0c7c66a..0de7052c4ed 100644
--- a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java
+++ b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/failover/TestOMFailovers.java
@@ -74,7 +74,7 @@ public void testAccessContorlExceptionFailovers() throws Exception {
     // Request should try all OMs one be one and fail when the last OM also
     // throws AccessControlException.
     
assertThat(serviceException).hasCauseInstanceOf(AccessControlException.class)
-        .hasMessage("ServiceException of type class 
org.apache.hadoop.security.AccessControlException for om3");
+        .hasMessageStartingWith("ServiceException of type class 
org.apache.hadoop.security.AccessControlException");
     assertThat(logCapturer.getOutput()).contains(getRetryProxyDebugMsg("om1"));
     assertThat(logCapturer.getOutput()).contains(getRetryProxyDebugMsg("om2"));
     assertThat(logCapturer.getOutput()).contains(getRetryProxyDebugMsg("om3"));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to