apurtell commented on code in PR #6462: URL: https://github.com/apache/hbase/pull/6462#discussion_r1840962664
########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java: ########## @@ -249,6 +249,20 @@ protected class ExecuteProceduresRemoteCall implements RemoteProcedureResolver, "hbase.regionserver.rpc.retry.interval"; private static final int DEFAULT_RS_RPC_RETRY_INTERVAL = 100; + /** + * Config to determine the retry limit while executing remote regionserver procedure. This retry + * limit applies to only specific errors. These errors could potentially get the remote + * procedure stuck for several minutes unless the retry limit is applied. + */ + private static final String RS_REMOTE_PROC_RETRY_LIMIT = + "hbase.master.rs.remote.proc.retry.limit"; + /** + * The default retry limit. Value = {@value} + */ + private static final int DEFAULT_RS_REMOTE_PROC_RETRY_LIMIT = 5; Review Comment: Why 5? Why not 3? Let's have a comment on justification for the default value. ########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/RSProcedureDispatcher.java: ########## @@ -371,6 +403,39 @@ private boolean isSaslError(IOException e) { } } + /** + * Returns true if the error or its cause is of type ConnectionClosedException. + * @param e IOException thrown by the underlying rpc framework. + * @return True if the error or its cause is of type ConnectionClosedException. + */ + private boolean isConnectionClosedError(IOException e) { + if (e instanceof ConnectionClosedException) { + return true; + } + Throwable cause = e; + while (true) { + if (cause instanceof IOException) { + IOException unwrappedCause = unwrapException((IOException) cause); + if (unwrappedCause instanceof ConnectionClosedException) { + return true; + } + } + cause = cause.getCause(); + if (cause == null) { + return false; + } + } + } + + /** + * Returns true if the error is eligible for imposing retry limit. + * @param e IOException thrown by the underlying rpc framework. + * @return True if the error is eligible for imposing retry limit. + */ + private boolean isErrorTypeEligibleForRetryLimit(IOException e) { Review Comment: Instead of "eligible for retry limit" type wording, you should describe this as "fast fail" in method naming and comment text. Ok, and then following this recommendation, the config `hbase.master.rs.remote.proc.retry.limit` might be better named `hbase.master.rs.remote.proc.fast.fail.limit` ########## hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java: ########## @@ -489,6 +491,15 @@ public class HMaster extends HBaseServerBase<MasterRpcServices> implements Maste public static final String WARMUP_BEFORE_MOVE = "hbase.master.warmup.before.move"; private static final boolean DEFAULT_WARMUP_BEFORE_MOVE = true; + /** + * Use RSProcedureDispatcher instance to initiate master -> rs remote procedure execution. Use + * this config to provide customized RSProcedureDispatcher. Review Comment: What are we planning here @virajjasani ? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org