[
https://issues.apache.org/jira/browse/HADOOP-19613?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18009576#comment-18009576
]
ASF GitHub Bot commented on HADOOP-19613:
-----------------------------------------
bhattmanish98 commented on code in PR #7801:
URL: https://github.com/apache/hadoop/pull/7801#discussion_r2228398088
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java:
##########
@@ -381,6 +381,41 @@ public class AbfsConfiguration{
DefaultValue = DEFAULT_ENABLE_READAHEAD)
private boolean enabledReadAhead;
+ @BooleanConfigurationValidatorAnnotation(
+ ConfigurationKey = FS_AZURE_ENABLE_READAHEAD_V2,
+ DefaultValue = DEFAULT_ENABLE_READAHEAD_V2)
+ private boolean isReadAheadV2Enabled;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_MIN_THREAD_POOL_SIZE,
+ DefaultValue = DEFAULT_READAHEAD_V2_MIN_THREAD_POOL_SIZE)
+ private int minReadAheadV2ThreadPoolSize;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_MAX_THREAD_POOL_SIZE,
+ DefaultValue = DEFAULT_READAHEAD_V2_MAX_THREAD_POOL_SIZE)
+ private int maxReadAheadV2ThreadPoolSize;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_MIN_BUFFER_POOL_SIZE,
+ DefaultValue = DEFAULT_READAHEAD_V2_MIN_BUFFER_POOL_SIZE)
+ private int minReadAheadV2BufferPoolSize;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_MAX_BUFFER_POOL_SIZE,
+ DefaultValue = DEFAULT_READAHEAD_V2_MAX_BUFFER_POOL_SIZE)
+ private int maxReadAheadV2BufferPoolSize;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_EXECUTOR_SERVICE_TTL_MILLISECONDS,
+ DefaultValue = DEFAULT_READAHEAD_V2_EXECUTOR_SERVICE_TTL_MILLISECONDS)
+ private int readAheadExecutorServiceTTLInMilliSeconds;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_CACHED_BUFFER_TTL_MILLISECONDS,
+ DefaultValue = DEFAULT_READAHEAD_V2_CACHED_BUFFER_TTL_MILLISECONDS)
+ private int readAheadV2CachedBufferTTLMilliseconds;
Review Comment:
Can we keep the attribute naming format consistent? The attribute above ends with
TTLInMilliSeconds, while this one ends with TTLMilliseconds.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsInputStream.java:
##########
@@ -173,9 +176,19 @@ public AbfsInputStream(
this.fsBackRef = abfsInputStreamContext.getFsBackRef();
contextEncryptionAdapter = abfsInputStreamContext.getEncryptionAdapter();
- // Propagate the config values to ReadBufferManager so that the first
instance
- // to initialize can set the readAheadBlockSize
- ReadBufferManager.setReadBufferManagerConfigs(readAheadBlockSize);
+ /*
+ * Initialize the ReadBufferManager based on whether readAheadV2 is
enabled or not.
+ * Precedence is given to ReadBufferManagerV2.
+ * If none of the V1 and V2 are enabled, then no read ahead will be done.
+ */
+ if (readAheadV2Enabled) {
+ ReadBufferManagerV2.setReadBufferManagerConfigs(
+ readAheadBlockSize, client.getAbfsConfiguration());
+ readBufferManager = ReadBufferManagerV2.getBufferManager();
+ } else {
Review Comment:
Should this be under else if (readAheadEnabled) instead of a plain else?
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/ConfigurationKeys.java:
##########
@@ -259,10 +259,46 @@ public final class ConfigurationKeys {
public static final String AZURE_KEY_ACCOUNT_SHELLKEYPROVIDER_SCRIPT =
"fs.azure.shellkeyprovider.script";
/**
- * Enable or disable readahead buffer in AbfsInputStream.
+ * Enable or disable readahead V1 in AbfsInputStream.
* Value: {@value}.
*/
public static final String FS_AZURE_ENABLE_READAHEAD =
"fs.azure.enable.readahead";
+ /**
+ * Enable or disable readahead V2 in AbfsInputStream. This will work
independent of V1.
+ * Value: {@value}.
+ */
+ public static final String FS_AZURE_ENABLE_READAHEAD_V2 =
"fs.azure.enable.readahead.v2";
+
+ /**
+ * Minimum number of prefetch threads in the thread pool for readahead V2.
+ * {@value }
+ */
+ public static final String FS_AZURE_READAHEAD_V2_MIN_THREAD_POOL_SIZE =
"fs.azure.readahead.v2.min.thread.pool.size";
+ /**
+ * Maximum number of prefetch threads in the thread pool for readahead V2.
+ * {@value }
+ */
+ public static final String FS_AZURE_READAHEAD_V2_MAX_THREAD_POOL_SIZE =
"fs.azure.readahead.v2.max.thread.pool.size";
+ /**
+ * Minimum size of the buffer pool for caching prefetched data for readahead
V2.
+ * {@value }
+ */
+ public static final String FS_AZURE_READAHEAD_V2_MIN_BUFFER_POOL_SIZE =
"fs.azure.readahead.v2.min.buffer.pool.size";
+ /**
+ * Maximum size of the buffer pool for caching prefetched data for readahead
V2.
+ * {@value }
+ */
+ public static final String FS_AZURE_READAHEAD_V2_MAX_BUFFER_POOL_SIZE =
"fs.azure.readahead.v2.max.buffer.pool.size";
+
+ /**
+ * TTL in milliseconds for the idle threads in executor service used by read
ahead v2.
+ */
+ public static final String
FS_AZURE_READAHEAD_V2_EXECUTOR_SERVICE_TTL_MILLISECONDS =
"fs.azure.readahead.v2.executor.service.ttl.seconds";
Review Comment:
"fs.azure.readahead.v2.executor.service.ttl.seconds" ->
"fs.azure.readahead.v2.executor.service.ttl.milliseconds"
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java:
##########
@@ -1368,6 +1403,54 @@ public boolean isReadAheadEnabled() {
return this.enabledReadAhead;
}
+ public int getMinReadAheadV2ThreadPoolSize() {
+ if (minReadAheadV2ThreadPoolSize <= 0) {
+ // If the minReadAheadV2ThreadPoolSize is not set, use the default value
+ return 2 * Runtime.getRuntime().availableProcessors();
+ }
+ return minReadAheadV2ThreadPoolSize;
+ }
+
+ public int getMaxReadAheadV2ThreadPoolSize() {
+ if (maxReadAheadV2ThreadPoolSize <= 0) {
+ // If the maxReadAheadV2ThreadPoolSize is not set, use the default value
+ return 4 * Runtime.getRuntime().availableProcessors();
+ }
+ return maxReadAheadV2ThreadPoolSize;
+ }
+
+ public int getMinReadAheadV2BufferPoolSize() {
+ if (minReadAheadV2BufferPoolSize <= 0) {
+ // If the minReadAheadV2BufferPoolSize is not set, use the default value
+ return 2 * Runtime.getRuntime().availableProcessors();
+ }
+ return minReadAheadV2BufferPoolSize;
+ }
+
+ public int getMaxReadAheadV2BufferPoolSize() {
+ if (maxReadAheadV2BufferPoolSize <= 0) {
+ // If the maxReadAheadV2BufferPoolSize is not set, use the default value
+ return 4 * Runtime.getRuntime().availableProcessors();
+ }
+ return maxReadAheadV2BufferPoolSize;
+ }
+
+ public int getReadAheadExecutorServiceTTLInMilliSeconds() {
+ return readAheadExecutorServiceTTLInMilliSeconds;
+ }
+
+ public int getReadAheadV2CachedBufferTTLMilliseconds() {
Review Comment:
Same as above: the method name should follow the same naming format everywhere.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/AbfsConfiguration.java:
##########
@@ -381,6 +381,41 @@ public class AbfsConfiguration{
DefaultValue = DEFAULT_ENABLE_READAHEAD)
private boolean enabledReadAhead;
+ @BooleanConfigurationValidatorAnnotation(
+ ConfigurationKey = FS_AZURE_ENABLE_READAHEAD_V2,
+ DefaultValue = DEFAULT_ENABLE_READAHEAD_V2)
+ private boolean isReadAheadV2Enabled;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_MIN_THREAD_POOL_SIZE,
+ DefaultValue = DEFAULT_READAHEAD_V2_MIN_THREAD_POOL_SIZE)
+ private int minReadAheadV2ThreadPoolSize;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_MAX_THREAD_POOL_SIZE,
+ DefaultValue = DEFAULT_READAHEAD_V2_MAX_THREAD_POOL_SIZE)
+ private int maxReadAheadV2ThreadPoolSize;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_MIN_BUFFER_POOL_SIZE,
+ DefaultValue = DEFAULT_READAHEAD_V2_MIN_BUFFER_POOL_SIZE)
+ private int minReadAheadV2BufferPoolSize;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_MAX_BUFFER_POOL_SIZE,
+ DefaultValue = DEFAULT_READAHEAD_V2_MAX_BUFFER_POOL_SIZE)
+ private int maxReadAheadV2BufferPoolSize;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_EXECUTOR_SERVICE_TTL_MILLISECONDS,
+ DefaultValue = DEFAULT_READAHEAD_V2_EXECUTOR_SERVICE_TTL_MILLISECONDS)
+ private int readAheadExecutorServiceTTLInMilliSeconds;
+
+ @IntegerConfigurationValidatorAnnotation(ConfigurationKey =
+ FS_AZURE_READAHEAD_V2_CACHED_BUFFER_TTL_MILLISECONDS,
+ DefaultValue = DEFAULT_READAHEAD_V2_CACHED_BUFFER_TTL_MILLISECONDS)
+ private int readAheadV2CachedBufferTTLMilliseconds;
Review Comment:
Also, if possible, we can shorten the attribute name, e.g.
readAheadV2CachedBufferTTLInMillis.
> ABFS: [ReadAheadV2] Refactor ReadBufferManager to isolate new code with the
> current working code
> ------------------------------------------------------------------------------------------------
>
> Key: HADOOP-19613
> URL: https://issues.apache.org/jira/browse/HADOOP-19613
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: fs/azure
> Affects Versions: 3.5.0, 3.4.1
> Reporter: Anuj Modi
> Assignee: Anuj Modi
> Priority: Major
> Labels: pull-request-available
>
> Read Buffer Manager used today was introduced way back and has been stable
> for quite a while.
> Read Buffer Manager to be introduced as part of
> https://issues.apache.org/jira/browse/HADOOP-19596 will introduce many
> changes incrementally over time. While development goes on, and until we are
> able to fully stabilise the optimized version, we need the current flow to be
> functional and undisturbed.
> This work item is to isolate the current flow from the new code by refactoring
> the ReadBufferManager class into two implementations that share the same public
> interface: ReadBufferManagerV1 and ReadBufferManagerV2.
> This will also introduce new configs that can be used to toggle between new
> and old code.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]