[
https://issues.apache.org/jira/browse/HADOOP-19622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18033271#comment-18033271
]
ASF GitHub Bot commented on HADOOP-19622:
-----------------------------------------
bhattmanish98 commented on code in PR #7832:
URL: https://github.com/apache/hadoop/pull/7832#discussion_r2465910989
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java:
##########
@@ -128,13 +128,20 @@ public final class FileSystemConfigurations {
public static final long
DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS = 120;
public static final boolean DEFAULT_ENABLE_READAHEAD = true;
- public static final boolean DEFAULT_ENABLE_READAHEAD_V2 = false;
+ public static final boolean DEFAULT_ENABLE_READAHEAD_V2 = true;
+ public static final boolean DEFAULT_ENABLE_READAHEAD_V2_DYNAMIC_SCALING =
true;
public static final int DEFAULT_READAHEAD_V2_MIN_THREAD_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MAX_THREAD_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MIN_BUFFER_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MAX_BUFFER_POOL_SIZE = -1;
- public static final int DEFAULT_READAHEAD_V2_EXECUTOR_SERVICE_TTL_MILLIS =
3_000;
+ public static final int DEFAULT_READAHEAD_V2_CPU_MONITORING_INTERVAL_MILLIS
= 6_000;
+ public static final int DEFAULT_READAHEAD_V2_THREAD_POOL_UPSCALE_PERCENTAGE
= 20;
+ public static final int
DEFAULT_READAHEAD_V2_THREAD_POOL_DOWNSCALE_PERCENTAGE = 30;
Review Comment:
Same as above
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/FileSystemConfigurations.java:
##########
@@ -128,13 +128,20 @@ public final class FileSystemConfigurations {
public static final long
DEFAULT_SAS_TOKEN_RENEW_PERIOD_FOR_STREAMS_IN_SECONDS = 120;
public static final boolean DEFAULT_ENABLE_READAHEAD = true;
- public static final boolean DEFAULT_ENABLE_READAHEAD_V2 = false;
+ public static final boolean DEFAULT_ENABLE_READAHEAD_V2 = true;
+ public static final boolean DEFAULT_ENABLE_READAHEAD_V2_DYNAMIC_SCALING =
true;
public static final int DEFAULT_READAHEAD_V2_MIN_THREAD_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MAX_THREAD_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MIN_BUFFER_POOL_SIZE = -1;
public static final int DEFAULT_READAHEAD_V2_MAX_BUFFER_POOL_SIZE = -1;
- public static final int DEFAULT_READAHEAD_V2_EXECUTOR_SERVICE_TTL_MILLIS =
3_000;
+ public static final int DEFAULT_READAHEAD_V2_CPU_MONITORING_INTERVAL_MILLIS
= 6_000;
+ public static final int DEFAULT_READAHEAD_V2_THREAD_POOL_UPSCALE_PERCENTAGE
= 20;
Review Comment:
For some variables you have used "Percentage" and for others "Percent"; we
should keep it consistent across all places.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManagerV2.java:
##########
@@ -17,67 +17,91 @@
*/
package org.apache.hadoop.fs.azurebfs.services;
+import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
+import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus;
+
+import com.sun.management.OperatingSystemMXBean;
+
import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Stack;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.ReentrantLock;
-import org.apache.hadoop.classification.VisibleForTesting;
-import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
-import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.classification.VisibleForTesting;
-final class ReadBufferManagerV2 extends ReadBufferManager {
+import static
org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_HUNDRED;
+
+/**
+ * The Improved Read Buffer Manager for Rest AbfsClient.
+ */
+public class ReadBufferManagerV2 extends ReadBufferManager {
+ // Internal constants
+ private static final ReentrantLock LOCK = new ReentrantLock();
// Thread Pool Configurations
private static int minThreadPoolSize;
private static int maxThreadPoolSize;
+ private static int cpuMonitoringIntervalInMilliSec;
+ private static double cpuThreshold;
+ private static int threadPoolUpscalePercentage;
+ private static int threadPoolDownscalePercentage;
private static int executorServiceKeepAliveTimeInMilliSec;
+ private static final double THREAD_POOL_REQUIREMENT_BUFFER = 1.2; // 20%
more threads than the queue size
+ private static boolean isDynamicScalingEnabled;
+
+ private ScheduledExecutorService cpuMonitorThread;
private ThreadPoolExecutor workerPool;
+ private final List<ReadBufferWorker> workerRefs = new ArrayList<>();
// Buffer Pool Configurations
private static int minBufferPoolSize;
private static int maxBufferPoolSize;
+ private static int memoryMonitoringIntervalInMilliSec;
+ private static double memoryThreshold;
+
private int numberOfActiveBuffers = 0;
private byte[][] bufferPool;
+ private Stack<Integer> removedBufferList = new Stack<>();
+ private ScheduledExecutorService memoryMonitorThread;
+ // Buffer Manager Structures
private static ReadBufferManagerV2 bufferManager;
-
- // hide instance constructor
- private ReadBufferManagerV2() {
- LOGGER.trace("Creating readbuffer manager with HADOOP-18546 patch");
- }
+ private static boolean isConfigured = false;
/**
- * Sets the read buffer manager configurations.
- * @param readAheadBlockSize the size of the read-ahead block in bytes
- * @param abfsConfiguration the AbfsConfiguration instance for other
configurations
+ * Private constructor to prevent instantiation as this needs to be
singleton.
*/
- static void setReadBufferManagerConfigs(int readAheadBlockSize,
AbfsConfiguration abfsConfiguration) {
- if (bufferManager == null) {
- minThreadPoolSize = abfsConfiguration.getMinReadAheadV2ThreadPoolSize();
- maxThreadPoolSize = abfsConfiguration.getMaxReadAheadV2ThreadPoolSize();
- executorServiceKeepAliveTimeInMilliSec =
abfsConfiguration.getReadAheadExecutorServiceTTLInMillis();
-
- minBufferPoolSize = abfsConfiguration.getMinReadAheadV2BufferPoolSize();
- maxBufferPoolSize = abfsConfiguration.getMaxReadAheadV2BufferPoolSize();
-
setThresholdAgeMilliseconds(abfsConfiguration.getReadAheadV2CachedBufferTTLMillis());
- setReadAheadBlockSize(readAheadBlockSize);
- }
+ private ReadBufferManagerV2() {
+ printTraceLog("Creating Read Buffer Manager V2 with HADOOP-18546 patch");
Review Comment:
We should use LOGGER.trace instead of printTraceLog.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManagerV2.java:
##########
@@ -106,123 +166,731 @@ void init() {
executorServiceKeepAliveTimeInMilliSec,
TimeUnit.MILLISECONDS,
new SynchronousQueue<>(),
- namedThreadFactory);
+ workerThreadFactory);
workerPool.allowCoreThreadTimeOut(true);
for (int i = 0; i < minThreadPoolSize; i++) {
- ReadBufferWorker worker = new ReadBufferWorker(i, this);
+ ReadBufferWorker worker = new ReadBufferWorker(i, getBufferManager());
+ workerRefs.add(worker);
workerPool.submit(worker);
}
ReadBufferWorker.UNLEASH_WORKERS.countDown();
+
+ if (isDynamicScalingEnabled) {
+ cpuMonitorThread = Executors.newSingleThreadScheduledExecutor(runnable
-> {
+ Thread t = new Thread(runnable, "ReadAheadV2-CPU-Monitor");
+ t.setDaemon(true);
+ return t;
+ });
+ cpuMonitorThread.scheduleAtFixedRate(this::adjustThreadPool,
+ getCpuMonitoringIntervalInMilliSec(),
getCpuMonitoringIntervalInMilliSec(),
+ TimeUnit.MILLISECONDS);
+ }
+
+ printTraceLog("ReadBufferManagerV2 initialized with {} buffers and {}
worker threads",
Review Comment:
Same as above; please change it wherever you have used it.
##########
hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/ReadBufferManagerV2.java:
##########
@@ -17,67 +17,91 @@
*/
package org.apache.hadoop.fs.azurebfs.services;
+import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
+import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus;
+
+import com.sun.management.OperatingSystemMXBean;
+
import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Stack;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.ReentrantLock;
-import org.apache.hadoop.classification.VisibleForTesting;
-import org.apache.hadoop.fs.azurebfs.AbfsConfiguration;
-import org.apache.hadoop.fs.azurebfs.contracts.services.ReadBufferStatus;
import org.apache.hadoop.fs.azurebfs.utils.TracingContext;
+import org.apache.hadoop.classification.VisibleForTesting;
-final class ReadBufferManagerV2 extends ReadBufferManager {
+import static
org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.ONE_HUNDRED;
+
+/**
+ * The Improved Read Buffer Manager for Rest AbfsClient.
+ */
+public class ReadBufferManagerV2 extends ReadBufferManager {
+ // Internal constants
+ private static final ReentrantLock LOCK = new ReentrantLock();
// Thread Pool Configurations
private static int minThreadPoolSize;
private static int maxThreadPoolSize;
+ private static int cpuMonitoringIntervalInMilliSec;
+ private static double cpuThreshold;
+ private static int threadPoolUpscalePercentage;
+ private static int threadPoolDownscalePercentage;
private static int executorServiceKeepAliveTimeInMilliSec;
+ private static final double THREAD_POOL_REQUIREMENT_BUFFER = 1.2; // 20%
more threads than the queue size
Review Comment:
Is this configurable, or have we fixed this number based on POC data? If so,
can we explain it a little more for future understanding?
> ABFS: [ReadAheadV2] Implement Read Buffer Manager V2 with improved
> aggressiveness
> ---------------------------------------------------------------------------------
>
> Key: HADOOP-19622
> URL: https://issues.apache.org/jira/browse/HADOOP-19622
> Project: Hadoop Common
> Issue Type: Sub-task
> Components: fs/azure
> Affects Versions: 3.5.0, 3.4.1
> Reporter: Anuj Modi
> Assignee: Anuj Modi
> Priority: Major
> Labels: pull-request-available
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]