tasanuma commented on code in PR #5143:
URL: https://github.com/apache/hadoop/pull/5143#discussion_r1031678736
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java:
##########
@@ -1825,28 +1825,48 @@ public DatanodeCommand[]
handleHeartbeat(DatanodeRegistration nodeReg,
// Allocate _approximately_ maxTransfers pending tasks to DataNode.
// NN chooses pending tasks based on the ratio between the lengths of
// replication and erasure-coded block queues.
- int totalReplicateBlocks = nodeinfo.getNumberOfReplicateBlocks();
- int totalECBlocks = nodeinfo.getNumberOfBlocksToBeErasureCoded();
- int totalBlocks = totalReplicateBlocks + totalECBlocks;
+ int replicationBlocks = nodeinfo.getNumberOfReplicateBlocks();
+ int ecReplicatedBlocks = nodeinfo.getNumberOfECReplicatedBlocks();
+ int ecReconstructedBlocks = nodeinfo.getNumberOfBlocksToBeErasureCoded();
+ int totalBlocks = replicationBlocks + ecReplicatedBlocks +
ecReconstructedBlocks;
if (totalBlocks > 0) {
- int maxTransfers;
+ int maxReplicationTransfers = blockManager.getMaxReplicationStreams()
+ - xmitsInProgress;
+ int maxECReplicatedTransfers;
+ int maxECReconstructedTransfers;
if (nodeinfo.isDecommissionInProgress()) {
- maxTransfers = blockManager.getReplicationStreamsHardLimit()
+ maxECReplicatedTransfers =
blockManager.getReplicationStreamsHardLimit()
- xmitsInProgress;
+ maxECReconstructedTransfers =
blockManager.getReplicationStreamsHardLimit()
+ - xmitsInProgress;
} else {
- maxTransfers = blockManager.getMaxReplicationStreams()
+ maxECReplicatedTransfers = blockManager.getMaxReplicationStreams()
- xmitsInProgress;
+ maxECReconstructedTransfers = blockManager.getMaxReplicationStreams()
+ - xmitsInProgress;
}
int numReplicationTasks = (int) Math.ceil(
- (double) (totalReplicateBlocks * maxTransfers) / totalBlocks);
- int numECTasks = (int) Math.ceil(
- (double) (totalECBlocks * maxTransfers) / totalBlocks);
- LOG.debug("Pending replication tasks: {} erasure-coded tasks: {}.",
- numReplicationTasks, numECTasks);
+ (double) (replicationBlocks * maxReplicationTransfers) /
totalBlocks);
+ int numEcReplicatedTasks = (int) Math.ceil(
+ (double) (ecReplicatedBlocks * maxECReplicatedTransfers) /
totalBlocks);
+ int numECReconstructedTasks = (int) Math.ceil(
+ (double) (ecReconstructedBlocks * maxECReconstructedTransfers) /
totalBlocks);
+ LOG.debug("Pending replication tasks: {} ec replicated tasks: {} " +
Review Comment:
```suggestion
LOG.debug("Pending replication tasks: {} ec to be replicated tasks: {}
" +
```
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java:
##########
@@ -1825,28 +1825,48 @@ public DatanodeCommand[]
handleHeartbeat(DatanodeRegistration nodeReg,
// Allocate _approximately_ maxTransfers pending tasks to DataNode.
// NN chooses pending tasks based on the ratio between the lengths of
// replication and erasure-coded block queues.
- int totalReplicateBlocks = nodeinfo.getNumberOfReplicateBlocks();
- int totalECBlocks = nodeinfo.getNumberOfBlocksToBeErasureCoded();
- int totalBlocks = totalReplicateBlocks + totalECBlocks;
+ int replicationBlocks = nodeinfo.getNumberOfReplicateBlocks();
+ int ecReplicatedBlocks = nodeinfo.getNumberOfECReplicatedBlocks();
+ int ecReconstructedBlocks = nodeinfo.getNumberOfBlocksToBeErasureCoded();
+ int totalBlocks = replicationBlocks + ecReplicatedBlocks +
ecReconstructedBlocks;
if (totalBlocks > 0) {
- int maxTransfers;
+ int maxReplicationTransfers = blockManager.getMaxReplicationStreams()
+ - xmitsInProgress;
+ int maxECReplicatedTransfers;
+ int maxECReconstructedTransfers;
Review Comment:
I think we only want to increase `maxECReplicatedTransfers` when
decommissioning DNs. So we should use soft-limit for the max number of
reconstruction tasks.
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java:
##########
@@ -731,6 +746,14 @@ public int getNumberOfBlocksToBeErasureCoded() {
return erasurecodeBlocks.size();
}
+ /**
+ * The number of ec work items that are pending to be replicated.
+ */
+ @VisibleForTesting
+ public int getNumberOfECReplicatedBlocks() {
Review Comment:
```suggestion
public int getNumberOfECBlocksToBeReplicated() {
```
##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestDatanodeManager.java:
##########
@@ -978,9 +978,9 @@ public void testRemoveIncludedNode() throws IOException {
* @throws IOException
*/
private void verifyPendingRecoveryTasks(
- int numReplicationBlocks, int numECBlocks,
- int maxTransfers, int maxTransfersHardLimit,
- int numReplicationTasks, int numECTasks, boolean isDecommissioning)
+ int numReplicationBlocks, int numEcReplicatedBlocks, int numECBlocks,
+ int maxTransfers, int maxTransfersHardLimit, int numReplicationTasks,
+ int numECReplicatedTasks, int numECTasks, boolean isDecommissioning)
Review Comment:
- Please update the javadoc comments for the new variables.
- The names of `numECBlocks` and `numECTasks` should be refactored, as I
suggested in other places.
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java:
##########
@@ -197,6 +197,9 @@ public Type getType() {
/** A queue of blocks to be replicated by this datanode */
private final BlockQueue<BlockTargetPair> replicateBlocks =
new BlockQueue<>();
+ /** A queue of ec blocks to be replicated by this datanode. */
+ private final BlockQueue<BlockTargetPair> ecReplicatedBlocks =
+ new BlockQueue<>();
/** A queue of blocks to be erasure coded by this datanode */
private final BlockQueue<BlockECReconstructionInfo> erasurecodeBlocks =
new BlockQueue<>();
Review Comment:
I prefer more descriptive variable names. What do you think?
```suggestion
/** A queue of ec blocks to be replicated by this datanode. */
private final BlockQueue<BlockTargetPair> ecBlocksToBeReplicated =
new BlockQueue<>();
/** A queue of ec blocks to be erasure coded by this datanode */
private final BlockQueue<BlockECReconstructionInfo>
ecBlocksToBeErasureCoded =
new BlockQueue<>();
```
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java:
##########
@@ -678,6 +682,16 @@ public void addBlockToBeReplicated(Block block,
replicateBlocks.offer(new BlockTargetPair(block, targets));
}
+ /**
+ * Store ec block to be replicated work.
+ */
+ @VisibleForTesting
+ public void addECBlockToBeReplicated(Block block,
+ DatanodeStorageInfo[] targets) {
Review Comment:
You can write it in one line. If you are using IntelliJ, you can set Hadoop
code style using `dev-support/code-formatter/hadoop_idea_formatter.xml`.
```suggestion
public void addECBlockToBeReplicated(Block block, DatanodeStorageInfo[]
targets) {
```
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeManager.java:
##########
@@ -1825,28 +1825,48 @@ public DatanodeCommand[]
handleHeartbeat(DatanodeRegistration nodeReg,
// Allocate _approximately_ maxTransfers pending tasks to DataNode.
// NN chooses pending tasks based on the ratio between the lengths of
// replication and erasure-coded block queues.
- int totalReplicateBlocks = nodeinfo.getNumberOfReplicateBlocks();
- int totalECBlocks = nodeinfo.getNumberOfBlocksToBeErasureCoded();
- int totalBlocks = totalReplicateBlocks + totalECBlocks;
+ int replicationBlocks = nodeinfo.getNumberOfReplicateBlocks();
+ int ecReplicatedBlocks = nodeinfo.getNumberOfECReplicatedBlocks();
+ int ecReconstructedBlocks = nodeinfo.getNumberOfBlocksToBeErasureCoded();
+ int totalBlocks = replicationBlocks + ecReplicatedBlocks +
ecReconstructedBlocks;
Review Comment:
```suggestion
int replicationBlocks = nodeinfo.getNumberOfReplicateBlocks();
int ecBlocksToBeReplicated =
nodeinfo.getNumberOfECBlocksToBeReplicated();
int ecBlocksToBeErasureCoded =
nodeinfo.getNumberOfBlocksToBeErasureCoded();
int totalBlocks = replicationBlocks + ecBlocksToBeReplicated +
ecBlocksToBeErasureCoded;
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]