xinglin commented on code in PR #5778:
URL: https://github.com/apache/hadoop/pull/5778#discussion_r1241492929
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml:
##########
@@ -342,6 +342,14 @@
</description>
</property>
+ <property>
+ <name>dfs.namenode.choosereplicatodelete.considerLoad</name>
+ <value>false</value>
+ <description>Decide if chooseReplicaToDelete considers the target's load or
Review Comment:
Please add a note on how the load is being determined, i.e., which metric is
being used as the load.
##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java:
##########
@@ -1018,6 +1018,75 @@ public void testChooseReplicaToDelete() throws Exception
{
assertEquals(chosen, storages[1]);
}
+ /**
+ * Test for the chooseReplicaToDelete are processed based on
+ * block locality, load and free space.
+ */
+ @Test
+ public void testChooseReplicaToDeleteConsiderLoad() throws Exception {
+ ((BlockPlacementPolicyDefault)
replicator).setConsiderLoad2chooseReplicaDeleting(true);
+ List<DatanodeStorageInfo> replicaList = new ArrayList<>();
+ final Map<String, List<DatanodeStorageInfo>> rackMap
+ = new HashMap<String, List<DatanodeStorageInfo>>();
+
+ storages[0].setRemainingForTests(4*1024*1024);
+ dataNodes[0].setRemaining(calculateRemaining(dataNodes[0]));
+ dataNodes[0].setXceiverCount(20);
+ replicaList.add(storages[0]);
+
+ storages[1].setRemainingForTests(3*1024*1024);
+ dataNodes[1].setRemaining(calculateRemaining(dataNodes[1]));
+ dataNodes[1].setXceiverCount(10);
+ replicaList.add(storages[1]);
+
+ storages[2].setRemainingForTests(2*1024*1024);
+ dataNodes[2].setRemaining(calculateRemaining(dataNodes[2]));
+ dataNodes[2].setXceiverCount(15);
+ replicaList.add(storages[2]);
+
+ //Even if this node has the most space, because the storage[5] has
+ //the lowest it should be chosen in case of block delete.
+ storages[5].setRemainingForTests(512 * 1024);
+ dataNodes[5].setRemaining(calculateRemaining(dataNodes[5]));
+ dataNodes[5].setXceiverCount(5);
+ replicaList.add(storages[5]);
+
+ // Refresh the last update time for all the datanodes
+ for (int i = 0; i < dataNodes.length; i++) {
+ DFSTestUtil.resetLastUpdatesWithOffset(dataNodes[i], 0);
+ }
+
+ List<DatanodeStorageInfo> first = new ArrayList<>();
+ List<DatanodeStorageInfo> second = new ArrayList<>();
+ replicator.splitNodesWithRack(replicaList, replicaList, rackMap, first,
+ second);
+ // storages[0] and storages[1] are in first set as their rack has two
+ // replica nodes, while storages[2] and dataNodes[5] are in second set.
+ assertEquals(2, first.size());
+ assertEquals(2, second.size());
+ List<StorageType> excessTypes = new ArrayList<>();
+ {
+ // test returning null
+ excessTypes.add(StorageType.SSD);
+ assertNull(((BlockPlacementPolicyDefault) replicator)
+ .chooseReplicaToDelete(first, second, excessTypes, rackMap));
+ }
+ excessTypes.add(StorageType.DEFAULT);
+ DatanodeStorageInfo chosen = ((BlockPlacementPolicyDefault) replicator)
+ .chooseReplicaToDelete(first, second, excessTypes, rackMap);
+ // Within all storages, storages[5] with least load.
Review Comment:
this test case does not seem to be properly constructed: storages[5] also has
the least remaining space, so the assertion would pass even without load-based
selection. Can we change the space for storage[5] to 5*1024*1024 and see if
storage[5] is still picked?
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java:
##########
@@ -276,6 +276,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys
{
public static final boolean
DFS_NAMENODE_REDUNDANCY_CONSIDERLOADBYVOLUME_DEFAULT
= false;
+ public static final String
DFS_NAMENODE_CHOOSEREPLICATODELETE_CONSIDERLOAD_KEY =
+ "dfs.namenode.choosereplicatodelete.considerLoad";
Review Comment:
nit: could we change to "dfs.namenode.load-based.replica.deletion"?
##########
hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java:
##########
@@ -1236,11 +1245,17 @@ public DatanodeStorageInfo chooseReplicaToDelete(
minSpace = free;
minSpaceStorage = storage;
}
+ if (considerLoad2chooseReplicaDeleting && nodeLoad < minLoad) {
+ minLoad = nodeLoad;
+ minLoadStorage = storage;
+ }
}
final DatanodeStorageInfo storage;
if (oldestHeartbeatStorage != null) {
Review Comment:
this `if else` code section seems problematic: after exiting from the
previous for loop, minSpaceStorage should not be null because minSpace is
initialized to Long.MAX_VALUE. So `storage = minSpaceStorage;` will always be
executed.
##########
hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java:
##########
@@ -1018,6 +1018,75 @@ public void testChooseReplicaToDelete() throws Exception
{
assertEquals(chosen, storages[1]);
}
+ /**
+ * Test for the chooseReplicaToDelete are processed based on
+ * block locality, load and free space.
+ */
+ @Test
+ public void testChooseReplicaToDeleteConsiderLoad() throws Exception {
+ ((BlockPlacementPolicyDefault)
replicator).setConsiderLoad2chooseReplicaDeleting(true);
+ List<DatanodeStorageInfo> replicaList = new ArrayList<>();
+ final Map<String, List<DatanodeStorageInfo>> rackMap
+ = new HashMap<String, List<DatanodeStorageInfo>>();
+
+ storages[0].setRemainingForTests(4*1024*1024);
+ dataNodes[0].setRemaining(calculateRemaining(dataNodes[0]));
+ dataNodes[0].setXceiverCount(20);
+ replicaList.add(storages[0]);
+
+ storages[1].setRemainingForTests(3*1024*1024);
+ dataNodes[1].setRemaining(calculateRemaining(dataNodes[1]));
+ dataNodes[1].setXceiverCount(10);
+ replicaList.add(storages[1]);
+
+ storages[2].setRemainingForTests(2*1024*1024);
+ dataNodes[2].setRemaining(calculateRemaining(dataNodes[2]));
+ dataNodes[2].setXceiverCount(15);
+ replicaList.add(storages[2]);
+
+ //Even if this node has the most space, because the storage[5] has
+ //the lowest it should be chosen in case of block delete.
+ storages[5].setRemainingForTests(512 * 1024);
+ dataNodes[5].setRemaining(calculateRemaining(dataNodes[5]));
+ dataNodes[5].setXceiverCount(5);
+ replicaList.add(storages[5]);
+
+ // Refresh the last update time for all the datanodes
+ for (int i = 0; i < dataNodes.length; i++) {
+ DFSTestUtil.resetLastUpdatesWithOffset(dataNodes[i], 0);
+ }
+
+ List<DatanodeStorageInfo> first = new ArrayList<>();
+ List<DatanodeStorageInfo> second = new ArrayList<>();
+ replicator.splitNodesWithRack(replicaList, replicaList, rackMap, first,
+ second);
+ // storages[0] and storages[1] are in first set as their rack has two
+ // replica nodes, while storages[2] and dataNodes[5] are in second set.
+ assertEquals(2, first.size());
+ assertEquals(2, second.size());
+ List<StorageType> excessTypes = new ArrayList<>();
+ {
+ // test returning null
+ excessTypes.add(StorageType.SSD);
+ assertNull(((BlockPlacementPolicyDefault) replicator)
+ .chooseReplicaToDelete(first, second, excessTypes, rackMap));
+ }
+ excessTypes.add(StorageType.DEFAULT);
+ DatanodeStorageInfo chosen = ((BlockPlacementPolicyDefault) replicator)
+ .chooseReplicaToDelete(first, second, excessTypes, rackMap);
+ // Within all storages, storages[5] with least load.
+ assertEquals(chosen, storages[5]);
+
+ replicator.adjustSetsWithChosenReplica(rackMap, first, second, chosen);
+ assertEquals(2, first.size());
+ assertEquals(1, second.size());
+ // Within first set, storages[1] with less load.
Review Comment:
same here: storage[1] also has less space left than storage[0], so it would
have been chosen even when we pick the replica to delete based on remaining
space.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]