This is an automated email from the ASF dual-hosted git repository.
sumitagrawal pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 6ef537f302d HDDS-13174. EC duplicate replica handling for different
index in datanodes (#8887)
6ef537f302d is described below
commit 6ef537f302d9326c2fd82aea33769c5436834212
Author: Sumit Agrawal <[email protected]>
AuthorDate: Mon Aug 25 17:59:21 2025 +0530
HDDS-13174. EC duplicate replica handling for different index in datanodes
(#8887)
---
.../ozone/container/common/impl/ContainerSet.java | 21 +++++++--
.../container/metadata/ContainerCreateInfo.java | 21 ++++++---
.../WitnessedContainerMetadataStoreImpl.java | 5 ++-
.../ozone/container/ozoneimpl/ContainerReader.java | 31 +++++++++++++
.../common/impl/ContainerImplTestUtils.java | 5 +++
.../container/ozoneimpl/TestContainerReader.java | 51 +++++++++++++++++++++-
.../src/main/proto/DatanodeClientProtocol.proto | 1 +
7 files changed, 123 insertions(+), 12 deletions(-)
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
index 758bf594299..baf6d48a949 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/impl/ContainerSet.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.ozone.container.common.impl;
import static
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto.State.RECOVERING;
+import static
org.apache.hadoop.ozone.container.metadata.ContainerCreateInfo.INVALID_REPLICA_INDEX;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
@@ -44,6 +45,7 @@
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReportsProto;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
+import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.container.common.interfaces.Container;
import org.apache.hadoop.ozone.container.common.statemachine.StateContext;
import org.apache.hadoop.ozone.container.common.utils.ContainerLogger;
@@ -98,6 +100,11 @@ public long getCurrentTime() {
return clock.millis();
}
+ @Nullable
+ public WitnessedContainerMetadataStore getContainerMetadataStore() {
+ return containerMetadataStore;
+ }
+
@VisibleForTesting
public void setRecoveringTimeout(long recoveringTimeout) {
this.recoveringTimeout = recoveringTimeout;
@@ -192,7 +199,7 @@ private boolean addContainer(Container<?> container,
boolean overwrite) throws
LOG.debug("Container with container Id {} is added to containerMap",
containerId);
}
- updateContainerIdTable(containerId, containerState);
+ updateContainerIdTable(containerId, container.getContainerData());
missingContainerSet.remove(containerId);
if (container.getContainerData().getState() == RECOVERING) {
recoveringContainerMap.put(
@@ -207,11 +214,17 @@ private boolean addContainer(Container<?> container,
boolean overwrite) throws
}
}
- private void updateContainerIdTable(long containerId, State containerState)
throws StorageContainerException {
+ private void updateContainerIdTable(long containerId, ContainerData
containerData) throws StorageContainerException {
if (null != containerMetadataStore) {
try {
-
containerMetadataStore.getContainerCreateInfoTable().put(ContainerID.valueOf(containerId),
- ContainerCreateInfo.valueOf(containerState));
+ ContainerID containerIdObj = ContainerID.valueOf(containerId);
+ Table<ContainerID, ContainerCreateInfo> containerCreateInfoTable =
+ containerMetadataStore.getContainerCreateInfoTable();
+ ContainerCreateInfo containerCreateInfo =
containerCreateInfoTable.get(containerIdObj);
+ if (containerCreateInfo == null ||
containerCreateInfo.getReplicaIndex() == INVALID_REPLICA_INDEX) {
+ containerCreateInfoTable.put(containerIdObj,
+ ContainerCreateInfo.valueOf(containerData.getState(),
containerData.getReplicaIndex()));
+ }
} catch (IOException e) {
throw new StorageContainerException(e,
ContainerProtos.Result.IO_EXCEPTION);
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/ContainerCreateInfo.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/ContainerCreateInfo.java
index ab7700c6ff3..f74b52491d8 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/ContainerCreateInfo.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/ContainerCreateInfo.java
@@ -31,12 +31,14 @@
*/
@Immutable
public final class ContainerCreateInfo {
+ public static final int INVALID_REPLICA_INDEX = -1;
private static final Codec<ContainerCreateInfo> CODEC = new DelegatedCodec<>(
Proto3Codec.get(ContainerProtos.ContainerCreateInfo.getDefaultInstance()),
ContainerCreateInfo::getFromProtobuf, ContainerCreateInfo::getProtobuf,
ContainerCreateInfo.class);
private final ContainerProtos.ContainerDataProto.State state;
+ private final int replicaIndex;
private final Supplier<ContainerProtos.ContainerCreateInfo> proto;
public static Codec<ContainerCreateInfo> getCodec() {
@@ -47,19 +49,22 @@ public static Codec<ContainerCreateInfo> getNewCodec() {
return CODEC;
}
- private ContainerCreateInfo(ContainerProtos.ContainerDataProto.State state) {
+ private ContainerCreateInfo(ContainerProtos.ContainerDataProto.State state,
int replicaIndex) {
this.state = state;
+ this.replicaIndex = replicaIndex;
this.proto = MemoizedSupplier.valueOf(
- () ->
ContainerProtos.ContainerCreateInfo.newBuilder().setState(state).build());
+ () ->
ContainerProtos.ContainerCreateInfo.newBuilder().setState(state).setReplicaIndex(replicaIndex).build());
}
/**
* Factory method for creation of ContainerCreateInfo.
- * @param state State
+ *
+ * @param state State
+ * @param replicaIndex replica index
* @return ContainerCreateInfo.
*/
- public static ContainerCreateInfo valueOf(final
ContainerProtos.ContainerDataProto.State state) {
- return new ContainerCreateInfo(state);
+ public static ContainerCreateInfo valueOf(final
ContainerProtos.ContainerDataProto.State state, int replicaIndex) {
+ return new ContainerCreateInfo(state, replicaIndex);
}
public ContainerProtos.ContainerCreateInfo getProtobuf() {
@@ -67,10 +72,14 @@ public ContainerProtos.ContainerCreateInfo getProtobuf() {
}
public static ContainerCreateInfo
getFromProtobuf(ContainerProtos.ContainerCreateInfo proto) {
- return ContainerCreateInfo.valueOf(proto.getState());
+ return ContainerCreateInfo.valueOf(proto.getState(),
proto.getReplicaIndex());
}
public ContainerProtos.ContainerDataProto.State getState() {
return state;
}
+
+ public int getReplicaIndex() {
+ return replicaIndex;
+ }
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/WitnessedContainerMetadataStoreImpl.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/WitnessedContainerMetadataStoreImpl.java
index a389d0497ff..e1ec2f4e3bf 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/WitnessedContainerMetadataStoreImpl.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/WitnessedContainerMetadataStoreImpl.java
@@ -17,6 +17,8 @@
package org.apache.hadoop.ozone.container.metadata;
+import static
org.apache.hadoop.ozone.container.metadata.ContainerCreateInfo.INVALID_REPLICA_INDEX;
+
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.concurrent.ConcurrentHashMap;
@@ -114,7 +116,8 @@ public void init(DBStore dbStore) throws
RocksDatabaseException, CodecException
if
(!VersionedDatanodeFeatures.isFinalized(HDDSLayoutFeature.WITNESSED_CONTAINER_DB_PROTO_VALUE))
{
this.containerIdsTable = dbStore.getTable(CONTAINER_IDS_STR_VAL_TABLE,
ContainerID.getCodec(),
new DelegatedCodec<>(StringCodec.get(),
- (strVal) ->
ContainerCreateInfo.valueOf(ContainerProtos.ContainerDataProto.State.valueOf(strVal)),
+ (strVal) ->
ContainerCreateInfo.valueOf(ContainerProtos.ContainerDataProto.State.valueOf(strVal),
+ INVALID_REPLICA_INDEX),
(obj) -> obj.getState().name(), ContainerCreateInfo.class));
}
}
diff --git
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
index f3b39333e08..5e77462ddd9 100644
---
a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
+++
b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ozoneimpl/ContainerReader.java
@@ -26,6 +26,7 @@
import java.io.IOException;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
import
org.apache.hadoop.hdds.scm.container.common.helpers.StorageContainerException;
import org.apache.hadoop.ozone.common.Storage;
import org.apache.hadoop.ozone.container.common.helpers.ContainerUtils;
@@ -37,6 +38,8 @@
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainer;
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
import
org.apache.hadoop.ozone.container.keyvalue.helpers.KeyValueContainerUtil;
+import org.apache.hadoop.ozone.container.metadata.ContainerCreateInfo;
+import
org.apache.hadoop.ozone.container.metadata.WitnessedContainerMetadataStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -235,6 +238,10 @@ public void verifyAndFixupContainerData(ContainerData
containerData)
return;
}
+ if (!isMatchedLastLoadedECContainer(kvContainer,
containerSet.getContainerMetadataStore())) {
+ return;
+ }
+
try {
containerSet.addContainer(kvContainer);
// this should be the last step of this block
@@ -261,6 +268,30 @@ public void verifyAndFixupContainerData(ContainerData
containerData)
}
}
+ private boolean isMatchedLastLoadedECContainer(
+ KeyValueContainer kvContainer, WitnessedContainerMetadataStore
containerMetadataStore) throws IOException {
+ if (null != containerMetadataStore &&
kvContainer.getContainerData().getReplicaIndex() != 0) {
+ ContainerCreateInfo containerCreateInfo =
containerMetadataStore.getContainerCreateInfoTable()
+
.get(ContainerID.valueOf(kvContainer.getContainerData().getContainerID()));
+      // If a DB entry exists for this container (recorded when it was last loaded),
+      // verify that the EC replica index matches, and skip loading the container if it does not.
+      // A replica index of -1 in the DB means no previous replica index was recorded, so skip the check.
+ if (null != containerCreateInfo
+ && containerCreateInfo.getReplicaIndex() !=
ContainerCreateInfo.INVALID_REPLICA_INDEX
+ && containerCreateInfo.getReplicaIndex() !=
kvContainer.getContainerData().getReplicaIndex()) {
+ LOG.info("EC Container {} with replica index {} present at path {} is
not matched with DB replica index {}," +
+ " ignoring the load of the container.",
+ kvContainer.getContainerData().getContainerID(),
+ kvContainer.getContainerData().getReplicaIndex(),
+ kvContainer.getContainerData().getContainerPath(),
+ containerCreateInfo.getReplicaIndex());
+ return false;
+ }
+ }
+    // Return true when the container is not EC, no DB entry exists, or the replica index matches.
+ return true;
+ }
+
/**
* Resolve duplicate containers.
* @param existing
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/ContainerImplTestUtils.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/ContainerImplTestUtils.java
index e005c426333..46f8289c627 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/ContainerImplTestUtils.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/common/impl/ContainerImplTestUtils.java
@@ -39,6 +39,11 @@ public static ContainerSet newContainerSet() {
public static ContainerSet newContainerSet(long recoveringTimeout) {
WitnessedContainerMetadataStore mockMetadataStore =
mock(WitnessedContainerMetadataStore.class);
when(mockMetadataStore.getContainerCreateInfoTable()).thenReturn(new
InMemoryTestTable<>());
+ return newContainerSet(recoveringTimeout, mockMetadataStore);
+ }
+
+ public static ContainerSet newContainerSet(
+ long recoveringTimeout, WitnessedContainerMetadataStore
mockMetadataStore) {
return ContainerSet.newRwContainerSet(mockMetadataStore,
recoveringTimeout);
}
diff --git
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
index 69415972d00..eb79f6472f0 100644
---
a/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
+++
b/hadoop-hdds/container-service/src/test/java/org/apache/hadoop/ozone/container/ozoneimpl/TestContainerReader.java
@@ -51,6 +51,8 @@
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos;
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
+import org.apache.hadoop.hdds.scm.container.ContainerID;
+import org.apache.hadoop.hdds.utils.db.InMemoryTestTable;
import org.apache.hadoop.hdds.utils.db.Table;
import org.apache.hadoop.ozone.OzoneConfigKeys;
import org.apache.hadoop.ozone.OzoneConsts;
@@ -76,7 +78,9 @@
import org.apache.hadoop.ozone.container.keyvalue.KeyValueContainerData;
import org.apache.hadoop.ozone.container.keyvalue.KeyValueHandler;
import org.apache.hadoop.ozone.container.keyvalue.helpers.BlockUtils;
+import org.apache.hadoop.ozone.container.metadata.ContainerCreateInfo;
import org.apache.hadoop.ozone.container.metadata.DatanodeStoreSchemaThreeImpl;
+import
org.apache.hadoop.ozone.container.metadata.WitnessedContainerMetadataStore;
import org.apache.hadoop.util.Time;
import org.apache.ozone.test.GenericTestUtils.LogCapturer;
import org.apache.ratis.util.FileUtils;
@@ -91,6 +95,7 @@ public class TestContainerReader {
private MutableVolumeSet volumeSet;
private HddsVolume hddsVolume;
private ContainerSet containerSet;
+ private WitnessedContainerMetadataStore mockMetadataStore;
private OzoneConfiguration conf;
private RoundRobinVolumeChoosingPolicy volumeChoosingPolicy;
@@ -112,7 +117,9 @@ private void setup(ContainerTestVersionInfo versionInfo)
throws Exception {
Files.createDirectory(tempDir.resolve("volumeDir")).toFile();
this.conf = new OzoneConfiguration();
volumeSet = mock(MutableVolumeSet.class);
- containerSet = newContainerSet();
+ mockMetadataStore = mock(WitnessedContainerMetadataStore.class);
+ when(mockMetadataStore.getContainerCreateInfoTable()).thenReturn(new
InMemoryTestTable<>());
+ containerSet = newContainerSet(1000, mockMetadataStore);
datanodeId = UUID.randomUUID();
hddsVolume = new HddsVolume.Builder(volumeDir
@@ -396,6 +403,7 @@ public void testContainerReaderWithInvalidDbPath(
assertThat(dnLogs.getOutput()).contains("Container DB file is missing");
}
+ @SuppressWarnings("checkstyle:MethodLength")
@ContainerTestVersionInfo.ContainerTest
public void testMultipleContainerReader(ContainerTestVersionInfo versionInfo)
throws Exception {
@@ -440,6 +448,11 @@ public void
testMultipleContainerReader(ContainerTestVersionInfo versionInfo)
KeyValueContainer conflict22 = null;
KeyValueContainer ec1 = null;
KeyValueContainer ec2 = null;
+ KeyValueContainer ec3 = null;
+ KeyValueContainer ec4 = null;
+ KeyValueContainer ec5 = null;
+ KeyValueContainer ec6 = null;
+ KeyValueContainer ec7 = null;
long baseBCSID = 10L;
for (int i = 0; i < containerCount; i++) {
@@ -465,6 +478,25 @@ public void
testMultipleContainerReader(ContainerTestVersionInfo versionInfo)
} else if (i == 3) {
ec1 = createContainerWithId(i, volumeSets, policy, baseBCSID, 1);
ec2 = createContainerWithId(i, volumeSets, policy, baseBCSID, 1);
+ } else if (i == 4) {
+ ec3 = createContainerWithId(i, volumeSets, policy, baseBCSID, 1);
+ ec4 = createContainerWithId(i, volumeSets, policy, baseBCSID, 2);
+ ec3.close();
+ ec4.close();
+
mockMetadataStore.getContainerCreateInfoTable().put(ContainerID.valueOf(i),
ContainerCreateInfo.valueOf(
+ ContainerProtos.ContainerDataProto.State.CLOSED, 1));
+ } else if (i == 5) {
+ ec5 = createContainerWithId(i, volumeSets, policy, baseBCSID, 1);
+ ec6 = createContainerWithId(i, volumeSets, policy, baseBCSID, 2);
+ ec6.close();
+ ec5.close();
+
mockMetadataStore.getContainerCreateInfoTable().put(ContainerID.valueOf(i),
ContainerCreateInfo.valueOf(
+ ContainerProtos.ContainerDataProto.State.CLOSED, 2));
+ } else if (i == 6) {
+ ec7 = createContainerWithId(i, volumeSets, policy, baseBCSID, 3);
+ ec7.close();
+
mockMetadataStore.getContainerCreateInfoTable().put(ContainerID.valueOf(i),
ContainerCreateInfo.valueOf(
+ ContainerProtos.ContainerDataProto.State.CLOSED, -1));
} else {
createContainerWithId(i, volumeSets, policy, baseBCSID, 0);
}
@@ -532,6 +564,23 @@ public void
testMultipleContainerReader(ContainerTestVersionInfo versionInfo)
assertTrue(Files.exists(Paths.get(ec2.getContainerData().getContainerPath())));
assertNotNull(containerSet.getContainer(3));
+    // For an EC conflict with different replica indexes, all container replicas remain on disk,
+    // but the containerSet loads only the replica whose index matches the one recorded in the DB.
+
assertTrue(Files.exists(Paths.get(ec3.getContainerData().getContainerPath())));
+
assertTrue(Files.exists(Paths.get(ec4.getContainerData().getContainerPath())));
+
assertEquals(containerSet.getContainer(ec3.getContainerData().getContainerID()).getContainerData()
+ .getReplicaIndex(), ec3.getContainerData().getReplicaIndex());
+
+
assertTrue(Files.exists(Paths.get(ec5.getContainerData().getContainerPath())));
+
assertTrue(Files.exists(Paths.get(ec6.getContainerData().getContainerPath())));
+
assertEquals(containerSet.getContainer(ec6.getContainerData().getContainerID()).getContainerData()
+ .getReplicaIndex(), ec6.getContainerData().getReplicaIndex());
+
+    // An EC container whose DB entry has replica index -1 is allowed to be loaded.
+
assertTrue(Files.exists(Paths.get(ec7.getContainerData().getContainerPath())));
+ assertEquals(3, mockMetadataStore.getContainerCreateInfoTable().get(
+
ContainerID.valueOf(ec7.getContainerData().getContainerID())).getReplicaIndex());
+
// There should be no open containers cached by the ContainerReader as it
// opens and closed them avoiding the cache.
assertEquals(0, cache.size());
diff --git
a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
index 389e0dccd5d..bdba99cffd0 100644
--- a/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
+++ b/hadoop-hdds/interface-client/src/main/proto/DatanodeClientProtocol.proto
@@ -594,4 +594,5 @@ service IntraDatanodeProtocolService {
message ContainerCreateInfo {
required ContainerDataProto.State state = 1;
+ optional int32 replicaIndex = 2 [default = -1];
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]