This is an automated email from the ASF dual-hosted git repository.
ashishkr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new e1c0e0d885b HDDS-13520. Always retry failed BlockDeleteTransactions in
SCM BlockDeletingSerice. (#8926)
e1c0e0d885b is described below
commit e1c0e0d885b173fdee81a7abe48696fee6648b91
Author: Nandakumar Vadivelu <[email protected]>
AuthorDate: Wed Sep 17 12:55:27 2025 +0530
HDDS-13520. Always retry failed BlockDeleteTransactions in SCM
BlockDeletingSerice. (#8926)
---
.../org/apache/hadoop/hdds/scm/ScmConfigKeys.java | 4 -
.../apache/hadoop/hdds/scm/client/ScmClient.java | 21 --
.../protocol/StorageContainerLocationProtocol.java | 2 +
.../common/src/main/resources/ozone-default.xml | 10 -
...inerLocationProtocolClientSideTranslatorPB.java | 25 +-
.../src/main/proto/ScmAdminProtocol.proto | 8 +-
.../proto/ScmServerDatanodeHeartbeatProtocol.proto | 3 +-
.../hadoop/hdds/scm/block/DeletedBlockLog.java | 31 +--
.../hadoop/hdds/scm/block/DeletedBlockLogImpl.java | 123 +--------
.../scm/block/DeletedBlockLogStateManager.java | 2 +
.../scm/block/DeletedBlockLogStateManagerImpl.java | 72 +-----
.../SCMDeletedBlockTransactionStatusManager.java | 36 +--
...inerLocationProtocolServerSideTranslatorPB.java | 2 +
.../hdds/scm/server/SCMClientProtocolServer.java | 39 +--
.../org/apache/hadoop/ozone/audit/SCMAction.java | 2 -
.../hadoop/hdds/scm/block/TestDeletedBlockLog.java | 155 +-----------
.../hdds/scm/cli/ContainerOperationClient.java | 13 -
.../ozone/admin/scm/DeletedBlocksTxnCommands.java | 38 ---
.../scm/GetFailedDeletedBlocksTxnSubcommand.java | 92 -------
.../scm/ResetDeletedBlockRetryCountSubcommand.java | 110 --------
.../apache/hadoop/ozone/admin/scm/ScmAdmin.java | 1 -
.../hdds/scm/TestStorageContainerManager.java | 5 +-
.../ozone/shell/TestDeletedBlocksTxnShell.java | 281 ---------------------
23 files changed, 58 insertions(+), 1017 deletions(-)
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
index e4cc9827b98..23400b1a06b 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ScmConfigKeys.java
@@ -452,10 +452,6 @@ public final class ScmConfigKeys {
public static final boolean
OZONE_SCM_PIPELINE_AUTO_CREATE_FACTOR_ONE_DEFAULT = true;
- public static final String OZONE_SCM_BLOCK_DELETION_MAX_RETRY =
- "ozone.scm.block.deletion.max.retry";
- public static final int OZONE_SCM_BLOCK_DELETION_MAX_RETRY_DEFAULT = 4096;
-
public static final String
OZONE_SCM_BLOCK_DELETION_PER_DN_DISTRIBUTION_FACTOR =
"ozone.scm.block.deletion.per.dn.distribution.factor";
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
index 67ee84512f1..7677ed58707 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/client/ScmClient.java
@@ -29,7 +29,6 @@
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
-import
org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
@@ -412,26 +411,6 @@ StartContainerBalancerResponseProto startContainerBalancer(
*/
void transferLeadership(String newLeaderId) throws IOException;
- /**
- * Return the failed transactions of the Deleted blocks. A transaction is
- * considered to be failed if it has been sent more than MAX_RETRY limit
- * and its count is reset to -1.
- *
- * @param count Maximum num of returned transactions, if {@literal < 0}.
return all.
- * @param startTxId The least transaction id to start with.
- * @return a list of failed deleted block transactions.
- * @throws IOException
- */
- List<DeletedBlocksTransactionInfo> getFailedDeletedBlockTxn(int count,
- long startTxId) throws IOException;
-
- /**
- * Reset the failed deleted block retry count.
- * @param txIDs transactionId list to be reset
- * @throws IOException
- */
- int resetDeletedBlockRetryCount(List<Long> txIDs) throws IOException;
-
/**
* Get usage information of datanode by address or uuid.
*
diff --git
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
index 1f0f8cd8b06..56411453fc8 100644
---
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
+++
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocol.java
@@ -347,6 +347,7 @@ Pipeline
createReplicationPipeline(HddsProtos.ReplicationType type,
* @return a list of failed deleted block transactions.
* @throws IOException
*/
+ @Deprecated
List<DeletedBlocksTransactionInfo> getFailedDeletedBlockTxn(int count,
long startTxId) throws IOException;
@@ -357,6 +358,7 @@ List<DeletedBlocksTransactionInfo>
getFailedDeletedBlockTxn(int count,
* @return num of successful reset
* @throws IOException
*/
+ @Deprecated
int resetDeletedBlockRetryCount(List<Long> txIDs) throws IOException;
/**
diff --git a/hadoop-hdds/common/src/main/resources/ozone-default.xml
b/hadoop-hdds/common/src/main/resources/ozone-default.xml
index 34efdd7b248..37acbe5c29e 100644
--- a/hadoop-hdds/common/src/main/resources/ozone-default.xml
+++ b/hadoop-hdds/common/src/main/resources/ozone-default.xml
@@ -768,16 +768,6 @@
The port number of the Ozone SCM block client service.
</description>
</property>
- <property>
- <name>ozone.scm.block.deletion.max.retry</name>
- <value>4096</value>
- <tag>OZONE, SCM</tag>
- <description>
- SCM wraps up many blocks in a deletion transaction and sends that to data
- node for physical deletion periodically. This property determines how
many
- times SCM is going to retry sending a deletion operation to the data
node.
- </description>
- </property>
<property>
<name>ozone.scm.block.deletion.per.dn.distribution.factor</name>
<value>8</value>
diff --git
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
index 7072d6090ba..2a85e6e4007 100644
---
a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
+++
b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/scm/protocolPB/StorageContainerLocationProtocolClientSideTranslatorPB.java
@@ -27,6 +27,7 @@
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -78,8 +79,6 @@
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeRequestProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetContainersOnDecomNodeResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetExistContainerWithPipelinesInBatchRequestProto;
-import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetFailedDeletedBlocksTxnRequestProto;
-import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetFailedDeletedBlocksTxnResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetMetricsRequestProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetMetricsResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.GetPipelineRequestProto;
@@ -102,7 +101,6 @@
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ReplicationManagerReportResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ReplicationManagerStatusRequestProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ReplicationManagerStatusResponseProto;
-import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ResetDeletedBlockRetryCountRequestProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SCMCloseContainerRequestProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SCMCloseContainerResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.SCMDeleteContainerRequestProto;
@@ -789,31 +787,18 @@ public void transferLeadership(String nodeId)
builder ->
builder.setTransferScmLeadershipRequest(reqBuilder.build()));
}
+ @Deprecated
@Override
public List<DeletedBlocksTransactionInfo> getFailedDeletedBlockTxn(int count,
long startTxId) throws IOException {
- GetFailedDeletedBlocksTxnRequestProto request =
- GetFailedDeletedBlocksTxnRequestProto.newBuilder()
- .setCount(count)
- .setStartTxId(startTxId)
- .build();
- GetFailedDeletedBlocksTxnResponseProto resp = submitRequest(
- Type.GetFailedDeletedBlocksTransaction,
- builder -> builder.setGetFailedDeletedBlocksTxnRequest(request)).
- getGetFailedDeletedBlocksTxnResponse();
- return resp.getDeletedBlocksTransactionsList();
+ return Collections.emptyList();
}
+ @Deprecated
@Override
public int resetDeletedBlockRetryCount(List<Long> txIDs)
throws IOException {
- ResetDeletedBlockRetryCountRequestProto request =
- ResetDeletedBlockRetryCountRequestProto.newBuilder()
- .addAllTransactionId(txIDs)
- .build();
- return submitRequest(Type.ResetDeletedBlockRetryCount,
- builder -> builder.setResetDeletedBlockRetryCountRequest(request)).
- getResetDeletedBlockRetryCountResponse().getResetCount();
+ return 0;
}
/**
diff --git a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
index cadff023a06..3dfdea4c732 100644
--- a/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
+++ b/hadoop-hdds/interface-admin/src/main/proto/ScmAdminProtocol.proto
@@ -77,9 +77,9 @@ message ScmContainerLocationRequest {
optional GetContainerCountRequestProto getContainerCountRequest = 38;
optional GetContainerReplicasRequestProto getContainerReplicasRequest = 39;
optional ReplicationManagerReportRequestProto
replicationManagerReportRequest = 40;
- optional ResetDeletedBlockRetryCountRequestProto
resetDeletedBlockRetryCountRequest = 41;
+ optional ResetDeletedBlockRetryCountRequestProto
resetDeletedBlockRetryCountRequest = 41 [deprecated=true];
optional TransferLeadershipRequestProto transferScmLeadershipRequest = 42;
- optional GetFailedDeletedBlocksTxnRequestProto
getFailedDeletedBlocksTxnRequest = 43;
+ optional GetFailedDeletedBlocksTxnRequestProto
getFailedDeletedBlocksTxnRequest = 43 [deprecated=true];
optional DecommissionScmRequestProto decommissionScmRequest = 44;
optional SingleNodeQueryRequestProto singleNodeQueryRequest = 45;
optional GetContainersOnDecomNodeRequestProto
getContainersOnDecomNodeRequest = 46;
@@ -134,9 +134,9 @@ message ScmContainerLocationResponse {
optional GetContainerCountResponseProto getContainerCountResponse = 38;
optional GetContainerReplicasResponseProto getContainerReplicasResponse = 39;
optional ReplicationManagerReportResponseProto
getReplicationManagerReportResponse = 40;
- optional ResetDeletedBlockRetryCountResponseProto
resetDeletedBlockRetryCountResponse = 41;
+ optional ResetDeletedBlockRetryCountResponseProto
resetDeletedBlockRetryCountResponse = 41 [deprecated=true];
optional TransferLeadershipResponseProto transferScmLeadershipResponse = 42;
- optional GetFailedDeletedBlocksTxnResponseProto
getFailedDeletedBlocksTxnResponse = 43;
+ optional GetFailedDeletedBlocksTxnResponseProto
getFailedDeletedBlocksTxnResponse = 43 [deprecated=true];
optional DecommissionScmResponseProto decommissionScmResponse = 44;
optional SingleNodeQueryResponseProto singleNodeQueryResponse = 45;
optional GetContainersOnDecomNodeResponseProto
getContainersOnDecomNodeResponse = 46;
diff --git
a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
index 752a62bd339..e48ed4d1c59 100644
---
a/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
+++
b/hadoop-hdds/interface-server/src/main/proto/ScmServerDatanodeHeartbeatProtocol.proto
@@ -367,7 +367,8 @@ message DeletedBlocksTransaction {
required int64 containerID = 2;
repeated int64 localID = 3;
// the retry time of sending deleting command to datanode.
- required int32 count = 4;
+ // We don't have to store the retry count in DB.
+ optional int32 count = 4 [deprecated=true];
}
// ACK message datanode sent to SCM, contains the result of
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java
index 21e4d1b7c56..b1283ef773c 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLog.java
@@ -51,38 +51,13 @@ DatanodeDeletedBlockTransactions getTransactions(
throws IOException;
/**
- * Return the failed transactions in batches in the log. A transaction is
- * considered to be failed if it has been sent more than MAX_RETRY limit
- * and its count is reset to -1.
- *
- * @param count Number of failed transactions to be returned.
- * @param startTxId The least transaction id to start with.
- * @return a list of failed deleted block transactions.
- * @throws IOException
- */
- List<DeletedBlocksTransaction> getFailedTransactions(int count,
- long startTxId) throws IOException;
-
- /**
- * Increments count for given list of transactions by 1.
- * The log maintains a valid range of counts for each transaction
- * [0, MAX_RETRY]. If exceed this range, resets it to -1 to indicate
- * the transaction is no longer valid.
- *
- * @param txIDs - transaction ID.
+ * Increments count for the given list of transactions by 1.
+ * The retry count is maintained only for in-flight transactions,
+ * this will be useful in debugging.
*/
void incrementCount(List<Long> txIDs)
throws IOException;
-
- /**
- * Reset DeletedBlock transaction retry count.
- *
- * @param txIDs transactionId list to be reset
- * @return num of successful reset
- */
- int resetCount(List<Long> txIDs) throws IOException;
-
/**
* Records the creation of a transaction for a DataNode.
*
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
index c94036b9cbc..4ee3a11f7e7 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogImpl.java
@@ -17,22 +17,18 @@
package org.apache.hadoop.hdds.scm.block;
-import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY;
-import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY_DEFAULT;
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_PER_DN_DISTRIBUTION_FACTOR;
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_PER_DN_DISTRIBUTION_FACTOR_DEFAULT;
import static
org.apache.hadoop.hdds.scm.block.SCMDeletedBlockTransactionStatusManager.SCMDeleteBlocksCommandStatusManager.CmdStatus;
import static org.apache.hadoop.hdds.scm.ha.SequenceIdGenerator.DEL_TXN_ID;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Lists;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
@@ -79,7 +75,6 @@ public class DeletedBlockLogImpl
private static final Logger LOG =
LoggerFactory.getLogger(DeletedBlockLogImpl.class);
- private final int maxRetry;
private final ContainerManager containerManager;
private final Lock lock;
// The access to DeletedBlocksTXTable is protected by
@@ -92,7 +87,6 @@ public class DeletedBlockLogImpl
transactionStatusManager;
private long scmCommandTimeoutMs = Duration.ofSeconds(300).toMillis();
- private static final int LIST_ALL_FAILED_TRANSACTIONS = -1;
private long lastProcessedTransactionId = -1;
private final int logAppenderQueueByteLimit;
private int deletionFactorPerDatanode;
@@ -102,8 +96,6 @@ public DeletedBlockLogImpl(ConfigurationSource conf,
ContainerManager containerManager,
DBTransactionBuffer dbTxBuffer,
ScmBlockDeletingServiceMetrics metrics) {
- maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY,
- OZONE_SCM_BLOCK_DELETION_MAX_RETRY_DEFAULT);
this.containerManager = containerManager;
this.lock = new ReentrantLock();
@@ -142,37 +134,6 @@ void setDeleteBlocksFactorPerDatanode(int
deleteBlocksFactorPerDatanode) {
this.deletionFactorPerDatanode = deleteBlocksFactorPerDatanode;
}
- @Override
- public List<DeletedBlocksTransaction> getFailedTransactions(int count,
- long startTxId) throws IOException {
- lock.lock();
- try {
- final List<DeletedBlocksTransaction> failedTXs = Lists.newArrayList();
- try (Table.KeyValueIterator<Long, DeletedBlocksTransaction> iter =
- deletedBlockLogStateManager.getReadOnlyIterator()) {
- if (count == LIST_ALL_FAILED_TRANSACTIONS) {
- while (iter.hasNext()) {
- DeletedBlocksTransaction delTX = iter.next().getValue();
- if (delTX.getCount() == -1) {
- failedTXs.add(delTX);
- }
- }
- } else {
- iter.seek(startTxId);
- while (iter.hasNext() && failedTXs.size() < count) {
- DeletedBlocksTransaction delTX = iter.next().getValue();
- if (delTX.getCount() == -1 && delTX.getTxID() >= startTxId) {
- failedTXs.add(delTX);
- }
- }
- }
- }
- return failedTXs;
- } finally {
- lock.unlock();
- }
- }
-
/**
* {@inheritDoc}
*
@@ -182,62 +143,7 @@ public List<DeletedBlocksTransaction>
getFailedTransactions(int count,
@Override
public void incrementCount(List<Long> txIDs)
throws IOException {
- lock.lock();
- try {
- transactionStatusManager.incrementRetryCount(txIDs, maxRetry);
- } finally {
- lock.unlock();
- }
- }
-
- /**
- * {@inheritDoc}
- *
- */
- @Override
- public int resetCount(List<Long> txIDs) throws IOException {
- final int batchSize = 1000;
- int totalProcessed = 0;
-
- try {
- if (txIDs != null && !txIDs.isEmpty()) {
- return resetRetryCount(txIDs);
- }
-
- // If txIDs are null or empty, fetch all failed transactions in batches
- long startTxId = 0;
- List<DeletedBlocksTransaction> batch;
-
- do {
- // Fetch the batch of failed transactions
- batch = getFailedTransactions(batchSize, startTxId);
- if (batch.isEmpty()) {
- break;
- }
-
- List<Long> batchTxIDs =
batch.stream().map(DeletedBlocksTransaction::getTxID).collect(Collectors.toList());
- totalProcessed += resetRetryCount(new ArrayList<>(batchTxIDs));
- // Update startTxId to continue from the last processed transaction
- startTxId = batch.get(batch.size() - 1).getTxID() + 1;
- } while (!batch.isEmpty());
-
- } catch (Exception e) {
- throw new IOException("Error during transaction reset", e);
- }
- return totalProcessed;
- }
-
- private int resetRetryCount(List<Long> txIDs) throws IOException {
- int totalProcessed;
- lock.lock();
- try {
- transactionStatusManager.resetRetryCount(txIDs);
- totalProcessed =
deletedBlockLogStateManager.resetRetryCountOfTransactionInDB(new ArrayList<>(
- txIDs));
- } finally {
- lock.unlock();
- }
- return totalProcessed;
+ transactionStatusManager.incrementRetryCount(txIDs);
}
private DeletedBlocksTransaction constructNewTransaction(
@@ -254,17 +160,15 @@ private DeletedBlocksTransaction constructNewTransaction(
public int getNumOfValidTransactions() throws IOException {
lock.lock();
try {
- final AtomicInteger num = new AtomicInteger(0);
+ int count = 0;
try (Table.KeyValueIterator<Long, DeletedBlocksTransaction> iter =
deletedBlockLogStateManager.getReadOnlyIterator()) {
while (iter.hasNext()) {
- DeletedBlocksTransaction delTX = iter.next().getValue();
- if (delTX.getCount() > -1) {
- num.incrementAndGet();
- }
+ iter.next();
+ count++;
}
}
- return num.get();
+ return count;
} finally {
lock.unlock();
}
@@ -350,16 +254,12 @@ private void getTransaction(DeletedBlocksTransaction tx,
return;
}
- DeletedBlocksTransaction updatedTxn =
- DeletedBlocksTransaction.newBuilder(tx)
- .setCount(transactionStatusManager.getRetryCount(tx.getTxID()))
- .build();
boolean flag = false;
for (ContainerReplica replica : replicas) {
final DatanodeID datanodeID = replica.getDatanodeDetails().getID();
if (!transactionStatusManager.isDuplication(
datanodeID, tx.getTxID(), commandStatus)) {
- transactions.addTransactionToDN(datanodeID, updatedTxn);
+ transactions.addTransactionToDN(datanodeID, tx);
flag = true;
}
}
@@ -466,15 +366,14 @@ public DatanodeDeletedBlockTransactions getTransactions(
keyValue = iter.next();
DeletedBlocksTransaction txn = keyValue.getValue();
final ContainerID id = ContainerID.valueOf(txn.getContainerID());
+ final ContainerInfo container = containerManager.getContainer(id);
try {
// HDDS-7126. When container is under replicated, it is possible
// that container is deleted, but transactions are not deleted.
- if (containerManager.getContainer(id).isDeleted()) {
- LOG.warn("Container: {} was deleted for the " +
- "transaction: {}.", id, txn);
+ if (container.isDeleted()) {
+ LOG.warn("Container: {} was deleted for the transaction: {}.",
id, txn);
txIDs.add(txn.getTxID());
- } else if (txn.getCount() > -1 && txn.getCount() <= maxRetry
- && !containerManager.getContainer(id).isOpen()) {
+ } else if (!container.isOpen()) {
Set<ContainerReplica> replicas = containerManager
.getContainerReplicas(
ContainerID.valueOf(txn.getContainerID()));
@@ -483,7 +382,7 @@ public DatanodeDeletedBlockTransactions getTransactions(
} else {
metrics.incrSkippedTransaction();
}
- } else if (txn.getCount() >= maxRetry ||
containerManager.getContainer(id).isOpen()) {
+ } else if (containerManager.getContainer(id).isOpen()) {
metrics.incrSkippedTransaction();
}
} catch (ContainerNotFoundException ex) {
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java
index 398fc47ec91..060b07bbdf9 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManager.java
@@ -36,10 +36,12 @@ void
addTransactionsToDB(ArrayList<DeletedBlocksTransaction> txs)
void removeTransactionsFromDB(ArrayList<Long> txIDs)
throws IOException;
+ @Deprecated
@Replicate
void increaseRetryCountOfTransactionInDB(ArrayList<Long> txIDs)
throws IOException;
+ @Deprecated
@Replicate
int resetRetryCountOfTransactionInDB(ArrayList<Long> txIDs)
throws IOException;
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java
index fd5c39e79e6..b6976c3c3f3 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/DeletedBlockLogStateManagerImpl.java
@@ -23,7 +23,6 @@
import java.util.HashMap;
import java.util.Map;
import java.util.NoSuchElementException;
-import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
@@ -54,7 +53,6 @@ public class DeletedBlockLogStateManagerImpl
private ContainerManager containerManager;
private final DBTransactionBuffer transactionBuffer;
private final Set<Long> deletingTxIDs;
- private final Set<Long> skippingRetryTxIDs;
public DeletedBlockLogStateManagerImpl(ConfigurationSource conf,
Table<Long, DeletedBlocksTransaction> deletedTable,
@@ -63,7 +61,6 @@ public DeletedBlockLogStateManagerImpl(ConfigurationSource
conf,
this.containerManager = containerManager;
this.transactionBuffer = txBuffer;
this.deletingTxIDs = ConcurrentHashMap.newKeySet();
- this.skippingRetryTxIDs = ConcurrentHashMap.newKeySet();
}
@Override
@@ -80,17 +77,13 @@ public Table.KeyValueIterator<Long,
DeletedBlocksTransaction> getReadOnlyIterato
private void findNext() {
while (iter.hasNext()) {
- TypedTable.KeyValue<Long, DeletedBlocksTransaction> next = iter
- .next();
+ final TypedTable.KeyValue<Long, DeletedBlocksTransaction> next =
iter.next();
final long txID = next.getKey();
- if ((deletingTxIDs == null || !deletingTxIDs.contains(txID)) && (
- skippingRetryTxIDs == null || !skippingRetryTxIDs
- .contains(txID))) {
+ if ((!deletingTxIDs.contains(txID))) {
nextTx = next;
if (LOG.isTraceEnabled()) {
- LOG.trace("DeletedBlocksTransaction matching txID:{}",
- txID);
+ LOG.trace("DeletedBlocksTransaction matching txID:{}", txID);
}
return;
}
@@ -169,71 +162,30 @@ public void removeTransactionsFromDB(ArrayList<Long>
txIDs)
}
}
+ @Deprecated
@Override
public void increaseRetryCountOfTransactionInDB(
ArrayList<Long> txIDs) throws IOException {
- for (Long txID : txIDs) {
- DeletedBlocksTransaction block =
- deletedTable.get(txID);
- if (block == null) {
- if (LOG.isDebugEnabled()) {
- // This can occur due to race condition between retry and old
- // service task where old task removes the transaction and the new
- // task is resending
- LOG.debug("Deleted TXID {} not found.", txID);
- }
- continue;
- }
- // if the retry time exceeds the maxRetry value
- // then set the retry value to -1, stop retrying, admins can
- // analyze those blocks and purge them manually by SCMCli.
- DeletedBlocksTransaction.Builder builder =
block.toBuilder().setCount(-1);
- transactionBuffer.addToBuffer(deletedTable, txID, builder.build());
- if (skippingRetryTxIDs != null) {
- skippingRetryTxIDs.add(txID);
- }
- }
+ // We don't store retry count in DB anymore.
+ // This method is being retained to ensure backward compatibility and
prevent
+ // issues during minor upgrades. It will be removed in the future, during
a major release.
}
+ @Deprecated
@Override
public int resetRetryCountOfTransactionInDB(ArrayList<Long> txIDs)
throws IOException {
- Objects.requireNonNull(txIDs, "txIds cannot be null.");
- int resetCount = 0;
- for (long txId: txIDs) {
- try {
- DeletedBlocksTransaction transaction = deletedTable.get(txId);
- if (transaction == null) {
- LOG.warn("txId {} is not found in deletedTable.", txId);
- continue;
- }
- if (transaction.getCount() != -1) {
- LOG.warn("txId {} has already been reset in deletedTable.", txId);
- continue;
- }
- transactionBuffer.addToBuffer(deletedTable, txId,
- transaction.toBuilder().setCount(0).build());
- resetCount += 1;
- if (LOG.isDebugEnabled()) {
- LOG.info("Reset deleted block Txn retry count to 0 in container {}" +
- " with txnId {} ", transaction.getContainerID(), txId);
- }
- } catch (IOException ex) {
- LOG.error("Could not reset deleted block transaction {}.", txId, ex);
- throw ex;
- }
- }
- LOG.info("Reset in total {} deleted block Txn retry count", resetCount);
- return resetCount;
+ // We don't reset retry count anymore.
+ // This method is being retained to ensure backward compatibility and
prevent
+ // issues during minor upgrades. It will be removed in the future, during
a major release.
+ return 0;
}
@Override
public void onFlush() {
// onFlush() can be invoked only when ratis is enabled.
Preconditions.checkNotNull(deletingTxIDs);
- Preconditions.checkNotNull(skippingRetryTxIDs);
deletingTxIDs.clear();
- skippingRetryTxIDs.clear();
}
@Override
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java
index 6cc99605690..6d4d4ba3016 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/block/SCMDeletedBlockTransactionStatusManager.java
@@ -34,6 +34,7 @@
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
@@ -365,37 +366,10 @@ Map<DatanodeID, Map<Long, CmdStatusData>>
getScmCmdStatusRecord() {
}
}
- public void incrementRetryCount(List<Long> txIDs, long maxRetry)
- throws IOException {
- ArrayList<Long> txIDsToUpdate = new ArrayList<>();
- for (Long txID : txIDs) {
- int currentCount =
- transactionToRetryCountMap.getOrDefault(txID, 0);
- if (currentCount > maxRetry) {
- continue;
- } else {
- currentCount += 1;
- if (currentCount > maxRetry) {
- txIDsToUpdate.add(txID);
- }
- transactionToRetryCountMap.put(txID, currentCount);
- }
- }
-
- if (!txIDsToUpdate.isEmpty()) {
- deletedBlockLogStateManager
- .increaseRetryCountOfTransactionInDB(txIDsToUpdate);
- }
- }
-
- public void resetRetryCount(List<Long> txIDs) throws IOException {
- for (Long txID: txIDs) {
- transactionToRetryCountMap.computeIfPresent(txID, (key, value) -> 0);
- }
- }
-
- int getRetryCount(long txID) {
- return transactionToRetryCountMap.getOrDefault(txID, 0);
+ public void incrementRetryCount(List<Long> txIDs) {
+ CompletableFuture.runAsync(() ->
+ txIDs.forEach(tx ->
+ transactionToRetryCountMap.compute(tx, (k, v) -> (v == null) ? 1 :
v + 1)));
}
public void onSent(DatanodeDetails dnId, SCMCommand<?> scmCommand) {
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
index 8ce5ae44ab6..b9d4b9d6aef 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/protocol/StorageContainerLocationProtocolServerSideTranslatorPB.java
@@ -1324,6 +1324,7 @@ public GetContainerCountResponseProto
getClosedContainerCount(
.build();
}
+ @Deprecated
public GetFailedDeletedBlocksTxnResponseProto getFailedDeletedBlocksTxn(
GetFailedDeletedBlocksTxnRequestProto request) throws IOException {
long startTxId = request.hasStartTxId() ? request.getStartTxId() : 0;
@@ -1333,6 +1334,7 @@ public GetFailedDeletedBlocksTxnResponseProto
getFailedDeletedBlocksTxn(
.build();
}
+ @Deprecated
public ResetDeletedBlockRetryCountResponseProto
getResetDeletedBlockRetryCount(ResetDeletedBlockRetryCountRequestProto
request) throws IOException {
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
index 370493b6dc3..c9d4f0b0792 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/server/SCMClientProtocolServer.java
@@ -82,7 +82,6 @@
import
org.apache.hadoop.hdds.scm.container.balancer.IllegalContainerBalancerStateException;
import
org.apache.hadoop.hdds.scm.container.balancer.InvalidContainerBalancerConfigurationException;
import
org.apache.hadoop.hdds.scm.container.common.helpers.ContainerWithPipeline;
-import
org.apache.hadoop.hdds.scm.container.common.helpers.DeletedBlocksTransactionInfoWrapper;
import
org.apache.hadoop.hdds.scm.container.reconciliation.ReconciliationEligibilityHandler;
import
org.apache.hadoop.hdds.scm.container.reconciliation.ReconciliationEligibilityHandler.EligibilityResult;
import org.apache.hadoop.hdds.scm.events.SCMEvents;
@@ -960,47 +959,17 @@ public void transferLeadership(String newLeaderId)
SCMAction.TRANSFER_LEADERSHIP, auditMap));
}
+ @Deprecated
@Override
public List<DeletedBlocksTransactionInfo> getFailedDeletedBlockTxn(int count,
long startTxId) throws IOException {
- List<DeletedBlocksTransactionInfo> result;
- Map<String, String> auditMap = Maps.newHashMap();
- auditMap.put("count", String.valueOf(count));
- auditMap.put("startTxId", String.valueOf(startTxId));
-
- try {
- result = scm.getScmBlockManager().getDeletedBlockLog()
- .getFailedTransactions(count, startTxId).stream()
- .map(DeletedBlocksTransactionInfoWrapper::fromTxn)
- .collect(Collectors.toList());
- AUDIT.logWriteSuccess(buildAuditMessageForSuccess(
- SCMAction.GET_FAILED_DELETED_BLOCKS_TRANSACTION, auditMap));
- return result;
- } catch (IOException ex) {
- AUDIT.logReadFailure(
- buildAuditMessageForFailure(
- SCMAction.GET_FAILED_DELETED_BLOCKS_TRANSACTION, auditMap, ex)
- );
- throw ex;
- }
+ return Collections.emptyList();
}
+ @Deprecated
@Override
public int resetDeletedBlockRetryCount(List<Long> txIDs) throws IOException {
- final Map<String, String> auditMap = Maps.newHashMap();
- auditMap.put("txIDs", txIDs.toString());
- try {
- getScm().checkAdminAccess(getRemoteUser(), false);
- int count = scm.getScmBlockManager().getDeletedBlockLog().
- resetCount(txIDs);
- AUDIT.logWriteSuccess(buildAuditMessageForSuccess(
- SCMAction.RESET_DELETED_BLOCK_RETRY_COUNT, auditMap));
- return count;
- } catch (Exception ex) {
- AUDIT.logWriteFailure(buildAuditMessageForFailure(
- SCMAction.RESET_DELETED_BLOCK_RETRY_COUNT, auditMap, ex));
- throw ex;
- }
+ return 0;
}
/**
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java
index c8e1351297e..95e13146dee 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/ozone/audit/SCMAction.java
@@ -52,9 +52,7 @@ public enum SCMAction implements AuditAction {
GET_CONTAINER_WITH_PIPELINE_BATCH,
ADD_SCM,
GET_REPLICATION_MANAGER_REPORT,
- RESET_DELETED_BLOCK_RETRY_COUNT,
TRANSFER_LEADERSHIP,
- GET_FAILED_DELETED_BLOCKS_TRANSACTION,
GET_CONTAINER_REPLICAS,
GET_CONTAINERS_ON_DECOM_NODE,
DECOMMISSION_NODES,
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java
index eba8a8eeb18..bc0c5cba4d1 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/block/TestDeletedBlockLog.java
@@ -17,9 +17,9 @@
package org.apache.hadoop.hdds.scm.block;
-import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.atLeast;
import static org.mockito.Mockito.doAnswer;
@@ -111,7 +111,6 @@ public class TestDeletedBlockLog {
@BeforeEach
public void setup() throws Exception {
conf = new OzoneConfiguration();
- conf.setInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20);
conf.set(HddsConfigKeys.OZONE_METADATA_DIRS, testDir.getAbsolutePath());
replicationManager = mock(ReplicationManager.class);
SCMConfigurator configurator = new SCMConfigurator();
@@ -234,19 +233,6 @@ private void addTransactions(Map<Long, List<Long>>
containerBlocksMap,
}
}
- private void incrementCount(List<Long> txIDs) throws IOException {
- deletedBlockLog.incrementCount(txIDs);
- scmHADBTransactionBuffer.flush();
- // mock scmHADBTransactionBuffer does not flush deletedBlockLog
- deletedBlockLog.onFlush();
- }
-
- private void resetCount(List<Long> txIDs) throws IOException {
- deletedBlockLog.resetCount(txIDs);
- scmHADBTransactionBuffer.flush();
- deletedBlockLog.onFlush();
- }
-
private void commitTransactions(
List<DeleteBlockTransactionResult> transactionResults,
DatanodeDetails... dns) throws IOException {
@@ -337,45 +323,6 @@ public void testContainerManagerTransactionId() throws
Exception {
}
}
- @Test
- public void testIncrementCount() throws Exception {
- int maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20);
-
- // Create 30 TXs in the log.
- addTransactions(generateData(30), true);
- mockContainerHealthResult(true);
-
- // This will return all TXs, total num 30.
- List<DeletedBlocksTransaction> blocks = getAllTransactions();
- List<Long> txIDs = blocks.stream().map(DeletedBlocksTransaction::getTxID)
- .distinct().collect(Collectors.toList());
- assertEquals(30, txIDs.size());
-
- for (DeletedBlocksTransaction block : blocks) {
- assertEquals(0, block.getCount());
- }
-
- for (int i = 0; i < maxRetry; i++) {
- incrementCount(txIDs);
- }
- blocks = getAllTransactions();
- for (DeletedBlocksTransaction block : blocks) {
- assertEquals(maxRetry, block.getCount());
- }
-
- // Increment another time so it exceed the maxRetry.
- // On this call, count will be set to -1 which means TX eventually fails.
- incrementCount(txIDs);
- blocks = getAllTransactions();
- for (DeletedBlocksTransaction block : blocks) {
- assertEquals(-1, block.getCount());
- }
-
- // If all TXs are failed, getTransactions call will always return nothing.
- blocks = getAllTransactions();
- assertEquals(0, blocks.size());
- }
-
private void mockContainerHealthResult(Boolean healthy) {
ContainerInfo containerInfo = mock(ContainerInfo.class);
ContainerHealthResult healthResult =
@@ -387,55 +334,6 @@ private void mockContainerHealthResult(Boolean healthy) {
.getContainerReplicationHealth(any(), any());
}
- @Test
- public void testResetCount() throws Exception {
- int maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20);
-
- // Create 30 TXs in the log.
- addTransactions(generateData(30), true);
- mockContainerHealthResult(true);
-
- // This will return all TXs, total num 30.
- List<DeletedBlocksTransaction> blocks = getAllTransactions();
- List<Long> txIDs = blocks.stream().map(DeletedBlocksTransaction::getTxID)
- .distinct().collect(Collectors.toList());
-
- for (int i = 0; i < maxRetry; i++) {
- incrementCount(txIDs);
- }
-
- // Increment another time so it exceed the maxRetry.
- // On this call, count will be set to -1 which means TX eventually fails.
- incrementCount(txIDs);
- blocks = getAllTransactions();
- for (DeletedBlocksTransaction block : blocks) {
- assertEquals(-1, block.getCount());
- }
-
- // If all TXs are failed, getTransactions call will always return nothing.
- blocks = getAllTransactions();
- assertEquals(0, blocks.size());
-
- // Reset the retry count, these transactions should be accessible.
- resetCount(txIDs);
- blocks = getAllTransactions();
- for (DeletedBlocksTransaction block : blocks) {
- assertEquals(0, block.getCount());
- }
-
- // Increment for the reset transactions.
- // Lets the SCM delete the transaction and wait for the DN reply
- // to timeout, thus allowing the transaction to resend the
- deletedBlockLog.setScmCommandTimeoutMs(-1L);
- incrementCount(txIDs);
- blocks = getAllTransactions();
- for (DeletedBlocksTransaction block : blocks) {
- assertEquals(1, block.getCount());
- }
-
- assertEquals(30 * THREE, blocks.size());
- }
-
@Test
public void testAddTransactionsIsBatched() throws Exception {
conf.setStorageSize(ScmConfigKeys.OZONE_SCM_HA_RAFT_LOG_APPENDER_QUEUE_BYTE_LIMIT,
1, StorageUnit.KB);
@@ -453,19 +351,16 @@ public void testAddTransactionsIsBatched() throws
Exception {
@Test
public void testSCMDelIteratorProgress() throws Exception {
- int maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20);
- // CASE1: When all transactions are valid and available
// Create 8 TXs in the log.
int noOfTransactions = 8;
addTransactions(generateData(noOfTransactions), true);
mockContainerHealthResult(true);
List<DeletedBlocksTransaction> blocks;
- List<Long> txIDs = new ArrayList<>();
int i = 1;
while (i < noOfTransactions) {
- // In each iteration read two transaction, API returns all the
transactions in order.
+ // In each iteration read two transactions, API returns all the
transactions in order.
// 1st iteration: {1, 2}
// 2nd iteration: {3, 4}
// 3rd iteration: {5, 6}
@@ -475,48 +370,9 @@ public void testSCMDelIteratorProgress() throws Exception {
assertEquals(blocks.get(1).getTxID(), i++);
}
- // CASE2: When some transactions are not available for delete in the
current iteration,
- // either due to max retry reach or some other issue.
- // New transactions Id is { 9, 10, 11, 12, 13, 14, 15, 16}
- addTransactions(generateData(noOfTransactions), true);
- mockContainerHealthResult(true);
-
- // Mark transaction Id 11 as reached max retry count so that it will be
ignored
- // by scm deleting service while fetching transaction for delete
- int ignoreTransactionId = 11;
- txIDs.add((long) ignoreTransactionId);
- for (i = 0; i < maxRetry; i++) {
- incrementCount(txIDs);
- }
- incrementCount(txIDs);
-
- i = 9;
- while (true) {
- // In each iteration read two transaction.
- // If any transaction which is not available for delete in the current
iteration,
- // it will be ignored and will be re-checked again only after complete
table is read.
- // 1st iteration: {9, 10}
- // 2nd iteration: {12, 13} Transaction 11 is ignored here
- // 3rd iteration: {14, 15} Transaction 11 is available here,
- // but it will be read only when all db records are read till the end.
- // 4th iteration: {16, 11} Since iterator reached at the end of table
after reading transaction 16,
- // Iterator starts from beginning again, and it returns transaction 11
as well
- blocks = getTransactions(2 * BLOCKS_PER_TXN * THREE);
- if (i == ignoreTransactionId) {
- i++;
- }
- assertEquals(blocks.get(0).getTxID(), i++);
- if (i == 17) {
- assertEquals(blocks.get(1).getTxID(), ignoreTransactionId);
- break;
- }
- assertEquals(blocks.get(1).getTxID(), i++);
-
- if (i == 14) {
- // Reset transaction 11 so that it will be available in scm key
deleting service in the subsequent iterations.
- resetCount(txIDs);
- }
- }
+ // Since all the transactions are in-flight, the getTransaction should
return empty list.
+ blocks = getTransactions(2 * BLOCKS_PER_TXN * THREE);
+ assertTrue(blocks.isEmpty());
}
@Test
@@ -776,7 +632,6 @@ public void testRandomOperateTransactions() throws
Exception {
for (DeletedBlocksTransaction block : blocks) {
txIDs.add(block.getTxID());
}
- incrementCount(txIDs);
} else if (state == 2) {
commitTransactions(blocks);
committed += blocks.size() / THREE;
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
index 2d5db1dfaf3..133166dec48 100644
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
+++
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/hdds/scm/cli/ContainerOperationClient.java
@@ -39,7 +39,6 @@
import
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerDataProto;
import
org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ReadContainerResponseProto;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
-import
org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.ContainerBalancerStatusInfoResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.DecommissionScmResponseProto;
import
org.apache.hadoop.hdds.protocol.proto.StorageContainerLocationProtocolProtos.StartContainerBalancerResponseProto;
@@ -551,18 +550,6 @@ public void transferLeadership(String newLeaderId) throws
IOException {
storageContainerLocationClient.transferLeadership(newLeaderId);
}
- @Override
- public List<DeletedBlocksTransactionInfo> getFailedDeletedBlockTxn(int count,
- long startTxId) throws IOException {
- return storageContainerLocationClient.getFailedDeletedBlockTxn(count,
- startTxId);
- }
-
- @Override
- public int resetDeletedBlockRetryCount(List<Long> txIDs) throws IOException {
- return storageContainerLocationClient.resetDeletedBlockRetryCount(txIDs);
- }
-
@Override
public List<HddsProtos.DatanodeUsageInfoProto> getDatanodeUsageInfo(
String address, String uuid) throws IOException {
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/DeletedBlocksTxnCommands.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/DeletedBlocksTxnCommands.java
deleted file mode 100644
index 5862d309d64..00000000000
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/DeletedBlocksTxnCommands.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.ozone.admin.scm;
-
-import org.apache.hadoop.hdds.cli.HddsVersionProvider;
-import picocli.CommandLine;
-
-/**
- * Subcommand to group container related operations.
- */
[email protected](
- name = "deletedBlocksTxn",
- description = "SCM deleted blocks transaction specific operations",
- mixinStandardHelpOptions = true,
- versionProvider = HddsVersionProvider.class,
- subcommands = {
- GetFailedDeletedBlocksTxnSubcommand.class,
- ResetDeletedBlockRetryCountSubcommand.class,
- })
-public class DeletedBlocksTxnCommands {
-
-}
-
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/GetFailedDeletedBlocksTxnSubcommand.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/GetFailedDeletedBlocksTxnSubcommand.java
deleted file mode 100644
index c9b8d7fbe53..00000000000
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/GetFailedDeletedBlocksTxnSubcommand.java
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.ozone.admin.scm;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.List;
-import java.util.Objects;
-import java.util.stream.Collectors;
-import org.apache.hadoop.hdds.cli.HddsVersionProvider;
-import
org.apache.hadoop.hdds.protocol.proto.HddsProtos.DeletedBlocksTransactionInfo;
-import org.apache.hadoop.hdds.scm.cli.ScmSubcommand;
-import org.apache.hadoop.hdds.scm.client.ScmClient;
-import
org.apache.hadoop.hdds.scm.container.common.helpers.DeletedBlocksTransactionInfoWrapper;
-import org.apache.hadoop.hdds.server.JsonUtils;
-import picocli.CommandLine;
-
-/**
- * Handler of getting expired deleted blocks from SCM side.
- */
[email protected](
- name = "ls",
- description = "Print the failed DeletedBlocksTransaction(retry count =
-1)",
- mixinStandardHelpOptions = true,
- versionProvider = HddsVersionProvider.class)
-public class GetFailedDeletedBlocksTxnSubcommand extends ScmSubcommand {
-
- @CommandLine.ArgGroup(multiplicity = "1")
- private TransactionsOption group;
-
- @CommandLine.Option(names = {"-s", "--startTxId", "--start-tx-id"},
- defaultValue = "0",
- description = "The least transaction ID to start with, default 0." +
- " Only work with -c/--count")
- private long startTxId;
-
- @CommandLine.Option(names = {"-o", "--out"},
- description = "Print transactions into file in JSON format.")
- private String fileName;
-
- private static final int LIST_ALL_FAILED_TRANSACTIONS = -1;
-
- @Override
- public void execute(ScmClient client) throws IOException {
- List<DeletedBlocksTransactionInfo> response;
- int count = group.getAll ? LIST_ALL_FAILED_TRANSACTIONS : group.count;
- response = client.getFailedDeletedBlockTxn(count, startTxId);
- List<DeletedBlocksTransactionInfoWrapper> txns = response.stream()
- .map(DeletedBlocksTransactionInfoWrapper::fromProtobuf)
- .filter(Objects::nonNull)
- .collect(Collectors.toList());
-
- String result = JsonUtils.toJsonStringWithDefaultPrettyPrinter(txns);
- if (fileName != null) {
- try (OutputStream f = Files.newOutputStream(Paths.get(fileName))) {
- f.write(result.getBytes(StandardCharsets.UTF_8));
- }
- } else {
- System.out.println(result);
- }
- }
-
- static class TransactionsOption {
- @CommandLine.Option(names = {"-a", "--all"},
- description = "Get all the failed transactions.")
- private boolean getAll;
-
- @CommandLine.Option(names = {"-c", "--count"},
- defaultValue = "20",
- description = "Get at most the count number of the" +
- " failed transactions.")
- private int count;
- }
-}
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ResetDeletedBlockRetryCountSubcommand.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ResetDeletedBlockRetryCountSubcommand.java
deleted file mode 100644
index b93b8d50b43..00000000000
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ResetDeletedBlockRetryCountSubcommand.java
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.ozone.admin.scm;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-import java.util.stream.Collectors;
-import org.apache.hadoop.hdds.cli.HddsVersionProvider;
-import org.apache.hadoop.hdds.scm.cli.ScmSubcommand;
-import org.apache.hadoop.hdds.scm.client.ScmClient;
-import
org.apache.hadoop.hdds.scm.container.common.helpers.DeletedBlocksTransactionInfoWrapper;
-import org.apache.hadoop.hdds.server.JsonUtils;
-import picocli.CommandLine;
-
-/**
- * Handler of resetting expired deleted blocks from SCM side.
- */
[email protected](
- name = "reset",
- description = "Reset the retry count of failed DeletedBlocksTransaction",
- mixinStandardHelpOptions = true,
- versionProvider = HddsVersionProvider.class)
-public class ResetDeletedBlockRetryCountSubcommand extends ScmSubcommand {
-
- @CommandLine.ArgGroup(multiplicity = "1")
- private TransactionsOption group;
-
- static class TransactionsOption {
- @CommandLine.Option(names = {"-a", "--all"},
- description = "Reset all expired deleted block transaction retry" +
- " count from -1 to 0.")
- private boolean resetAll;
-
- @CommandLine.Option(names = {"-l", "--list"},
- split = ",",
- paramLabel = "txID",
- description = "Reset the only given deletedBlock transaction ID" +
- " list. Example: 100,101,102.(Separated by ',')")
- private List<Long> txList;
-
- @CommandLine.Option(names = {"-i", "--in"},
- description = "Use file as input, need to be JSON Array format and " +
- "contains multi \"txID\" key. Example:
[{\"txID\":1},{\"txID\":2}]")
- private String fileName;
- }
-
- @Override
- public void execute(ScmClient client) throws IOException {
- int count;
- if (group.resetAll) {
- count = client.resetDeletedBlockRetryCount(new ArrayList<>());
- } else if (group.fileName != null) {
- List<Long> txIDs;
- try (InputStream in = Files.newInputStream(Paths.get(group.fileName));
- Reader fileReader = new InputStreamReader(in,
- StandardCharsets.UTF_8)) {
- DeletedBlocksTransactionInfoWrapper[] txns =
JsonUtils.readFromReader(fileReader,
- DeletedBlocksTransactionInfoWrapper[].class);
- txIDs = Arrays.stream(txns)
- .map(DeletedBlocksTransactionInfoWrapper::getTxID)
- .sorted()
- .distinct()
- .collect(Collectors.toList());
- System.out.println("Num of loaded txIDs: " + txIDs.size());
- if (!txIDs.isEmpty()) {
- System.out.println("The first loaded txID: " + txIDs.get(0));
- System.out.println("The last loaded txID: " +
- txIDs.get(txIDs.size() - 1));
- }
- } catch (IOException ex) {
- final String message = "Failed to parse the file " + group.fileName +
": " + ex.getMessage();
- System.out.println(message);
- throw new IOException(message, ex);
- }
-
- count = client.resetDeletedBlockRetryCount(txIDs);
- } else {
- if (group.txList == null || group.txList.isEmpty()) {
- System.out.println("TransactionId list should not be empty");
- return;
- }
- count = client.resetDeletedBlockRetryCount(group.txList);
- }
- System.out.println("Reset " + count + " deleted block transactions in" +
- " SCM.");
- }
-}
diff --git
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ScmAdmin.java
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ScmAdmin.java
index 1bab77a4eb4..0228eb221be 100644
---
a/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ScmAdmin.java
+++
b/hadoop-ozone/cli-admin/src/main/java/org/apache/hadoop/ozone/admin/scm/ScmAdmin.java
@@ -36,7 +36,6 @@
FinalizeScmUpgradeSubcommand.class,
FinalizationScmStatusSubcommand.class,
TransferScmLeaderSubCommand.class,
- DeletedBlocksTxnCommands.class,
DecommissionScmSubcommand.class,
RotateKeySubCommand.class
})
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java
index 1c220ef8d3a..e407e30bffc 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/scm/TestStorageContainerManager.java
@@ -303,7 +303,7 @@ private void testBlockDeletionTransactions(MiniOzoneCluster
cluster) throws Exce
try {
cluster.getStorageContainerManager().getScmHAManager()
.asSCMHADBTransactionBuffer().flush();
- return delLog.getFailedTransactions(-1, 0).isEmpty();
+ return delLog.getNumOfValidTransactions() == 0;
} catch (IOException e) {
return false;
}
@@ -333,7 +333,6 @@ private static void
configureBlockDeletion(OzoneConfiguration conf) {
TimeUnit.MILLISECONDS);
conf.setTimeDuration(HDDS_COMMAND_STATUS_REPORT_INTERVAL, 200,
TimeUnit.MILLISECONDS);
- conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5);
// Reset container provision size, otherwise only one container
// is created by default.
conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT, 10 *
KEY_COUNT);
@@ -405,7 +404,6 @@ public void testBlockDeletingThrottling() throws Exception {
int numKeys = 15;
OzoneConfiguration conf = new OzoneConfiguration();
conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS);
- conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5);
conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL,
100, TimeUnit.MILLISECONDS);
ScmConfig scmConfig = conf.getObject(ScmConfig.class);
@@ -660,7 +658,6 @@ public void testCloseContainerCommandOnRestart() throws
Exception {
int numKeys = 15;
OzoneConfiguration conf = new OzoneConfiguration();
conf.setTimeDuration(HDDS_CONTAINER_REPORT_INTERVAL, 1, TimeUnit.SECONDS);
- conf.setInt(ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 5);
conf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL,
100, TimeUnit.MILLISECONDS);
conf.setInt(ScmConfigKeys.OZONE_SCM_PIPELINE_OWNER_CONTAINER_COUNT,
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java
deleted file mode 100644
index 72a5ed16137..00000000000
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/shell/TestDeletedBlocksTxnShell.java
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.ozone.shell;
-
-import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE;
-import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_BLOCK_DELETION_MAX_RETRY;
-import static org.assertj.core.api.Assertions.assertThat;
-import static org.junit.jupiter.api.Assertions.assertEquals;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
-import org.apache.commons.lang3.RandomUtils;
-import org.apache.hadoop.hdds.client.RatisReplicationConfig;
-import org.apache.hadoop.hdds.conf.OzoneConfiguration;
-import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
-import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
-import org.apache.hadoop.hdds.scm.block.DeletedBlockLog;
-import org.apache.hadoop.hdds.scm.cli.ContainerOperationClient;
-import org.apache.hadoop.hdds.scm.container.ContainerInfo;
-import org.apache.hadoop.hdds.scm.container.ContainerReplica;
-import org.apache.hadoop.hdds.scm.container.ContainerStateManager;
-import org.apache.hadoop.hdds.scm.pipeline.PipelineID;
-import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
-import org.apache.hadoop.ozone.MiniOzoneCluster;
-import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
-import org.apache.hadoop.ozone.admin.scm.GetFailedDeletedBlocksTxnSubcommand;
-import org.apache.hadoop.ozone.admin.scm.ResetDeletedBlockRetryCountSubcommand;
-import org.junit.jupiter.api.AfterEach;
-import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.io.TempDir;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import picocli.CommandLine;
-
-/**
- * Test for DeletedBlocksTxnSubcommand Cli.
- */
-public class TestDeletedBlocksTxnShell {
-
- private static final Logger LOG = LoggerFactory
- .getLogger(TestDeletedBlocksTxnShell.class);
-
- private static final String SCM_SERVICE_ID = "scm-service-test1";
- private static final int NUM_OF_SCMS = 3;
-
- private final PrintStream originalOut = System.out;
- private final ByteArrayOutputStream outContent = new ByteArrayOutputStream();
- private MiniOzoneHAClusterImpl cluster = null;
- private OzoneConfiguration conf;
- private File txnFile;
-
- private static final String DEFAULT_ENCODING = StandardCharsets.UTF_8.name();
-
- @TempDir
- private Path tempDir;
-
- /**
- * Create a MiniOzoneHACluster for testing.
- *
- * @throws IOException
- */
- @BeforeEach
- public void init() throws Exception {
- conf = new OzoneConfiguration();
-
- conf.setInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20);
-
- cluster = MiniOzoneCluster.newHABuilder(conf)
- .setSCMServiceId(SCM_SERVICE_ID)
- .setNumOfStorageContainerManagers(NUM_OF_SCMS)
- .setNumOfActiveSCMs(NUM_OF_SCMS)
- .setNumOfOzoneManagers(1)
- .build();
- cluster.waitForClusterToBeReady();
-
- txnFile = tempDir.resolve("txn.txt").toFile();
- LOG.info("txnFile path: {}", txnFile.getAbsolutePath());
- System.setOut(new PrintStream(outContent, false, DEFAULT_ENCODING));
- }
-
- /**
- * Shutdown MiniDFSCluster.
- */
- @AfterEach
- public void shutdown() {
- if (cluster != null) {
- cluster.shutdown();
- }
- System.setOut(originalOut);
- }
-
- //<containerID, List<blockID>>
- private Map<Long, List<Long>> generateData(int dataSize) throws Exception {
- Map<Long, List<Long>> blockMap = new HashMap<>();
- int continerIDBase = RandomUtils.secure().randomInt(0, 100);
- int localIDBase = RandomUtils.secure().randomInt(0, 1000);
- for (int i = 0; i < dataSize; i++) {
- long containerID = continerIDBase + i;
- updateContainerMetadata(containerID);
- List<Long> blocks = new ArrayList<>();
- for (int j = 0; j < 5; j++) {
- long localID = localIDBase + j;
- blocks.add(localID);
- }
- blockMap.put(containerID, blocks);
- }
- return blockMap;
- }
-
- private void updateContainerMetadata(long cid) throws Exception {
- final ContainerInfo container =
- new ContainerInfo.Builder()
- .setContainerID(cid)
- .setReplicationConfig(RatisReplicationConfig.getInstance(THREE))
- .setState(HddsProtos.LifeCycleState.CLOSED)
- .setOwner("TestDeletedBlockLog")
- .setPipelineID(PipelineID.randomId())
- .build();
- final Set<ContainerReplica> replicaSet = cluster.getHddsDatanodes()
- .subList(0, 3)
- .stream()
- .map(dn -> ContainerReplica.newBuilder()
- .setContainerID(container.containerID())
- .setContainerState(State.CLOSED)
- .setDatanodeDetails(dn.getDatanodeDetails())
- .build())
- .collect(Collectors.toSet());
- ContainerStateManager containerStateManager = getSCMLeader().
- getContainerManager().getContainerStateManager();
- containerStateManager.addContainer(container.getProtobuf());
- for (ContainerReplica replica: replicaSet) {
- containerStateManager.updateContainerReplica(replica);
- }
- }
-
- private StorageContainerManager getSCMLeader() {
- return cluster.getStorageContainerManagersList()
- .stream().filter(a -> a.getScmContext().isLeaderReady())
- .collect(Collectors.toList()).get(0);
- }
-
- private void flush() throws Exception {
- // only flush leader here, avoid the follower concurrent flush and write
- getSCMLeader().getScmHAManager().asSCMHADBTransactionBuffer().flush();
- }
-
- @Test
- public void testDeletedBlocksTxnSubcommand() throws Exception {
- int maxRetry = conf.getInt(OZONE_SCM_BLOCK_DELETION_MAX_RETRY, 20);
- int currentValidTxnNum;
- // add 30 block deletion transactions
- DeletedBlockLog deletedBlockLog = getSCMLeader().
- getScmBlockManager().getDeletedBlockLog();
- deletedBlockLog.addTransactions(generateData(30));
- flush();
- currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions();
- LOG.info("Valid num of txns: {}", currentValidTxnNum);
- assertEquals(30, currentValidTxnNum);
-
- // let the first 20 txns be failed
- List<Long> txIds = new ArrayList<>();
- for (int i = 1; i < 21; i++) {
- txIds.add((long) i);
- }
- // increment retry count than threshold, count will be set to -1
- for (int i = 0; i < maxRetry + 1; i++) {
- deletedBlockLog.incrementCount(txIds);
- }
- flush();
- currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions();
- LOG.info("Valid num of txns: {}", currentValidTxnNum);
- assertEquals(10, currentValidTxnNum);
-
- ContainerOperationClient scmClient = new ContainerOperationClient(conf);
- CommandLine cmd;
- // getFailedDeletedBlocksTxn cmd will print all the failed txns
- GetFailedDeletedBlocksTxnSubcommand getCommand =
- new GetFailedDeletedBlocksTxnSubcommand();
- cmd = new CommandLine(getCommand);
- cmd.parseArgs("-a");
- getCommand.execute(scmClient);
- int matchCount = 0;
- Pattern p = Pattern.compile("\"txID\" : \\d+", Pattern.MULTILINE);
- Matcher m = p.matcher(outContent.toString(DEFAULT_ENCODING));
- while (m.find()) {
- matchCount += 1;
- }
- assertEquals(20, matchCount);
-
- // print the first 10 failed txns info into file
- cmd.parseArgs("-o", txnFile.getAbsolutePath(), "-c", "10");
- getCommand.execute(scmClient);
- assertThat(txnFile).exists();
-
- ResetDeletedBlockRetryCountSubcommand resetCommand =
- new ResetDeletedBlockRetryCountSubcommand();
- cmd = new CommandLine(resetCommand);
-
- // reset the txns in file
- cmd.parseArgs("-i", txnFile.getAbsolutePath());
- resetCommand.execute(scmClient);
- flush();
- currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions();
- LOG.info("Valid num of txns: {}", currentValidTxnNum);
- assertEquals(20, currentValidTxnNum);
-
- // reset the given txIds list
- cmd.parseArgs("-l", "11,12,13,14,15");
- resetCommand.execute(scmClient);
- flush();
- currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions();
- LOG.info("Valid num of txns: {}", currentValidTxnNum);
- assertEquals(25, currentValidTxnNum);
-
- // reset the non-existing txns and valid txns, should do nothing
- cmd.parseArgs("-l", "1,2,3,4,5,100,101,102,103,104,105");
- resetCommand.execute(scmClient);
- flush();
- currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions();
- LOG.info("Valid num of txns: {}", currentValidTxnNum);
- assertEquals(25, currentValidTxnNum);
-
- // reset all the result expired txIds, all transactions should be available
- cmd.parseArgs("-a");
- resetCommand.execute(scmClient);
- flush();
- currentValidTxnNum = deletedBlockLog.getNumOfValidTransactions();
- LOG.info("Valid num of txns: {}", currentValidTxnNum);
- assertEquals(30, currentValidTxnNum);
-
- // Fail first 20 txns be failed
- // increment retry count than threshold, count will be set to -1
- for (int i = 0; i < maxRetry + 1; i++) {
- deletedBlockLog.incrementCount(txIds);
- }
- flush();
-
- GetFailedDeletedBlocksTxnSubcommand getFailedBlockCommand =
- new GetFailedDeletedBlocksTxnSubcommand();
- outContent.reset();
- cmd = new CommandLine(getFailedBlockCommand);
- // set start transaction as 15
- cmd.parseArgs("-c", "5", "-s", "15");
- getFailedBlockCommand.execute(scmClient);
- matchCount = 0;
- p = Pattern.compile("\"txID\" : \\d+", Pattern.MULTILINE);
- m = p.matcher(outContent.toString(DEFAULT_ENCODING));
- while (m.find()) {
- matchCount += 1;
- }
- assertEquals(5, matchCount);
- }
-}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]