This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 83f4b3be713 HDDS-13959. Flaky check fails at
testPurgeKeysRequestBatching despite passing repeated run (#9329)
83f4b3be713 is described below
commit 83f4b3be7130e7e84438752228c370e3b6c45b45
Author: Doroszlai, Attila <[email protected]>
AuthorDate: Thu Nov 20 06:57:37 2025 +0100
HDDS-13959. Flaky check fails at testPurgeKeysRequestBatching despite
passing repeated run (#9329)
---
.../ozone/om/service/TestKeyDeletingService.java | 252 ++++++++++-----------
1 file changed, 115 insertions(+), 137 deletions(-)
diff --git
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java
index 4d136bac17d..0ed89bb3fe6 100644
---
a/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java
+++
b/hadoop-ozone/ozone-manager/src/test/java/org/apache/hadoop/ozone/om/service/TestKeyDeletingService.java
@@ -46,7 +46,6 @@
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
-import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -1076,157 +1075,136 @@ void testLastRunAnd24hMetrics() throws Exception {
}
}
- @Test
- @DisplayName("Verify PurgeKeysRequest is batched according to Ratis byte
limit")
- @Flaky("HDDS-13661")
- void testPurgeKeysRequestBatching() throws Exception {
- // Define a small Ratis limit to force multiple batches for testing
- // The actual byte size of protobuf messages depends on content.
- // A small value like 1KB or 2KB should ensure batching for ~10-20 keys.
- final int actualRatisLimitBytes = 1138;
- final int testRatisLimitBytes = 1024; // 2 KB to encourage multiple
batches, 90% of the actualRatisLimitBytes.
-
- // Create a fresh configuration for this test to control the Ratis limit
- OzoneConfiguration testConf = new OzoneConfiguration();
- File innerTestDir = Files.createTempDirectory("TestKDS").toFile();
- ServerUtils.setOzoneMetaDirPath(testConf, innerTestDir.toString());
-
- testConf.setTimeDuration(OZONE_BLOCK_DELETING_SERVICE_INTERVAL, 100,
TimeUnit.MILLISECONDS);
- // Set the specific Ratis limit for this test
-
testConf.setStorageSize(OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT,
- testRatisLimitBytes, StorageUnit.BYTES);
- testConf.setQuietMode(false);
-
- ScmBlockLocationTestingClient testScmBlockTestingClient = new
ScmBlockLocationTestingClient(null, null, 0);
- OmTestManagers testOmTestManagers = new OmTestManagers(testConf,
testScmBlockTestingClient, null);
- KeyManager testKeyManager = testOmTestManagers.getKeyManager();
- testKeyManager.getDeletingService().suspend();
- KeyDeletingService testKds = (KeyDeletingService)
testKeyManager.getDeletingService();
- OzoneManager testOm = testOmTestManagers.getOzoneManager();
-
- try (MockedStatic<OzoneManagerRatisUtils> mockedRatisUtils =
- mockStatic(OzoneManagerRatisUtils.class, CALLS_REAL_METHODS)) {
-
- // Capture all OMRequests submitted via Ratis
- ArgumentCaptor<OzoneManagerProtocolProtos.OMRequest> requestCaptor =
- ArgumentCaptor.forClass(OzoneManagerProtocolProtos.OMRequest.class);
-
- // Mock submitRequest to capture requests and return success
- mockedRatisUtils.when(() -> OzoneManagerRatisUtils.submitRequest(
- any(OzoneManager.class),
- requestCaptor.capture(), // Capture the OMRequest here
- any(),
- anyLong()))
- .thenAnswer(invocation -> {
- // Return a successful OMResponse for each captured request
- return OzoneManagerProtocolProtos.OMResponse.newBuilder()
- .setCmdType(OzoneManagerProtocolProtos.Type.PurgeKeys)
- .setStatus(OzoneManagerProtocolProtos.Status.OK)
- .build();
- });
+ /**
+ * Tests request batching with custom config.
+ */
+ @Nested
+ @TestInstance(TestInstance.Lifecycle.PER_CLASS)
+ class RequestBatching {
- final int numKeysToCreate = 50; // Create enough keys to ensure multiple
batches
- // Create and delete keys using the test-specific managers
- createAndDeleteKeys(numKeysToCreate, 1, testOmTestManagers);
-
- testKds.resume();
-
- // Manually trigger the KeyDeletingService to run its task immediately.
- // This will initiate the purge requests to Ratis.
- testKds.runPeriodicalTaskNow();
-
- // Verify that submitRequest was called multiple times.
- // The exact number of calls depends on the key size and
testRatisLimitBytes,
- // but it must be more than one to confirm batching.
- mockedRatisUtils.verify(() -> OzoneManagerRatisUtils.submitRequest(
- any(OzoneManager.class),
any(OzoneManagerProtocolProtos.OMRequest.class), any(), anyLong()),
- atLeast(2)); // At least 2 calls confirms batching
-
- // Get all captured requests that were sent
- List<OzoneManagerProtocolProtos.OMRequest> capturedRequests =
requestCaptor.getAllValues();
- int totalPurgedKeysAcrossBatches = 0;
-
- // Iterate through each captured Ratis request (batch)
- for (OzoneManagerProtocolProtos.OMRequest omRequest : capturedRequests) {
- assertNotNull(omRequest);
- assertEquals(OzoneManagerProtocolProtos.Type.PurgeKeys,
omRequest.getCmdType());
-
- OzoneManagerProtocolProtos.PurgeKeysRequest purgeRequest =
omRequest.getPurgeKeysRequest();
-
- // At runtime we enforce ~90% of the Ratis limit as a safety margin,
- // but in tests we assert against the actual limit to avoid false
negatives.
- // This ensures no batch ever exceeds the true Ratis size limit.
- assertThat(omRequest.getSerializedSize())
- .as("Batch size " + omRequest.getSerializedSize() + " should be <=
ratisLimit " + actualRatisLimitBytes)
- .isLessThanOrEqualTo(actualRatisLimitBytes);
-
- // Sum up all the keys purged in this batch (may be spread across
multiple DeletedKeys entries)
- totalPurgedKeysAcrossBatches += purgeRequest.getDeletedKeysList()
- .stream()
- .mapToInt(OzoneManagerProtocolProtos.DeletedKeys::getKeysCount)
- .sum();
- }
+ private static final int ACTUAL_RATIS_LIMIT_BYTES = 1138;
+
+ @BeforeAll
+ void setup(@TempDir File testDir) throws Exception {
+ // failCallsFrequency = 0 means all calls succeed
+ scmBlockTestingClient = new ScmBlockLocationTestingClient(null, null, 0);
- // Assert that the sum of keys across all batches equals the total
number of keys initially deleted.
- assertEquals(numKeysToCreate, totalPurgedKeysAcrossBatches,
- "Total keys purged across all batches should match initial keys
deleted.");
+ createConfig(testDir);
+ customizeConfig();
+ createSubject();
+ }
- } finally {
- // Clean up the temporary OzoneManager and its resources
- if (testOm.stop()) {
- testOm.join();
+ @AfterEach
+ void resume() {
+ keyDeletingService.resume();
+ }
+
+ @AfterAll
+ void cleanup() {
+ if (om.stop()) {
+ om.join();
}
- // Clean up the temporary directory for this test
- org.apache.commons.io.FileUtils.deleteDirectory(innerTestDir);
}
- }
- private void createAndDeleteKeys(int keyCount, int numBlocks) throws
IOException {
- createAndDeleteKeysInternal(keyCount, numBlocks, null);
- }
+ private void customizeConfig() {
+ // Define a small Ratis limit to force multiple batches for testing
+ // The actual byte size of protobuf messages depends on content.
+ // A small value like 1KB or 2KB should ensure batching for ~10-20 keys.
+ final int testRatisLimitBytes = 1024; // 2 KB to encourage multiple
batches, 90% of the actualRatisLimitBytes.
+ // Set the specific Ratis limit for this test
+
conf.setStorageSize(OMConfigKeys.OZONE_OM_RATIS_LOG_APPENDER_QUEUE_BYTE_LIMIT,
+ testRatisLimitBytes, StorageUnit.BYTES);
+ }
+
+ @Test
+ @DisplayName("Verify PurgeKeysRequest is batched according to Ratis byte
limit")
+ @Flaky("HDDS-13661")
+ void testPurgeKeysRequestBatching() throws Exception {
+ keyDeletingService.suspend();
+
+ try (MockedStatic<OzoneManagerRatisUtils> mockedRatisUtils =
+ mockStatic(OzoneManagerRatisUtils.class, CALLS_REAL_METHODS)) {
+
+ // Capture all OMRequests submitted via Ratis
+ ArgumentCaptor<OzoneManagerProtocolProtos.OMRequest> requestCaptor =
+
ArgumentCaptor.forClass(OzoneManagerProtocolProtos.OMRequest.class);
+
+ // Mock submitRequest to capture requests and return success
+ mockedRatisUtils.when(() -> OzoneManagerRatisUtils.submitRequest(
+ any(OzoneManager.class),
+ requestCaptor.capture(), // Capture the OMRequest here
+ any(),
+ anyLong()))
+ .thenAnswer(invocation -> {
+ // Return a successful OMResponse for each captured request
+ return OzoneManagerProtocolProtos.OMResponse.newBuilder()
+ .setCmdType(OzoneManagerProtocolProtos.Type.PurgeKeys)
+ .setStatus(OzoneManagerProtocolProtos.Status.OK)
+ .build();
+ });
+
+ final int numKeysToCreate = 50; // Create enough keys to ensure
multiple batches
+ // Create and delete keys using the test-specific managers
+ createAndDeleteKeys(numKeysToCreate, 1);
- private void createAndDeleteKeys(int keyCount, int numBlocks, OmTestManagers
testManager) throws IOException {
- createAndDeleteKeysInternal(keyCount, numBlocks, testManager);
+ keyDeletingService.resume();
+
+ // Manually trigger the KeyDeletingService to run its task immediately.
+ // This will initiate the purge requests to Ratis.
+ keyDeletingService.runPeriodicalTaskNow();
+
+ // Verify that submitRequest was called multiple times.
+ // The exact number of calls depends on the key size and
testRatisLimitBytes,
+ // but it must be more than one to confirm batching.
+ mockedRatisUtils.verify(() -> OzoneManagerRatisUtils.submitRequest(
+ any(OzoneManager.class),
any(OzoneManagerProtocolProtos.OMRequest.class), any(), anyLong()),
+ atLeast(2)); // At least 2 calls confirms batching
+
+ // Get all captured requests that were sent
+ List<OzoneManagerProtocolProtos.OMRequest> capturedRequests =
requestCaptor.getAllValues();
+ int totalPurgedKeysAcrossBatches = 0;
+
+ // Iterate through each captured Ratis request (batch)
+ for (OzoneManagerProtocolProtos.OMRequest omRequest :
capturedRequests) {
+ assertNotNull(omRequest);
+ assertEquals(OzoneManagerProtocolProtos.Type.PurgeKeys,
omRequest.getCmdType());
+
+ OzoneManagerProtocolProtos.PurgeKeysRequest purgeRequest =
omRequest.getPurgeKeysRequest();
+
+ // At runtime we enforce ~90% of the Ratis limit as a safety margin,
+ // but in tests we assert against the actual limit to avoid false
negatives.
+ // This ensures no batch ever exceeds the true Ratis size limit.
+ assertThat(omRequest.getSerializedSize())
+ .as("Batch size " + omRequest.getSerializedSize() + " should be
<= ratisLimit " +
+ ACTUAL_RATIS_LIMIT_BYTES)
+ .isLessThanOrEqualTo(ACTUAL_RATIS_LIMIT_BYTES);
+
+ // Sum up all the keys purged in this batch (may be spread across
multiple DeletedKeys entries)
+ totalPurgedKeysAcrossBatches += purgeRequest.getDeletedKeysList()
+ .stream()
+ .mapToInt(OzoneManagerProtocolProtos.DeletedKeys::getKeysCount)
+ .sum();
+ }
+
+ // Assert that the sum of keys across all batches equals the total
number of keys initially deleted.
+ assertEquals(numKeysToCreate, totalPurgedKeysAcrossBatches,
+ "Total keys purged across all batches should match initial keys
deleted.");
+ }
+ }
}
- private void createAndDeleteKeysInternal(int keyCount, int numBlocks,
- OmTestManagers testManager) throws IOException {
+ private void createAndDeleteKeys(int keyCount, int numBlocks) throws
IOException {
for (int x = 0; x < keyCount; x++) {
final String volumeName = getTestName();
final String bucketName = uniqueObjectName("bucket");
final String keyName = uniqueObjectName("key");
- if (testManager != null) {
- // Create volume and bucket manually in metadata manager
- OMRequestTestUtils.addVolumeToOM(
- testManager.getKeyManager().getMetadataManager(),
- OmVolumeArgs.newBuilder()
- .setOwnerName("o")
- .setAdminName("a")
- .setVolume(volumeName)
- .build());
-
- OMRequestTestUtils.addBucketToOM(
- testManager.getKeyManager().getMetadataManager(),
- OmBucketInfo.newBuilder()
- .setVolumeName(volumeName)
- .setBucketName(bucketName)
- .setIsVersionEnabled(false)
- .build());
-
- // Create and delete key via provided writeClient
- OmKeyArgs keyArg = createAndCommitKey(volumeName, bucketName,
- keyName, numBlocks, 0, testManager.getWriteClient());
- testManager.getWriteClient().deleteKey(keyArg);
-
- } else {
- // Use default client-based creation
- createVolumeAndBucket(volumeName, bucketName, false);
+ // Use default client-based creation
+ createVolumeAndBucket(volumeName, bucketName, false);
- OmKeyArgs keyArg = createAndCommitKey(volumeName, bucketName,
- keyName, numBlocks);
- writeClient.deleteKey(keyArg);
- }
+ OmKeyArgs keyArg = createAndCommitKey(volumeName, bucketName,
+ keyName, numBlocks);
+ writeClient.deleteKey(keyArg);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]