This is an automated email from the ASF dual-hosted git repository.
weichiu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new e805c15c74 HDDS-12310. Online repair command to perform compaction on
om.db (#7957)
e805c15c74 is described below
commit e805c15c744471506a21656792b43c3bf70369ce
Author: Tejaskriya <[email protected]>
AuthorDate: Fri Mar 21 00:44:52 2025 +0530
HDDS-12310. Online repair command to perform compaction on om.db (#7957)
Co-authored-by: tejaskriya <[email protected]>
Co-authored-by: Wei-Chiu Chuang <[email protected]>
---
.../hadoop/ozone/om/protocol/OMAdminProtocol.java | 6 ++
.../protocolPB/OMAdminProtocolClientSideImpl.java | 22 +++++-
.../src/main/proto/OMAdminProtocol.proto | 17 ++++-
.../org/apache/hadoop/ozone/om/OzoneManager.java | 8 ++-
.../hadoop/ozone/om/service/CompactDBService.java | 70 +++++++++++++++++++
.../protocolPB/OMAdminProtocolServerSideImpl.java | 20 ++++++
.../apache/hadoop/ozone/repair/om/CompactOMDB.java | 78 ++++++++++++++++++++++
.../apache/hadoop/ozone/repair/om/OMRepair.java | 3 +-
8 files changed, 219 insertions(+), 5 deletions(-)
diff --git
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java
index f89c6388d3..8588620074 100644
---
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java
+++
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocol/OMAdminProtocol.java
@@ -39,4 +39,10 @@ public interface OMAdminProtocol extends Closeable {
* Remove OM from HA ring.
*/
void decommission(OMNodeDetails removeOMNode) throws IOException;
+
+ /**
+ * Requests compaction of a column family of om.db.
+ * @param columnFamily name of the om.db column family to compact
+ * @throws IOException if the request fails; the client-side implementation
+ * raises it when the RPC fails or the OM answers with an unsuccessful response
+ */
+ void compactOMDB(String columnFamily) throws IOException;
}
diff --git
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java
index 4512373b84..f7d22713b3 100644
---
a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java
+++
b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OMAdminProtocolClientSideImpl.java
@@ -40,6 +40,8 @@
import org.apache.hadoop.ozone.om.helpers.OMNodeDetails;
import org.apache.hadoop.ozone.om.protocol.OMAdminProtocol;
import org.apache.hadoop.ozone.om.protocol.OMConfiguration;
+import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.CompactRequest;
+import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.CompactResponse;
import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.DecommissionOMRequest;
import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.DecommissionOMResponse;
import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMConfigurationRequest;
@@ -212,9 +214,27 @@ public void decommission(OMNodeDetails removeOMNode)
throws IOException {
}
}
+ /**
+  * Sends a compaction request for one om.db column family to the OM behind
+  * this proxy.
+  *
+  * @param columnFamily name of the column family to compact
+  * @throws IOException if the RPC fails, or if the OM replies with
+  *         {@code success == false} (the OM's error text is included)
+  */
+ @Override
+ public void compactOMDB(String columnFamily) throws IOException {
+   CompactRequest request =
+       CompactRequest.newBuilder().setColumnFamily(columnFamily).build();
+
+   CompactResponse reply;
+   try {
+     reply = rpcProxy.compactDB(NULL_RPC_CONTROLLER, request);
+   } catch (ServiceException se) {
+     // Unwrap the transport-level exception into the IOException callers expect.
+     throw ProtobufHelper.getRemoteException(se);
+   }
+
+   if (reply.getSuccess()) {
+     return;
+   }
+   String failure = "Request to compact '" + columnFamily + "', sent to "
+       + omPrintInfo + " failed with error: " + reply.getErrorMsg();
+   throwException(failure);
+ }
+
private void throwException(String errorMsg)
throws IOException {
- throw new IOException("Failed to Decommission OM. Error: " + errorMsg);
+ throw new IOException("Request Failed. Error: " + errorMsg);
}
@Override
diff --git a/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto
b/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto
index 60e8ea171a..84a5de5d54 100644
--- a/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto
+++ b/hadoop-ozone/interface-client/src/main/proto/OMAdminProtocol.proto
@@ -69,11 +69,20 @@ message DecommissionOMResponse {
optional string errorMsg = 3;
}
+// Request from an admin client to compact one column family of the OM's db.
+message CompactRequest {
+ // Name of the column family to compact.
+ required string columnFamily = 1;
+}
+
+// Reply to a CompactRequest.
+message CompactResponse {
+ // True when the OM accepted the request; the server-side handler sets it to
+ // false when validating or issuing the compaction raised an exception.
+ required bool success = 1;
+ // Error text accompanying an unsuccessful response.
+ // NOTE(review): tag 2 is skipped here; DecommissionOMResponse also uses
+ // tag 3 for errorMsg, so this looks copied from it - confirm it is intended.
+ optional string errorMsg = 3;
+}
+
/**
The service for OM admin operations.
*/
service OzoneManagerAdminService {
- // RPC request to OM to return its confugration - in memory OM nodes list
+ // RPC request to OM to return its configuration - in memory OM nodes list
// and the anticipated nodes list from the config files (upon reloading).
rpc getOMConfiguration(OMConfigurationRequest)
returns(OMConfigurationResponse);
@@ -81,4 +90,8 @@ service OzoneManagerAdminService {
// RPC request from admin to remove an OM from the cluster
rpc decommission(DecommissionOMRequest)
returns(DecommissionOMResponse);
-}
\ No newline at end of file
+
+ // RPC request from admin to compact a column family of the OM's db
+ rpc compactDB(CompactRequest)
+ returns(CompactResponse);
+}
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index 45df187050..eb5d83b5a8 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -271,6 +271,7 @@
import org.apache.hadoop.ozone.om.request.OMClientRequest;
import org.apache.hadoop.ozone.om.s3.S3SecretCacheProvider;
import org.apache.hadoop.ozone.om.s3.S3SecretStoreProvider;
+import org.apache.hadoop.ozone.om.service.CompactDBService;
import org.apache.hadoop.ozone.om.service.OMRangerBGSyncService;
import org.apache.hadoop.ozone.om.service.QuotaRepairTask;
import org.apache.hadoop.ozone.om.snapshot.OmSnapshotUtils;
@@ -995,7 +996,7 @@ public void close() throws IOException {
/**
* Class which schedule saving metrics to a file.
*/
- private class ScheduleOMMetricsWriteTask extends TimerTask {
+ private final class ScheduleOMMetricsWriteTask extends TimerTask {
@Override
public void run() {
saveOmMetrics();
@@ -5029,6 +5030,11 @@ public void checkFeatureEnabled(OzoneManagerVersion
feature) throws OMException
}
}
+ public void compactOMDB(String columnFamily) throws IOException {
+ checkAdminUserPrivilege("compact column family " + columnFamily);
+ new CompactDBService(this).compact(columnFamily);
+ }
+
public OMExecutionFlow getOmExecutionFlow() {
return omExecutionFlow;
}
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/CompactDBService.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/CompactDBService.java
new file mode 100644
index 0000000000..f39521c921
--- /dev/null
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/service/CompactDBService.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.om.service;
+
+import java.io.IOException;
+import java.util.concurrent.CompletableFuture;
+import org.apache.hadoop.hdds.utils.db.RDBStore;
+import org.apache.hadoop.hdds.utils.db.RocksDatabase;
+import org.apache.hadoop.hdds.utils.db.managed.ManagedCompactRangeOptions;
+import org.apache.hadoop.ozone.om.OzoneManager;
+import org.apache.hadoop.util.Time;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This service issues a compaction request for a column family of om.db.
+ */
+public class CompactDBService {
+  private static final Logger LOG = LoggerFactory.getLogger(
+      CompactDBService.class);
+  private final OzoneManager om;
+
+  public CompactDBService(OzoneManager ozoneManager) {
+    this.om = ozoneManager;
+  }
+
+  /**
+   * Schedules compaction of the given column family and returns immediately.
+   *
+   * <p>No executor is supplied, so the task runs on the default asynchronous
+   * pool used by {@link CompletableFuture}. Failures are logged at WARN level
+   * and are not propagated: the returned future completes normally even when
+   * the compaction fails.
+   *
+   * @param columnFamily name of the om.db column family to compact
+   * @return a future that completes once the compaction attempt has finished
+   * @throws IOException never thrown by this implementation; retained so the
+   *         signature stays compatible with existing callers
+   */
+  public CompletableFuture<Void> compact(String columnFamily) throws
+      IOException {
+    return CompletableFuture.runAsync(() -> {
+      try {
+        compactAsync(columnFamily);
+      } catch (Exception e) {
+        LOG.warn("Failed to compact column family: {}", columnFamily, e);
+      }
+    });
+  }
+
+  /**
+   * Compacts the whole key range of one column family and logs how long it
+   * took.
+   *
+   * @param columnFamilyName name of the column family to compact
+   * @throws IOException if the column family handle cannot be found or the
+   *         compaction call fails
+   */
+  private void compactAsync(String columnFamilyName) throws IOException {
+    LOG.info("Compacting column family: {}", columnFamilyName);
+    long startTime = Time.monotonicNow();
+
+    // Resolve the database once instead of repeating the cast/lookup chain.
+    RocksDatabase db =
+        ((RDBStore) om.getMetadataManager().getStore()).getDb();
+
+    // Find CF Handler
+    RocksDatabase.ColumnFamily columnFamily =
+        db.getColumnFamily(columnFamilyName);
+    if (columnFamily == null) {
+      // Fail with a clear message instead of passing null into compactRange.
+      throw new IOException(
+          "Column family not found in om.db: " + columnFamilyName);
+    }
+
+    // The options object wraps a native handle; close it when done so the
+    // native memory is released even if compactRange throws.
+    try (ManagedCompactRangeOptions options =
+        new ManagedCompactRangeOptions()) {
+      options.setBottommostLevelCompaction(
+          ManagedCompactRangeOptions.BottommostLevelCompaction.kForce);
+      // null begin/end keys: compact the entire range of the column family.
+      db.compactRange(columnFamily, null, null, options);
+    }
+
+    LOG.info("Compaction of column family: {} completed in {} ms",
+        columnFamilyName, Time.monotonicNow() - startTime);
+  }
+}
diff --git
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java
index e4b7d8672a..a43e496fdc 100644
---
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java
+++
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/protocolPB/OMAdminProtocolServerSideImpl.java
@@ -27,6 +27,8 @@
import org.apache.hadoop.ozone.om.protocolPB.OMAdminProtocolPB;
import org.apache.hadoop.ozone.om.ratis.OzoneManagerRatisServer;
import org.apache.hadoop.ozone.om.ratis.utils.OzoneManagerRatisUtils;
+import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.CompactRequest;
+import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.CompactResponse;
import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.DecommissionOMRequest;
import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.DecommissionOMResponse;
import
org.apache.hadoop.ozone.protocol.proto.OzoneManagerAdminProtocolProtos.OMConfigurationRequest;
@@ -101,4 +103,22 @@ public DecommissionOMResponse decommission(RpcController
controller,
.setSuccess(true)
.build();
}
+
+ @Override
+ public CompactResponse compactDB(RpcController controller, CompactRequest
compactRequest)
+ throws ServiceException {
+ try {
+ // check if table exists. IOException is thrown if table is not found.
+
ozoneManager.getMetadataManager().getStore().getTable(compactRequest.getColumnFamily());
+ ozoneManager.compactOMDB(compactRequest.getColumnFamily());
+ } catch (Exception ex) {
+ return CompactResponse.newBuilder()
+ .setSuccess(false)
+ .setErrorMsg(ex.getMessage())
+ .build();
+ }
+
+ return CompactResponse.newBuilder()
+ .setSuccess(true).build();
+ }
}
diff --git
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/CompactOMDB.java
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/CompactOMDB.java
new file mode 100644
index 0000000000..87c5140c03
--- /dev/null
+++
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/CompactOMDB.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.ozone.repair.om;
+
+import java.io.IOException;
+import org.apache.hadoop.hdds.cli.HddsVersionProvider;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.ozone.om.helpers.OMNodeDetails;
+import org.apache.hadoop.ozone.om.protocolPB.OMAdminProtocolClientSideImpl;
+import org.apache.hadoop.ozone.repair.RepairTool;
+import org.apache.hadoop.security.UserGroupInformation;
+import picocli.CommandLine;
+
+/**
+ * Tool to perform compaction on a column family of an om.db.
+ */
[email protected](
+ name = "compact",
+ description = "CLI to compact a column family in the om.db. " +
+ "The compaction happens asynchronously. Requires admin privileges.",
+ mixinStandardHelpOptions = true,
+ versionProvider = HddsVersionProvider.class
+)
+public class CompactOMDB extends RepairTool {
+
+ @CommandLine.Option(names = {"--column-family", "--column_family", "--cf"},
+ required = true,
+ description = "Column family name")
+ private String columnFamilyName;
+
+ @CommandLine.Option(
+ names = {"--service-id", "--om-service-id"},
+ description = "Ozone Manager Service ID",
+ required = false
+ )
+ private String omServiceId;
+
+ @CommandLine.Option(
+ names = {"--node-id"},
+ description = "NodeID of the OM for which db needs to be compacted.",
+ required = false
+ )
+ private String nodeId;
+
+ @Override
+ public void execute() throws Exception {
+
+ OzoneConfiguration conf = getOzoneConf();
+ OMNodeDetails omNodeDetails = OMNodeDetails.getOMNodeDetailsFromConf(
+ conf, omServiceId, nodeId);
+ if (!isDryRun()) {
+ try (OMAdminProtocolClientSideImpl omAdminProtocolClient =
+ OMAdminProtocolClientSideImpl.createProxyForSingleOM(conf,
+ UserGroupInformation.getCurrentUser(), omNodeDetails)) {
+ omAdminProtocolClient.compactOMDB(columnFamilyName);
+ info("Compaction request issued for om.db of om node: %s,
column-family: %s.", nodeId, columnFamilyName);
+ info("Please check role logs of %s for completion status.", nodeId);
+ } catch (IOException ex) {
+ error("Couldn't compact column %s. \nException: %s", columnFamilyName,
ex);
+ }
+ }
+ }
+}
diff --git
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java
index 9b099b79c7..cac844be88 100644
---
a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java
+++
b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/OMRepair.java
@@ -31,7 +31,8 @@
FSORepairTool.class,
SnapshotRepair.class,
TransactionInfoRepair.class,
- QuotaRepair.class
+ QuotaRepair.class,
+ CompactOMDB.class
},
description = "Operational tool to repair OM.")
@MetaInfServices(RepairSubcommand.class)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]