This is an automated email from the ASF dual-hosted git repository.
arafat2198 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new b610fd0a42 HDDS-13327. Improve log and error handling while starting Recon (#8688)
b610fd0a42 is described below
commit b610fd0a42e075238385f977e27a139e38cfd99b
Author: Devesh Kumar Singh <[email protected]>
AuthorDate: Tue Jul 8 14:20:54 2025 +0530
HDDS-13327. Improve log and error handling while starting Recon (#8688)
---
.../org/apache/hadoop/ozone/recon/ReconServer.java | 46 +++++++++++++++++++---
.../ozone/recon/fsck/ReconSafeModeMgrTask.java | 6 ++-
.../scm/ReconStorageContainerManagerFacade.java | 3 ++
.../spi/impl/OzoneManagerServiceProviderImpl.java | 3 ++
4 files changed, 51 insertions(+), 7 deletions(-)
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java
index 5c75850d6c..0a1437ada7 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java
@@ -48,7 +48,6 @@
import org.apache.hadoop.ozone.recon.metrics.ReconTaskStatusMetrics;
import org.apache.hadoop.ozone.recon.scm.ReconSafeModeManager;
import org.apache.hadoop.ozone.recon.scm.ReconStorageConfig;
-import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade;
import org.apache.hadoop.ozone.recon.security.ReconCertificateClient;
import org.apache.hadoop.ozone.recon.spi.OzoneManagerServiceProvider;
import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager;
@@ -180,12 +179,8 @@ public Void call() throws Exception {
isStarted = true;
LOG.info("Recon server initialized successfully!");
} catch (Exception e) {
- ReconStorageContainerManagerFacade reconStorageContainerManagerFacade =
- (ReconStorageContainerManagerFacade) this.getReconStorageContainerManager();
- ReconContext reconContext = reconStorageContainerManagerFacade.getReconContext();
- reconContext.updateHealthStatus(new AtomicBoolean(false));
- reconContext.getErrors().add(ReconContext.ErrorCode.INTERNAL_ERROR);
LOG.error("Error during initializing Recon server.", e);
+ updateAndLogReconHealthStatus();
}
ShutdownHookManager.get().addShutdownHook(() -> {
@@ -199,6 +194,45 @@ public Void call() throws Exception {
return null;
}
+ private void updateAndLogReconHealthStatus() {
+ ReconContext reconContext = injector.getInstance(ReconContext.class);
+ assert reconContext != null;
+
+ checkComponentAndLog(
+ this.getReconStorageContainerManager(),
+ "ReconStorageContainerManagerFacade is not initialized properly.",
+ reconContext
+ );
+
+ checkComponentAndLog(
+ this.getReconNamespaceSummaryManager(),
+ "ReconNamespaceSummaryManager is not initialized properly.",
+ reconContext
+ );
+
+ checkComponentAndLog(
+ this.getOzoneManagerServiceProvider(),
+ "OzoneManagerServiceProvider is not initialized properly.",
+ reconContext
+ );
+
+ checkComponentAndLog(
+ this.getReconContainerMetadataManager(),
+ "ReconContainerMetadataManager is not initialized properly.",
+ reconContext
+ );
+ }
+
+ private void checkComponentAndLog(Object component, String errorMessage, ReconContext context) {
+ // Updating health status and adding error code in ReconContext will help to expose the information to user
+ // via /recon/health endpoint.
+ if (component == null) {
+ LOG.error("{} Setting health status to false and adding error code.", errorMessage);
+ context.updateHealthStatus(new AtomicBoolean(false));
+ context.getErrors().add(ReconContext.ErrorCode.INTERNAL_ERROR);
+ }
+ }
+
/**
* Initializes secure Recon.
* */
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java
index 5cffb5a84c..39c5186164 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java
@@ -82,10 +82,14 @@ public synchronized void start() {
}
// Exceeded safe mode grace period. Exit safe mode
if (safeModeManager.getInSafeMode()) {
+ LOG.warn("Recon could not exit safe mode after {} ms. Exiting safe mode anyway. " +
+ "Please check for any unexpected startup issues", timeElapsed);
safeModeManager.setInSafeMode(false);
+ } else {
+ LOG.info("Recon exited safe mode after {} ms.", timeElapsed);
}
} catch (Throwable t) {
- LOG.error("Exception in Missing Container task Thread.", t);
+ LOG.error("Exception in ReconSafeModeMgrTask Thread.", t);
if (t instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java
index 0dacc31955..cb773004b0 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java
@@ -436,6 +436,7 @@ public void start() {
if (!this.safeModeManager.getInSafeMode()) {
this.reconScmTasks.forEach(ReconScmTask::start);
}
+ LOG.info("Successfully started Recon Storage Container Manager.");
}
/**
@@ -510,6 +511,8 @@ private void initializeSCMDB() {
}
} catch (IOException e) {
LOG.error("Exception encountered while getting SCM DB.");
+ reconContext.updateHealthStatus(new AtomicBoolean(false));
+ reconContext.updateErrors(ReconContext.ErrorCode.INTERNAL_ERROR);
} finally {
isSyncDataFromSCMRunning.compareAndSet(true, false);
}
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
index d774b21296..f8b60fdd7f 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
@@ -247,6 +247,8 @@ public void start() {
omMetadataManager.start(configuration);
} catch (IOException ioEx) {
LOG.error("Error starting Recon OM Metadata Manager.", ioEx);
+ reconContext.updateHealthStatus(new AtomicBoolean(false));
+ reconContext.updateErrors(ReconContext.ErrorCode.INTERNAL_ERROR);
} catch (RuntimeException runtimeException) {
LOG.warn("Unexpected runtime error starting Recon OM Metadata Manager.",
runtimeException);
LOG.warn("Trying to delete existing recon OM snapshot DB and fetch new one.");
@@ -309,6 +311,7 @@ public void start() {
}
reconTaskController.reInitializeTasks(omMetadataManager, reconOmTaskMap);
startSyncDataFromOM(initialDelay);
+ LOG.info("Ozone Manager Service Provider is started.");
}
private void startSyncDataFromOM(long initialDelay) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]