This is an automated email from the ASF dual-hosted git repository.

arafat2198 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new b610fd0a42 HDDS-13327. Improve log and error handling while starting Recon (#8688)
b610fd0a42 is described below

commit b610fd0a42e075238385f977e27a139e38cfd99b
Author: Devesh Kumar Singh <[email protected]>
AuthorDate: Tue Jul 8 14:20:54 2025 +0530

    HDDS-13327. Improve log and error handling while starting Recon (#8688)
---
 .../org/apache/hadoop/ozone/recon/ReconServer.java | 46 +++++++++++++++++++---
 .../ozone/recon/fsck/ReconSafeModeMgrTask.java     |  6 ++-
 .../scm/ReconStorageContainerManagerFacade.java    |  3 ++
 .../spi/impl/OzoneManagerServiceProviderImpl.java  |  3 ++
 4 files changed, 51 insertions(+), 7 deletions(-)

diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java
index 5c75850d6c..0a1437ada7 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/ReconServer.java
@@ -48,7 +48,6 @@
 import org.apache.hadoop.ozone.recon.metrics.ReconTaskStatusMetrics;
 import org.apache.hadoop.ozone.recon.scm.ReconSafeModeManager;
 import org.apache.hadoop.ozone.recon.scm.ReconStorageConfig;
-import org.apache.hadoop.ozone.recon.scm.ReconStorageContainerManagerFacade;
 import org.apache.hadoop.ozone.recon.security.ReconCertificateClient;
 import org.apache.hadoop.ozone.recon.spi.OzoneManagerServiceProvider;
 import org.apache.hadoop.ozone.recon.spi.ReconContainerMetadataManager;
@@ -180,12 +179,8 @@ public Void call() throws Exception {
       isStarted = true;
       LOG.info("Recon server initialized successfully!");
     } catch (Exception e) {
-      ReconStorageContainerManagerFacade reconStorageContainerManagerFacade =
-          (ReconStorageContainerManagerFacade) this.getReconStorageContainerManager();
-      ReconContext reconContext = reconStorageContainerManagerFacade.getReconContext();
-      reconContext.updateHealthStatus(new AtomicBoolean(false));
-      reconContext.getErrors().add(ReconContext.ErrorCode.INTERNAL_ERROR);
       LOG.error("Error during initializing Recon server.", e);
+      updateAndLogReconHealthStatus();
     }
 
     ShutdownHookManager.get().addShutdownHook(() -> {
@@ -199,6 +194,45 @@ public Void call() throws Exception {
     return null;
   }
 
+  private void updateAndLogReconHealthStatus() {
+    ReconContext reconContext = injector.getInstance(ReconContext.class);
+    assert reconContext != null;
+
+    checkComponentAndLog(
+        this.getReconStorageContainerManager(),
+        "ReconStorageContainerManagerFacade is not initialized properly.",
+        reconContext
+    );
+
+    checkComponentAndLog(
+        this.getReconNamespaceSummaryManager(),
+        "ReconNamespaceSummaryManager is not initialized properly.",
+        reconContext
+    );
+
+    checkComponentAndLog(
+        this.getOzoneManagerServiceProvider(),
+        "OzoneManagerServiceProvider is not initialized properly.",
+        reconContext
+    );
+
+    checkComponentAndLog(
+        this.getReconContainerMetadataManager(),
+        "ReconContainerMetadataManager is not initialized properly.",
+        reconContext
+    );
+  }
+
+  private void checkComponentAndLog(Object component, String errorMessage, ReconContext context) {
+    // Updating health status and adding error code in ReconContext will help to expose the information to user
+    // via /recon/health endpoint.
+    if (component == null) {
+      LOG.error("{} Setting health status to false and adding error code.", errorMessage);
+      context.updateHealthStatus(new AtomicBoolean(false));
+      context.getErrors().add(ReconContext.ErrorCode.INTERNAL_ERROR);
+    }
+  }
+
   /**
    * Initializes secure Recon.
    * */
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java
index 5cffb5a84c..39c5186164 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/fsck/ReconSafeModeMgrTask.java
@@ -82,10 +82,14 @@ public synchronized void start() {
       }
       // Exceeded safe mode grace period. Exit safe mode
       if (safeModeManager.getInSafeMode()) {
+        LOG.warn("Recon could not exit safe mode after {} ms. Exiting safe mode anyway. " +
+            "Please check for any unexpected startup issues", timeElapsed);
         safeModeManager.setInSafeMode(false);
+      } else {
+        LOG.info("Recon exited safe mode after {} ms.", timeElapsed);
       }
     } catch (Throwable t) {
-      LOG.error("Exception in Missing Container task Thread.", t);
+      LOG.error("Exception in ReconSafeModeMgrTask Thread.", t);
       if (t instanceof InterruptedException) {
         Thread.currentThread().interrupt();
       }
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java
index 0dacc31955..cb773004b0 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/scm/ReconStorageContainerManagerFacade.java
@@ -436,6 +436,7 @@ public void start() {
     if (!this.safeModeManager.getInSafeMode()) {
       this.reconScmTasks.forEach(ReconScmTask::start);
     }
+    LOG.info("Successfully started Recon Storage Container Manager.");
   }
 
   /**
@@ -510,6 +511,8 @@ private void initializeSCMDB() {
       }
     } catch (IOException e) {
       LOG.error("Exception encountered while getting SCM DB.");
+      reconContext.updateHealthStatus(new AtomicBoolean(false));
+      reconContext.updateErrors(ReconContext.ErrorCode.INTERNAL_ERROR);
     } finally {
       isSyncDataFromSCMRunning.compareAndSet(true, false);
     }
diff --git a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
index d774b21296..f8b60fdd7f 100644
--- a/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
+++ b/hadoop-ozone/recon/src/main/java/org/apache/hadoop/ozone/recon/spi/impl/OzoneManagerServiceProviderImpl.java
@@ -247,6 +247,8 @@ public void start() {
       omMetadataManager.start(configuration);
     } catch (IOException ioEx) {
       LOG.error("Error starting Recon OM Metadata Manager.", ioEx);
+      reconContext.updateHealthStatus(new AtomicBoolean(false));
+      reconContext.updateErrors(ReconContext.ErrorCode.INTERNAL_ERROR);
     } catch (RuntimeException runtimeException) {
       LOG.warn("Unexpected runtime error starting Recon OM Metadata Manager.", runtimeException);
       LOG.warn("Trying to delete existing recon OM snapshot DB and fetch new one.");
@@ -309,6 +311,7 @@ public void start() {
     }
     reconTaskController.reInitializeTasks(omMetadataManager, reconOmTaskMap);
     startSyncDataFromOM(initialDelay);
+    LOG.info("Ozone Manager Service Provider is started.");
   }
 
   private void startSyncDataFromOM(long initialDelay) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to