This is an automated email from the ASF dual-hosted git repository.

lahirujayathilake pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/airavata-custos.git

commit 9dbbc9e24876d94150f658d5af4b2f7e3d07271c
Author: lahiruj <[email protected]>
AuthorDate: Fri Mar 27 14:19:17 2026 -0400

    add Prometheus metrics, health indicator, instrument poller/worker, and structured JSON logging for prod and MDC packet correlation
---
 allocations/access-ci-service/pom.xml              |  14 +-
 .../custos/access/ci/service/AmiePoller.java       |  23 +++-
 .../ci/service/metrics/AmieHealthIndicator.java    |  78 +++++++++++
 .../access/ci/service/metrics/AmieMetrics.java     | 124 +++++++++++++++++
 .../service/worker/amie/ProcessingEventWorker.java |  28 +++-
 .../src/main/resources/application.yml             |  15 ++
 .../resources/distribution/conf/application.yml    |  15 ++
 .../resources/distribution/conf/logback-spring.xml | 104 ++++++++++----
 .../src/main/resources/logback-spring.xml          | 104 ++++++++++----
 .../service/metrics/AmieHealthIndicatorTest.java   | 118 ++++++++++++++++
 .../access/ci/service/metrics/AmieMetricsTest.java | 129 +++++++++++++++++
 .../worker/amie/ProcessingEventWorkerTest.java     |  16 ++-
 compose/docker-compose.yml                         |  25 ++++
 compose/grafana/dashboards/amie-service.json       | 152 +++++++++++++++++++++
 .../grafana/provisioning/dashboards/dashboards.yml |  12 ++
 .../provisioning/datasources/prometheus.yml        |   9 ++
 compose/prometheus/prometheus.yml                  |  18 +++
 pom.xml                                            |   6 +
 18 files changed, 925 insertions(+), 65 deletions(-)

diff --git a/allocations/access-ci-service/pom.xml 
b/allocations/access-ci-service/pom.xml
index 0b0f10b98..528083099 100644
--- a/allocations/access-ci-service/pom.xml
+++ b/allocations/access-ci-service/pom.xml
@@ -77,7 +77,19 @@
             <groupId>com.google.protobuf</groupId>
             <artifactId>protobuf-java</artifactId>
         </dependency>
-        
+        <dependency>
+            <groupId>org.springframework.boot</groupId>
+            <artifactId>spring-boot-starter-actuator</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>io.micrometer</groupId>
+            <artifactId>micrometer-registry-prometheus</artifactId>
+        </dependency>
+        <dependency>
+            <groupId>net.logstash.logback</groupId>
+            <artifactId>logstash-logback-encoder</artifactId>
+        </dependency>
+
         <!-- Test dependencies -->
         <dependency>
             <groupId>org.springframework.boot</groupId>
diff --git 
a/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/AmiePoller.java
 
b/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/AmiePoller.java
index 7a179aff8..ca3d07e50 100644
--- 
a/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/AmiePoller.java
+++ 
b/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/AmiePoller.java
@@ -20,6 +20,7 @@ package org.apache.custos.access.ci.service;
 
 import com.fasterxml.jackson.databind.JsonNode;
 import org.apache.custos.access.ci.service.client.amie.AmieClient;
+import org.apache.custos.access.ci.service.metrics.AmieMetrics;
 import org.apache.custos.access.ci.service.model.amie.PacketEntity;
 import org.apache.custos.access.ci.service.model.amie.PacketStatus;
 import org.apache.custos.access.ci.service.model.amie.ProcessingEventEntity;
@@ -30,6 +31,7 @@ import 
org.apache.custos.access.ci.service.repo.amie.ProcessingEventRepository;
 import org.apache.custos.access.ci.service.util.ProtoUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.slf4j.MDC;
 import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
 import org.springframework.scheduling.annotation.Scheduled;
 import org.springframework.stereotype.Component;
@@ -51,11 +53,14 @@ public class AmiePoller {
     private final AmieClient client;
     private final PacketRepository packetRepo;
     private final ProcessingEventRepository eventRepo;
+    private final AmieMetrics amieMetrics;
 
-    public AmiePoller(AmieClient client, PacketRepository packetRepo, 
ProcessingEventRepository eventRepo) {
+    public AmiePoller(AmieClient client, PacketRepository packetRepo, 
ProcessingEventRepository eventRepo,
+                      AmieMetrics amieMetrics) {
         this.client = client;
         this.packetRepo = packetRepo;
         this.eventRepo = eventRepo;
+        this.amieMetrics = amieMetrics;
     }
 
 
@@ -71,12 +76,24 @@ public class AmiePoller {
         }
 
         LOGGER.info("Found {} packets to process.", packets.size());
+        amieMetrics.recordPollerFetch(packets.size());
+
         for (JsonNode packetNode : packets) {
+            String packetType = packetNode.path("type").asText(null);
+            long amiePacketRecId = 
packetNode.at("/header/packet_rec_id").asLong(-1);
+
+            MDC.put("amieId", String.valueOf(amiePacketRecId));
+            if (packetType != null) {
+                MDC.put("packetType", packetType);
+            }
             try {
                 processIndividualPacket(packetNode);
             } catch (Exception e) {
                 // If a malformed packet is found
                 LOGGER.error("An unexpected error occurred while processing a 
packet. Raw packet: {}", packetNode.toString(), e);
+            } finally {
+                MDC.remove("amieId");
+                MDC.remove("packetType");
             }
         }
     }
@@ -109,6 +126,8 @@ public class AmiePoller {
                     newPacket.setReceivedAt(Instant.now());
                     packetRepo.save(newPacket);
 
+                    amieMetrics.recordPacketReceived(packetType);
+
                     ProcessingEventEntity decodeEvent = new 
ProcessingEventEntity();
                     decodeEvent.setPacket(newPacket);
                     decodeEvent.setType(ProcessingEventType.DECODE_PACKET);
@@ -122,4 +141,4 @@ public class AmiePoller {
                 }
         );
     }
-}
\ No newline at end of file
+}
diff --git 
a/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/metrics/AmieHealthIndicator.java
 
b/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/metrics/AmieHealthIndicator.java
new file mode 100644
index 000000000..cb97d28b8
--- /dev/null
+++ 
b/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/metrics/AmieHealthIndicator.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package org.apache.custos.access.ci.service.metrics;
+
+import org.apache.custos.access.ci.service.config.AmieProperties;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.boot.actuate.health.AbstractHealthIndicator;
+import org.springframework.boot.actuate.health.Health;
+import org.springframework.http.ResponseEntity;
+import org.springframework.stereotype.Component;
+import org.springframework.web.client.RestClientException;
+import org.springframework.web.client.RestTemplate;
+
+/**
+ * Actuator health indicator for the upstream AMIE API.
+ *
+ * <p>Performs an HTTP GET to the configured AMIE base URL and reports
+ * {@link Health#up()} when the endpoint answers with a 2xx or 3xx status, or
+ * {@link Health#down()} for any other status or when a {@link RestClientException} is thrown.
+ */
+@Component
+public class AmieHealthIndicator extends AbstractHealthIndicator {
+
+    private static final Logger LOGGER = 
LoggerFactory.getLogger(AmieHealthIndicator.class);
+
+    private final AmieProperties amieProperties;
+    private final RestTemplate restTemplate;
+
+    public AmieHealthIndicator(AmieProperties amieProperties,
+                               @Qualifier("amieRestTemplate") RestTemplate 
restTemplate) {
+        super("AMIE API health check failed");
+        this.amieProperties = amieProperties;
+        this.restTemplate = restTemplate;
+    }
+
+    @Override
+    protected void doHealthCheck(Health.Builder builder) {
+        String baseUrl = amieProperties.getBaseUrl();
+        String siteCode = amieProperties.getSiteCode();
+        try {
+            ResponseEntity<String> response = 
restTemplate.getForEntity(baseUrl, String.class);
+            if (response.getStatusCode().is2xxSuccessful() || 
response.getStatusCode().is3xxRedirection()) {
+                builder.up()
+                        .withDetail("url", baseUrl)
+                        .withDetail("siteCode", siteCode)
+                        .withDetail("httpStatus", 
response.getStatusCode().value());
+            } else {
+                builder.down()
+                        .withDetail("url", baseUrl)
+                        .withDetail("siteCode", siteCode)
+                        .withDetail("httpStatus", 
response.getStatusCode().value());
+            }
+        } catch (RestClientException ex) {
+            LOGGER.warn("AMIE API health check failed for URL [{}]: {}", 
baseUrl, ex.getMessage());
+            builder.down(ex)
+                    .withDetail("url", baseUrl)
+                    .withDetail("siteCode", siteCode);
+        }
+    }
+}
diff --git 
a/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/metrics/AmieMetrics.java
 
b/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/metrics/AmieMetrics.java
new file mode 100644
index 000000000..981775687
--- /dev/null
+++ 
b/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/metrics/AmieMetrics.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package org.apache.custos.access.ci.service.metrics;
+
+import io.micrometer.core.instrument.Counter;
+import io.micrometer.core.instrument.MeterRegistry;
+import io.micrometer.core.instrument.Timer;
+import org.springframework.stereotype.Component;
+
+/**
+ * Metrics for the AMIE packet processing pipeline.
+ *
+ * <p>All metric names follow the Prometheus naming convention:
+ * {@code amie_<subsystem>_<measurement>_<unit>}.
+ */
+@Component
+public class AmieMetrics {
+
+    private static final String PACKETS_RECEIVED_TOTAL = 
"amie_packets_received_total";
+    private static final String PACKETS_PROCESSED_TOTAL = 
"amie_packets_processed_total";
+    private static final String EVENTS_RETRY_TOTAL = "amie_events_retry_total";
+    private static final String PROCESSING_DURATION_SECONDS = 
"amie_packet_processing_duration_seconds";
+    private static final String POLLER_PACKETS_FETCHED = 
"amie_poller_packets_fetched";
+
+    private static final String TAG_TYPE = "type";
+    private static final String TAG_OUTCOME = "outcome";
+    private static final String TAG_HANDLER = "handler";
+
+    private final MeterRegistry meterRegistry;
+
+    public AmieMetrics(MeterRegistry meterRegistry) {
+        this.meterRegistry = meterRegistry;
+    }
+
+    /**
+     * Increments the counter tracking raw packets received from the AMIE API, tagged by packet type.
+     *
+     * @param packetType the AMIE packet type (e.g., "request_project_create")
+     */
+    public void recordPacketReceived(String packetType) {
+        Counter.builder(PACKETS_RECEIVED_TOTAL)
+                .tag(TAG_TYPE, packetType)
+                .description("Total number of AMIE packets received from the 
API")
+                .register(meterRegistry)
+                .increment();
+    }
+
+    /**
+     * Increments the counter tracking processed packets, tagged by packet type and outcome.
+     *
+     * @param packetType the AMIE packet type (e.g., "request_project_create")
+     * @param outcome    the processing outcome (e.g., "succeeded", "retry_scheduled", "permanently_failed")
+     */
+    public void recordPacketProcessed(String packetType, String outcome) {
+        Counter.builder(PACKETS_PROCESSED_TOTAL)
+                .tag(TAG_TYPE, packetType)
+                .tag(TAG_OUTCOME, outcome)
+                .description("Total number of AMIE packets that completed 
processing")
+                .register(meterRegistry)
+                .increment();
+    }
+
+    /**
+     * Increments the counter tracking event retry attempts.
+     */
+    public void recordRetry() {
+        Counter.builder(EVENTS_RETRY_TOTAL)
+                .description("Total number of AMIE processing event retry 
attempts")
+                .register(meterRegistry)
+                .increment();
+    }
+
+    /**
+     * Starts a timer sample for measuring packet processing duration.
+     *
+     * @return a {@link Timer.Sample} that must be stopped via {@link #stopProcessingTimer}
+     */
+    public Timer.Sample startProcessingTimer() {
+        return Timer.start(meterRegistry);
+    }
+
+    /**
+     * Stops a previously started timer sample and records the duration against the
+     * {@code amie_packet_processing_duration_seconds} timer, tagged by handler type.
+     *
+     * @param sample      the sample returned by {@link #startProcessingTimer()}
+     * @param handlerType the value recorded in the {@code handler} tag (ProcessingEventWorker currently passes the packet type)
+     */
+    public void stopProcessingTimer(Timer.Sample sample, String handlerType) {
+        Timer timer = Timer.builder(PROCESSING_DURATION_SECONDS)
+                .tag(TAG_HANDLER, handlerType)
+                .description("Time taken to process an AMIE packet by handler 
type")
+                .register(meterRegistry);
+        sample.stop(timer);
+    }
+
+    /**
+     * Increments the counter tracking the number of packets fetched during a poller run.
+     *
+     * @param count the number of packets fetched
+     */
+    public void recordPollerFetch(int count) {
+        Counter.builder(POLLER_PACKETS_FETCHED)
+                .description("Total number of AMIE packets fetched by the 
poller")
+                .register(meterRegistry)
+                .increment(count);
+    }
+}
diff --git 
a/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/worker/amie/ProcessingEventWorker.java
 
b/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/worker/amie/ProcessingEventWorker.java
index bdc2d105b..9e5b5d268 100644
--- 
a/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/worker/amie/ProcessingEventWorker.java
+++ 
b/allocations/access-ci-service/src/main/java/org/apache/custos/access/ci/service/worker/amie/ProcessingEventWorker.java
@@ -19,7 +19,9 @@
 package org.apache.custos.access.ci.service.worker.amie;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
+import io.micrometer.core.instrument.Timer;
 import org.apache.custos.access.ci.service.handler.amie.PacketRouter;
+import org.apache.custos.access.ci.service.metrics.AmieMetrics;
 import org.apache.custos.access.ci.service.model.amie.PacketEntity;
 import org.apache.custos.access.ci.service.model.amie.PacketStatus;
 import org.apache.custos.access.ci.service.model.amie.ProcessingErrorEntity;
@@ -31,6 +33,7 @@ import 
org.apache.custos.access.ci.service.repo.amie.ProcessingErrorRepository;
 import org.apache.custos.access.ci.service.repo.amie.ProcessingEventRepository;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.slf4j.MDC;
 import org.springframework.context.annotation.Lazy;
 import org.springframework.scheduling.annotation.Scheduled;
 import org.springframework.stereotype.Component;
@@ -76,6 +79,7 @@ public class ProcessingEventWorker {
     private final PacketRepository packetRepo;
     private final ProcessingErrorRepository errorRepo;
     private final PacketRouter router;
+    private final AmieMetrics amieMetrics;
     private final ObjectMapper objectMapper = new ObjectMapper();
     private final ProcessingEventWorker self;
 
@@ -83,11 +87,13 @@ public class ProcessingEventWorker {
                                  PacketRepository packetRepo,
                                  ProcessingErrorRepository errorRepo,
                                  PacketRouter router,
+                                 AmieMetrics amieMetrics,
                                  @Lazy ProcessingEventWorker self) {
         this.eventRepo = eventRepo;
         this.packetRepo = packetRepo;
         this.errorRepo = errorRepo;
         this.router = router;
+        this.amieMetrics = amieMetrics;
         this.self = self;
     }
 
@@ -109,18 +115,33 @@ public class ProcessingEventWorker {
 
         for (ProcessingEventEntity event : eventsToProcess) {
             String eventId = event.getId();
+            PacketEntity packet = event.getPacket();
+
+            MDC.put("packetId", packet.getId());
+            MDC.put("amieId", String.valueOf(packet.getAmieId()));
+            MDC.put("packetType", packet.getType());
+
+            Timer.Sample timerSample = amieMetrics.startProcessingTimer();
             try {
                 self.executeEventInTransaction(event);
             } catch (Exception e) {
                 LOGGER.error("Transaction failed for eventId [{}]. Opening 
recovery transaction to record failure.",
                         eventId, e);
+                amieMetrics.stopProcessingTimer(timerSample, packet.getType());
                 try {
                     self.recordFailureInNewTransaction(eventId, e);
                 } catch (Exception recoveryEx) {
                     LOGGER.error("CRITICAL: Recovery transaction also failed 
for eventId [{}]. " +
                             "Event may remain stuck until the next worker 
cycle.", eventId, recoveryEx);
                 }
+                continue;
+            } finally {
+                MDC.remove("packetId");
+                MDC.remove("amieId");
+                MDC.remove("packetType");
+                MDC.remove("handler");
             }
+            amieMetrics.stopProcessingTimer(timerSample, packet.getType());
         }
     }
 
@@ -136,7 +157,7 @@ public class ProcessingEventWorker {
         eventRepo.saveAndFlush(event);
 
         var packetJson = objectMapper.readTree(packet.getRawJson());
-        router.route(packetJson, packet);
+        router.route(packetJson, packet, event.getId());
 
         handleSuccess(event, packet);
     }
@@ -166,10 +187,13 @@ public class ProcessingEventWorker {
         if (isRetryable) {
             Instant nextRetryAt = computeNextRetryAt(effectiveAttempts);
             event.setNextRetryAt(nextRetryAt);
+            amieMetrics.recordRetry();
+            amieMetrics.recordPacketProcessed(packet.getType(), 
"retry_scheduled");
             LOGGER.warn("Event [{}] for packet amie_id [{}] failed on attempt 
{}/{}. Scheduled for retry after [{}].",
                     eventId, packet.getAmieId(), effectiveAttempts, 
MAX_ATTEMPTS, nextRetryAt);
         } else {
             event.setNextRetryAt(null);
+            amieMetrics.recordPacketProcessed(packet.getType(), 
"permanently_failed");
             LOGGER.error("Event [{}] for packet amie_id [{}] is 
PERMANENTLY_FAILED after {} attempt(s). Manual intervention required.",
                     eventId, packet.getAmieId(), effectiveAttempts);
             packet.setStatus(PacketStatus.FAILED);
@@ -203,6 +227,8 @@ public class ProcessingEventWorker {
             packetRepo.save(packet);
         }
 
+        amieMetrics.recordPacketProcessed(packet.getType(), "succeeded");
+
         LOGGER.info("Successfully processed event [{}] for packet amie_id 
[{}].",
                 event.getType(), packet.getAmieId());
     }
diff --git a/allocations/access-ci-service/src/main/resources/application.yml 
b/allocations/access-ci-service/src/main/resources/application.yml
index 49506f8c6..0156e6caf 100644
--- a/allocations/access-ci-service/src/main/resources/application.yml
+++ b/allocations/access-ci-service/src/main/resources/application.yml
@@ -18,6 +18,8 @@ server:
   port: 8083
 
 spring:
+  profiles:
+    active: dev
   application:
     name: access-ci-service
   datasource:
@@ -55,6 +57,19 @@ logging:
   level:
     root: info
 
+management:
+  endpoints:
+    web:
+      exposure:
+        include: health, prometheus, info
+  endpoint:
+    health:
+      show-details: when-authorized
+  prometheus:
+    metrics:
+      export:
+        enabled: true
+
 springdoc:
   swagger-ui:
     title: CUSTOS ACCESS CI SERVICE API
diff --git 
a/allocations/access-ci-service/src/main/resources/distribution/conf/application.yml
 
b/allocations/access-ci-service/src/main/resources/distribution/conf/application.yml
index 49506f8c6..fbd9d6ca4 100644
--- 
a/allocations/access-ci-service/src/main/resources/distribution/conf/application.yml
+++ 
b/allocations/access-ci-service/src/main/resources/distribution/conf/application.yml
@@ -18,6 +18,8 @@ server:
   port: 8083
 
 spring:
+  profiles:
+    active: prod
   application:
     name: access-ci-service
   datasource:
@@ -55,6 +57,19 @@ logging:
   level:
     root: info
 
+management:
+  endpoints:
+    web:
+      exposure:
+        include: health, prometheus, info
+  endpoint:
+    health:
+      show-details: when-authorized
+  prometheus:
+    metrics:
+      export:
+        enabled: true
+
 springdoc:
   swagger-ui:
     title: CUSTOS ACCESS CI SERVICE API
diff --git 
a/allocations/access-ci-service/src/main/resources/distribution/conf/logback-spring.xml
 
b/allocations/access-ci-service/src/main/resources/distribution/conf/logback-spring.xml
index 295c31dae..42794e77f 100644
--- 
a/allocations/access-ci-service/src/main/resources/distribution/conf/logback-spring.xml
+++ 
b/allocations/access-ci-service/src/main/resources/distribution/conf/logback-spring.xml
@@ -23,33 +23,77 @@
 <configuration>
     <include resource="org/springframework/boot/logging/logback/base.xml"/>
     <springProperty scope="context" name="appName" 
source="spring.application.name"/>
-    <!-- Rolling File Appender -->
-    <appender name="FILE" 
class="ch.qos.logback.core.rolling.RollingFileAppender">
-        <file>logs/custos-amie-decoder.log</file>
-        <encoder>
-            <pattern>%date{YYYY-MM-dd HH:mm:ss.SSS} app=${appName}, 
host=${HOSTNAME}, traceID=%X{traceId:-NONE},
-                level=%-5level, [%thread] %logger{36} - %msg%n
-            </pattern>
-        </encoder>
-        <rollingPolicy 
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
-            
<fileNamePattern>logs/%d{yyyy-MM}/custos-amie-decoder-%d{MM-dd-yyyy}-%i.log.gz</fileNamePattern>
-            <maxFileSize>10MB</maxFileSize>
-            <maxHistory>30</maxHistory>
-            <totalSizeCap>1GB</totalSizeCap>
-        </rollingPolicy>
-    </appender>
-
-    <appender name="ASYNC_FILE" class="ch.qos.logback.classic.AsyncAppender">
-        <appender-ref ref="FILE"/>
-        <queueSize>500</queueSize>
-        <discardingThreshold>0</discardingThreshold>
-        <includeCallerData>true</includeCallerData>
-    </appender>
-
-    <logger name="org.hibernate" level="ERROR"/>
-    <logger name="org.springframework" level="INFO"/>
-
-    <root level="INFO">
-        <appender-ref ref="ASYNC_FILE"/>
-    </root>
-</configuration>
\ No newline at end of file
+
+    <!-- ================================================================ -->
+    <!-- DEV / DEFAULT PROFILE — human-readable text format with MDC      -->
+    <!-- ================================================================ -->
+    <springProfile name="default,dev">
+
+        <appender name="FILE" 
class="ch.qos.logback.core.rolling.RollingFileAppender">
+            <file>logs/custos-amie-decoder.log</file>
+            <encoder>
+                <pattern>%date{yyyy-MM-dd HH:mm:ss.SSS} app=${appName}, 
host=${HOSTNAME}, traceId=%X{traceId:-}, packetId=%X{packetId:-}, 
amieId=%X{amieId:-}, packetType=%X{packetType:-}, handler=%X{handler:-}, 
level=%-5level, [%thread] %logger{36} - %msg%n</pattern>
+            </encoder>
+            <rollingPolicy 
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+                
<fileNamePattern>logs/%d{yyyy-MM}/custos-amie-decoder-%d{MM-dd-yyyy}-%i.log.gz</fileNamePattern>
+                <maxFileSize>10MB</maxFileSize>
+                <maxHistory>30</maxHistory>
+                <totalSizeCap>1GB</totalSizeCap>
+            </rollingPolicy>
+        </appender>
+
+        <appender name="ASYNC_FILE" 
class="ch.qos.logback.classic.AsyncAppender">
+            <appender-ref ref="FILE"/>
+            <queueSize>500</queueSize>
+            <discardingThreshold>0</discardingThreshold>
+            <includeCallerData>true</includeCallerData>
+        </appender>
+
+        <logger name="org.hibernate" level="ERROR"/>
+        <logger name="org.springframework" level="INFO"/>
+
+        <root level="INFO">
+            <appender-ref ref="ASYNC_FILE"/>
+        </root>
+
+    </springProfile>
+
+    <!-- ================================================================ -->
+    <!-- PROD PROFILE — structured JSON format for log aggregation        -->
+    <!-- ================================================================ -->
+    <springProfile name="prod">
+
+        <appender name="FILE_JSON" 
class="ch.qos.logback.core.rolling.RollingFileAppender">
+            <file>logs/custos-amie-decoder.log</file>
+            <encoder class="net.logstash.logback.encoder.LogstashEncoder">
+                <includeMdcKeyName>packetId</includeMdcKeyName>
+                <includeMdcKeyName>amieId</includeMdcKeyName>
+                <includeMdcKeyName>packetType</includeMdcKeyName>
+                <includeMdcKeyName>handler</includeMdcKeyName>
+                <includeMdcKeyName>traceId</includeMdcKeyName>
+            </encoder>
+            <rollingPolicy 
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+                
<fileNamePattern>logs/%d{yyyy-MM}/custos-amie-decoder-%d{MM-dd-yyyy}-%i.log.gz</fileNamePattern>
+                <maxFileSize>10MB</maxFileSize>
+                <maxHistory>30</maxHistory>
+                <totalSizeCap>1GB</totalSizeCap>
+            </rollingPolicy>
+        </appender>
+
+        <appender name="ASYNC_FILE" 
class="ch.qos.logback.classic.AsyncAppender">
+            <appender-ref ref="FILE_JSON"/>
+            <queueSize>500</queueSize>
+            <discardingThreshold>0</discardingThreshold>
+            <includeCallerData>true</includeCallerData>
+        </appender>
+
+        <logger name="org.hibernate" level="ERROR"/>
+        <logger name="org.springframework" level="INFO"/>
+
+        <root level="INFO">
+            <appender-ref ref="ASYNC_FILE"/>
+        </root>
+
+    </springProfile>
+
+</configuration>
diff --git 
a/allocations/access-ci-service/src/main/resources/logback-spring.xml 
b/allocations/access-ci-service/src/main/resources/logback-spring.xml
index 295c31dae..42794e77f 100644
--- a/allocations/access-ci-service/src/main/resources/logback-spring.xml
+++ b/allocations/access-ci-service/src/main/resources/logback-spring.xml
@@ -23,33 +23,77 @@
 <configuration>
     <include resource="org/springframework/boot/logging/logback/base.xml"/>
     <springProperty scope="context" name="appName" 
source="spring.application.name"/>
-    <!-- Rolling File Appender -->
-    <appender name="FILE" 
class="ch.qos.logback.core.rolling.RollingFileAppender">
-        <file>logs/custos-amie-decoder.log</file>
-        <encoder>
-            <pattern>%date{YYYY-MM-dd HH:mm:ss.SSS} app=${appName}, 
host=${HOSTNAME}, traceID=%X{traceId:-NONE},
-                level=%-5level, [%thread] %logger{36} - %msg%n
-            </pattern>
-        </encoder>
-        <rollingPolicy 
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
-            
<fileNamePattern>logs/%d{yyyy-MM}/custos-amie-decoder-%d{MM-dd-yyyy}-%i.log.gz</fileNamePattern>
-            <maxFileSize>10MB</maxFileSize>
-            <maxHistory>30</maxHistory>
-            <totalSizeCap>1GB</totalSizeCap>
-        </rollingPolicy>
-    </appender>
-
-    <appender name="ASYNC_FILE" class="ch.qos.logback.classic.AsyncAppender">
-        <appender-ref ref="FILE"/>
-        <queueSize>500</queueSize>
-        <discardingThreshold>0</discardingThreshold>
-        <includeCallerData>true</includeCallerData>
-    </appender>
-
-    <logger name="org.hibernate" level="ERROR"/>
-    <logger name="org.springframework" level="INFO"/>
-
-    <root level="INFO">
-        <appender-ref ref="ASYNC_FILE"/>
-    </root>
-</configuration>
\ No newline at end of file
+
+    <!-- ================================================================ -->
+    <!-- DEV / DEFAULT PROFILE — human-readable text format with MDC      -->
+    <!-- ================================================================ -->
+    <springProfile name="default,dev">
+
+        <appender name="FILE" 
class="ch.qos.logback.core.rolling.RollingFileAppender">
+            <file>logs/custos-amie-decoder.log</file>
+            <encoder>
+                <pattern>%date{yyyy-MM-dd HH:mm:ss.SSS} app=${appName}, 
host=${HOSTNAME}, traceId=%X{traceId:-}, packetId=%X{packetId:-}, 
amieId=%X{amieId:-}, packetType=%X{packetType:-}, handler=%X{handler:-}, 
level=%-5level, [%thread] %logger{36} - %msg%n</pattern>
+            </encoder>
+            <rollingPolicy 
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+                
<fileNamePattern>logs/%d{yyyy-MM}/custos-amie-decoder-%d{MM-dd-yyyy}-%i.log.gz</fileNamePattern>
+                <maxFileSize>10MB</maxFileSize>
+                <maxHistory>30</maxHistory>
+                <totalSizeCap>1GB</totalSizeCap>
+            </rollingPolicy>
+        </appender>
+
+        <appender name="ASYNC_FILE" 
class="ch.qos.logback.classic.AsyncAppender">
+            <appender-ref ref="FILE"/>
+            <queueSize>500</queueSize>
+            <discardingThreshold>0</discardingThreshold>
+            <includeCallerData>true</includeCallerData>
+        </appender>
+
+        <logger name="org.hibernate" level="ERROR"/>
+        <logger name="org.springframework" level="INFO"/>
+
+        <root level="INFO">
+            <appender-ref ref="ASYNC_FILE"/>
+        </root>
+
+    </springProfile>
+
+    <!-- ================================================================ -->
+    <!-- PROD PROFILE — structured JSON format for log aggregation        -->
+    <!-- ================================================================ -->
+    <springProfile name="prod">
+
+        <appender name="FILE_JSON" 
class="ch.qos.logback.core.rolling.RollingFileAppender">
+            <file>logs/custos-amie-decoder.log</file>
+            <encoder class="net.logstash.logback.encoder.LogstashEncoder">
+                <includeMdcKeyName>packetId</includeMdcKeyName>
+                <includeMdcKeyName>amieId</includeMdcKeyName>
+                <includeMdcKeyName>packetType</includeMdcKeyName>
+                <includeMdcKeyName>handler</includeMdcKeyName>
+                <includeMdcKeyName>traceId</includeMdcKeyName>
+            </encoder>
+            <rollingPolicy 
class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+                
<fileNamePattern>logs/%d{yyyy-MM}/custos-amie-decoder-%d{MM-dd-yyyy}-%i.log.gz</fileNamePattern>
+                <maxFileSize>10MB</maxFileSize>
+                <maxHistory>30</maxHistory>
+                <totalSizeCap>1GB</totalSizeCap>
+            </rollingPolicy>
+        </appender>
+
+        <appender name="ASYNC_FILE" 
class="ch.qos.logback.classic.AsyncAppender">
+            <appender-ref ref="FILE_JSON"/>
+            <queueSize>500</queueSize>
+            <discardingThreshold>0</discardingThreshold>
+            <includeCallerData>true</includeCallerData>
+        </appender>
+
+        <logger name="org.hibernate" level="ERROR"/>
+        <logger name="org.springframework" level="INFO"/>
+
+        <root level="INFO">
+            <appender-ref ref="ASYNC_FILE"/>
+        </root>
+
+    </springProfile>
+
+</configuration>
diff --git 
a/allocations/access-ci-service/src/test/java/org/apache/custos/access/ci/service/metrics/AmieHealthIndicatorTest.java
 
b/allocations/access-ci-service/src/test/java/org/apache/custos/access/ci/service/metrics/AmieHealthIndicatorTest.java
new file mode 100644
index 000000000..2ec32e14c
--- /dev/null
+++ 
b/allocations/access-ci-service/src/test/java/org/apache/custos/access/ci/service/metrics/AmieHealthIndicatorTest.java
@@ -0,0 +1,118 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package org.apache.custos.access.ci.service.metrics;
+
+import org.apache.custos.access.ci.service.config.AmieProperties;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.extension.ExtendWith;
+import org.mockito.Mock;
+import org.mockito.junit.jupiter.MockitoExtension;
+import org.springframework.boot.actuate.health.Health;
+import org.springframework.boot.actuate.health.Status;
+import org.springframework.http.HttpStatus;
+import org.springframework.http.ResponseEntity;
+import org.springframework.web.client.ResourceAccessException;
+import org.springframework.web.client.RestTemplate;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.Mockito.when;
+
+@ExtendWith(MockitoExtension.class)
+@Tag("unit")
+class AmieHealthIndicatorTest {
+
+    private static final String BASE_URL = "https://a3mdev.xsede.org/amie-api-test";
+    private static final String SITE_CODE = "NEXUS";
+
+    @Mock
+    private RestTemplate restTemplate;
+
+    private AmieProperties amieProperties;
+    private AmieHealthIndicator healthIndicator;
+
+    @BeforeEach
+    void setUp() {
+        amieProperties = new AmieProperties();
+        amieProperties.setBaseUrl(BASE_URL);
+        amieProperties.setSiteCode(SITE_CODE);
+        healthIndicator = new AmieHealthIndicator(amieProperties, 
restTemplate);
+    }
+
+    @Test
+    void health_whenAmieApiReturns200_shouldBeUp() {
+        when(restTemplate.getForEntity(BASE_URL, String.class))
+                .thenReturn(ResponseEntity.ok("OK"));
+
+        Health health = healthIndicator.health();
+
+        assertThat(health.getStatus()).isEqualTo(Status.UP);
+        assertThat(health.getDetails()).containsEntry("url", BASE_URL);
+        assertThat(health.getDetails()).containsEntry("siteCode", SITE_CODE);
+        assertThat(health.getDetails()).containsEntry("httpStatus", 200);
+    }
+
+    @Test
+    void health_whenAmieApiReturns302_shouldBeUp() {
+        when(restTemplate.getForEntity(BASE_URL, String.class))
+                .thenReturn(ResponseEntity.status(HttpStatus.FOUND).build());
+
+        Health health = healthIndicator.health();
+
+        assertThat(health.getStatus()).isEqualTo(Status.UP);
+        assertThat(health.getDetails()).containsEntry("httpStatus", 302);
+    }
+
+    @Test
+    void health_whenAmieApiReturns500_shouldBeDown() {
+        when(restTemplate.getForEntity(BASE_URL, String.class))
+                
.thenReturn(ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).body("error"));
+
+        Health health = healthIndicator.health();
+
+        assertThat(health.getStatus()).isEqualTo(Status.DOWN);
+        assertThat(health.getDetails()).containsEntry("url", BASE_URL);
+        assertThat(health.getDetails()).containsEntry("httpStatus", 500);
+    }
+
+    @Test
+    void health_whenRestClientExceptionThrown_shouldBeDown() {
+        when(restTemplate.getForEntity(BASE_URL, String.class))
+                .thenThrow(new ResourceAccessException("Connection refused"));
+
+        Health health = healthIndicator.health();
+
+        assertThat(health.getStatus()).isEqualTo(Status.DOWN);
+        assertThat(health.getDetails()).containsEntry("url", BASE_URL);
+        assertThat(health.getDetails()).containsEntry("siteCode", SITE_CODE);
+    }
+
+    @Test
+    void health_whenNetworkTimeout_shouldIncludeUrlInDownDetails() {
+        when(restTemplate.getForEntity(BASE_URL, String.class))
+                .thenThrow(new ResourceAccessException("Read timed out"));
+
+        Health health = healthIndicator.health();
+
+        assertThat(health.getStatus()).isEqualTo(Status.DOWN);
+        assertThat(health.getDetails()).containsKey("url");
+        assertThat(health.getDetails().get("url")).isEqualTo(BASE_URL);
+    }
+}
diff --git a/allocations/access-ci-service/src/test/java/org/apache/custos/access/ci/service/metrics/AmieMetricsTest.java b/allocations/access-ci-service/src/test/java/org/apache/custos/access/ci/service/metrics/AmieMetricsTest.java
new file mode 100644
index 000000000..fe1e10721
--- /dev/null
+++ b/allocations/access-ci-service/src/test/java/org/apache/custos/access/ci/service/metrics/AmieMetricsTest.java
@@ -0,0 +1,129 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ *  specific language governing permissions and limitations
+ *  under the License.
+ */
+package org.apache.custos.access.ci.service.metrics;
+
+import io.micrometer.core.instrument.Counter;
+import io.micrometer.core.instrument.Timer;
+import io.micrometer.core.instrument.simple.SimpleMeterRegistry;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+@Tag("unit")
+class AmieMetricsTest {
+
+    private SimpleMeterRegistry registry;
+    private AmieMetrics amieMetrics;
+
+    @BeforeEach
+    void setUp() {
+        registry = new SimpleMeterRegistry();
+        amieMetrics = new AmieMetrics(registry);
+    }
+
+    @Test
+    void recordPacketReceived_shouldIncrementCounterWithTypeTag() {
+        amieMetrics.recordPacketReceived("request_project_create");
+        amieMetrics.recordPacketReceived("request_project_create");
+        amieMetrics.recordPacketReceived("request_user_modify");
+
+        Counter createCounter = registry.find("amie_packets_received_total")
+                .tag("type", "request_project_create")
+                .counter();
+        Counter modifyCounter = registry.find("amie_packets_received_total")
+                .tag("type", "request_user_modify")
+                .counter();
+
+        assertThat(createCounter).isNotNull();
+        assertThat(createCounter.count()).isEqualTo(2.0);
+        assertThat(modifyCounter).isNotNull();
+        assertThat(modifyCounter.count()).isEqualTo(1.0);
+    }
+
+    @Test
+    void recordPacketProcessed_shouldIncrementCounterWithTypeAndOutcomeTags() {
+        amieMetrics.recordPacketProcessed("request_project_create", "success");
+        amieMetrics.recordPacketProcessed("request_project_create", "failure");
+        amieMetrics.recordPacketProcessed("request_project_create", "success");
+
+        Counter successCounter = registry.find("amie_packets_processed_total")
+                .tag("type", "request_project_create")
+                .tag("outcome", "success")
+                .counter();
+        Counter failureCounter = registry.find("amie_packets_processed_total")
+                .tag("type", "request_project_create")
+                .tag("outcome", "failure")
+                .counter();
+
+        assertThat(successCounter).isNotNull();
+        assertThat(successCounter.count()).isEqualTo(2.0);
+        assertThat(failureCounter).isNotNull();
+        assertThat(failureCounter.count()).isEqualTo(1.0);
+    }
+
+    @Test
+    void recordRetry_shouldIncrementRetryCounter() {
+        amieMetrics.recordRetry();
+        amieMetrics.recordRetry();
+        amieMetrics.recordRetry();
+
+        Counter retryCounter = 
registry.find("amie_events_retry_total").counter();
+
+        assertThat(retryCounter).isNotNull();
+        assertThat(retryCounter.count()).isEqualTo(3.0);
+    }
+
+    @Test
+    void startAndStopProcessingTimer_shouldRecordDurationWithHandlerTag() 
throws InterruptedException {
+        Timer.Sample sample = amieMetrics.startProcessingTimer();
+        Thread.sleep(5);
+        amieMetrics.stopProcessingTimer(sample, "RequestProjectCreateHandler");
+
+        Timer timer = registry.find("amie_packet_processing_duration_seconds")
+                .tag("handler", "RequestProjectCreateHandler")
+                .timer();
+
+        assertThat(timer).isNotNull();
+        assertThat(timer.count()).isEqualTo(1);
+        
assertThat(timer.totalTime(java.util.concurrent.TimeUnit.MILLISECONDS)).isGreaterThan(0);
+    }
+
+    @Test
+    void recordPollerFetch_shouldIncrementByCount() {
+        amieMetrics.recordPollerFetch(5);
+        amieMetrics.recordPollerFetch(3);
+
+        Counter fetchCounter = 
registry.find("amie_poller_packets_fetched").counter();
+
+        assertThat(fetchCounter).isNotNull();
+        assertThat(fetchCounter.count()).isEqualTo(8.0);
+    }
+
+    @Test
+    void recordPollerFetch_withZeroCount_shouldNotChangeCounter() {
+        amieMetrics.recordPollerFetch(0);
+
+        Counter fetchCounter = 
registry.find("amie_poller_packets_fetched").counter();
+
+        assertThat(fetchCounter).isNotNull();
+        assertThat(fetchCounter.count()).isEqualTo(0.0);
+    }
+}
diff --git a/allocations/access-ci-service/src/test/java/org/apache/custos/access/ci/service/worker/amie/ProcessingEventWorkerTest.java b/allocations/access-ci-service/src/test/java/org/apache/custos/access/ci/service/worker/amie/ProcessingEventWorkerTest.java
index 05e7f1df5..9a8ab9a0a 100644
--- a/allocations/access-ci-service/src/test/java/org/apache/custos/access/ci/service/worker/amie/ProcessingEventWorkerTest.java
+++ b/allocations/access-ci-service/src/test/java/org/apache/custos/access/ci/service/worker/amie/ProcessingEventWorkerTest.java
@@ -19,6 +19,7 @@
 package org.apache.custos.access.ci.service.worker.amie;
 
 import org.apache.custos.access.ci.service.handler.amie.PacketRouter;
+import org.apache.custos.access.ci.service.metrics.AmieMetrics;
 import org.apache.custos.access.ci.service.model.amie.PacketEntity;
 import org.apache.custos.access.ci.service.model.amie.PacketStatus;
 import org.apache.custos.access.ci.service.model.amie.ProcessingErrorEntity;
@@ -43,7 +44,9 @@ import java.util.Optional;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.mockito.ArgumentMatchers.any;
 import static org.mockito.ArgumentMatchers.anyList;
+import static org.mockito.ArgumentMatchers.anyString;
 import static org.mockito.Mockito.doThrow;
+import static org.mockito.Mockito.lenient;
 import static org.mockito.Mockito.never;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
@@ -67,6 +70,9 @@ class ProcessingEventWorkerTest {
     @Mock
     private PacketRouter router;
 
+    @Mock
+    private AmieMetrics amieMetrics;
+
     @Mock
     private ProcessingEventWorker self;
 
@@ -74,7 +80,8 @@ class ProcessingEventWorkerTest {
 
     @BeforeEach
     void setUp() {
-        worker = new ProcessingEventWorker(eventRepo, packetRepo, errorRepo, 
router, self);
+        lenient().when(amieMetrics.startProcessingTimer()).thenReturn(null);
+        worker = new ProcessingEventWorker(eventRepo, packetRepo, errorRepo, 
router, amieMetrics, self);
     }
 
     // ------------------------------------------------------------------
@@ -103,6 +110,8 @@ class ProcessingEventWorkerTest {
         verify(self).executeEventInTransaction(event1);
         verify(self).executeEventInTransaction(event2);
         verify(self, never()).recordFailureInNewTransaction(any(), any());
+        verify(amieMetrics, 
org.mockito.Mockito.times(2)).startProcessingTimer();
+        verify(amieMetrics, 
org.mockito.Mockito.times(2)).stopProcessingTimer(any(), anyString());
     }
 
     @Test
@@ -164,6 +173,8 @@ class ProcessingEventWorkerTest {
 
         verify(eventRepo).save(event);
         verify(packetRepo, never()).save(any());
+        verify(amieMetrics).recordRetry();
+        verify(amieMetrics).recordPacketProcessed("request_account_create", 
"retry_scheduled");
 
         ArgumentCaptor<ProcessingErrorEntity> errorCaptor = 
ArgumentCaptor.forClass(ProcessingErrorEntity.class);
         verify(errorRepo).save(errorCaptor.capture());
@@ -219,6 +230,9 @@ class ProcessingEventWorkerTest {
         assertThat(packet.getStatus()).isEqualTo(PacketStatus.FAILED);
         assertThat(packet.getLastError()).isEqualTo("final failure");
 
+        verify(amieMetrics).recordPacketProcessed("request_account_create", 
"permanently_failed");
+        verify(amieMetrics, never()).recordRetry();
+
         verify(eventRepo).save(event);
         verify(errorRepo).save(any(ProcessingErrorEntity.class));
     }
diff --git a/compose/docker-compose.yml b/compose/docker-compose.yml
index 5e883281e..2895e3443 100644
--- a/compose/docker-compose.yml
+++ b/compose/docker-compose.yml
@@ -57,6 +57,31 @@ services:
     ports:
       - 18080:8080
 
+  prometheus:
+    image: prom/prometheus:latest
+    container_name: prometheus
+    restart: unless-stopped
+    ports:
+      - "9090:9090"
+    volumes:
+      - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+
+  grafana:
+    image: grafana/grafana:latest
+    container_name: grafana
+    restart: unless-stopped
+    ports:
+      - "3000:3000"
+    environment:
+      GF_SECURITY_ADMIN_USER: admin
+      GF_SECURITY_ADMIN_PASSWORD: admin
+      GF_DASHBOARDS_DEFAULT_HOME_DASHBOARD_PATH: 
/var/lib/grafana/dashboards/amie-service.json
+    volumes:
+      - ./grafana/provisioning:/etc/grafana/provisioning
+      - ./grafana/dashboards:/var/lib/grafana/dashboards
+
   vault:
     image: vault:1.11.0
     container_name: vault
diff --git a/compose/grafana/dashboards/amie-service.json b/compose/grafana/dashboards/amie-service.json
new file mode 100644
index 000000000..53072cb06
--- /dev/null
+++ b/compose/grafana/dashboards/amie-service.json
@@ -0,0 +1,152 @@
+{
+  "annotations": { "list": [] },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 1,
+  "links": [],
+  "panels": [
+    {
+      "type": "stat",
+      "title": "Total Packets Received",
+      "gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
+      "targets": [{ "expr": "sum(amie_packets_received_total)", 
"legendFormat": "Total" }],
+      "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": 
"blue", "value": null }] } } },
+      "options": { "colorMode": "background", "textMode": "value_and_name" },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "stat",
+      "title": "Packets Succeeded",
+      "gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
+      "targets": [{ "expr": 
"sum(amie_packets_processed_total{outcome=\"succeeded\"})", "legendFormat": 
"Succeeded" }],
+      "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": 
"green", "value": null }] } } },
+      "options": { "colorMode": "background", "textMode": "value_and_name" },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "stat",
+      "title": "Packets Failed",
+      "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
+      "targets": [{ "expr": 
"sum(amie_packets_processed_total{outcome=~\"permanently_failed|failed\"})", 
"legendFormat": "Failed" }],
+      "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": 
"green", "value": null }, { "color": "red", "value": 1 }] } } },
+      "options": { "colorMode": "background", "textMode": "value_and_name" },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "stat",
+      "title": "Retries",
+      "gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
+      "targets": [{ "expr": "sum(amie_events_retry_total) or vector(0)", 
"legendFormat": "Retries" }],
+      "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": 
"green", "value": null }, { "color": "orange", "value": 1 }, { "color": "red", 
"value": 5 }] } } },
+      "options": { "colorMode": "background", "textMode": "value_and_name" },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "timeseries",
+      "title": "Packets Processed Over Time",
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
+      "targets": [
+        { "expr": "sum by (type) 
(rate(amie_packets_processed_total{outcome=\"succeeded\"}[5m]))", 
"legendFormat": "{{type}}" }
+      ],
+      "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", 
"lineWidth": 2, "fillOpacity": 15, "pointSize": 5, "showPoints": "auto" } } },
+      "options": { "tooltip": { "mode": "multi" }, "legend": { "displayMode": 
"table", "placement": "bottom", "calcs": ["sum"] } },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "timeseries",
+      "title": "Failures & Retries Over Time",
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
+      "targets": [
+        { "expr": 
"sum(rate(amie_packets_processed_total{outcome=~\"permanently_failed|retry_scheduled\"}[5m]))",
 "legendFormat": "Failures" },
+        { "expr": "sum(rate(amie_events_retry_total[5m]))", "legendFormat": 
"Retries" }
+      ],
+      "fieldConfig": { "defaults": { "custom": { "drawStyle": "line", 
"lineWidth": 2, "fillOpacity": 15 }, "color": { "mode": "palette-classic" } } },
+      "options": { "tooltip": { "mode": "multi" }, "legend": { "displayMode": 
"table", "placement": "bottom", "calcs": ["sum"] } },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "piechart",
+      "title": "Packets by Type",
+      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 12 },
+      "targets": [
+        { "expr": "sum by (type) (amie_packets_received_total)", 
"legendFormat": "{{type}}" }
+      ],
+      "options": { "legend": { "displayMode": "table", "placement": "right", 
"values": ["value", "percent"] }, "pieType": "donut" },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "piechart",
+      "title": "Processing Outcomes",
+      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 12 },
+      "targets": [
+        { "expr": "sum by (outcome) (amie_packets_processed_total)", 
"legendFormat": "{{outcome}}" }
+      ],
+      "fieldConfig": { "overrides": [
+        { "matcher": { "id": "byName", "options": "succeeded" }, "properties": 
[{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] },
+        { "matcher": { "id": "byName", "options": "permanently_failed" }, 
"properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" 
} }] },
+        { "matcher": { "id": "byName", "options": "retry_scheduled" }, 
"properties": [{ "id": "color", "value": { "fixedColor": "orange", "mode": 
"fixed" } }] }
+      ] },
+      "options": { "legend": { "displayMode": "table", "placement": "right", 
"values": ["value", "percent"] }, "pieType": "donut" },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "bargauge",
+      "title": "Packets Received by Type",
+      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 12 },
+      "targets": [
+        { "expr": "sum by (type) (amie_packets_received_total)", 
"legendFormat": "{{type}}" }
+      ],
+      "fieldConfig": { "defaults": { "thresholds": { "steps": [{ "color": 
"blue", "value": null }] } } },
+      "options": { "displayMode": "gradient", "orientation": "horizontal", 
"showUnfilled": true },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "timeseries",
+      "title": "Poller: Packets Fetched per Cycle",
+      "gridPos": { "h": 8, "w": 12, "x": 0, "y": 20 },
+      "targets": [
+        { "expr": "rate(amie_poller_packets_fetched_total[5m]) * 60", 
"legendFormat": "Packets/min" }
+      ],
+      "fieldConfig": { "defaults": { "custom": { "drawStyle": "bars", 
"lineWidth": 1, "fillOpacity": 50 }, "color": { "fixedColor": "purple", "mode": 
"fixed" } } },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "timeseries",
+      "title": "Processing Duration (p50 / p95 / p99)",
+      "gridPos": { "h": 8, "w": 12, "x": 12, "y": 20 },
+      "targets": [
+        { "expr": "histogram_quantile(0.50, sum by (le) 
(rate(amie_packet_processing_duration_seconds_bucket[5m])))", "legendFormat": 
"p50" },
+        { "expr": "histogram_quantile(0.95, sum by (le) 
(rate(amie_packet_processing_duration_seconds_bucket[5m])))", "legendFormat": 
"p95" },
+        { "expr": "histogram_quantile(0.99, sum by (le) 
(rate(amie_packet_processing_duration_seconds_bucket[5m])))", "legendFormat": 
"p99" }
+      ],
+      "fieldConfig": { "defaults": { "unit": "s", "custom": { "drawStyle": 
"line", "lineWidth": 2, "fillOpacity": 10 } } },
+      "options": { "tooltip": { "mode": "multi" }, "legend": { "displayMode": 
"table", "placement": "bottom", "calcs": ["lastNotNull"] } },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    },
+    {
+      "type": "table",
+      "title": "Processed Count by Type & Outcome",
+      "gridPos": { "h": 8, "w": 24, "x": 0, "y": 28 },
+      "targets": [
+        { "expr": "sum by (type, outcome) (amie_packets_processed_total)", 
"legendFormat": "{{type}} — {{outcome}}", "format": "table", "instant": true }
+      ],
+      "transformations": [
+        { "id": "organize", "options": { "excludeByName": { "Time": true }, 
"renameByName": { "type": "Packet Type", "outcome": "Outcome", "Value": "Count" 
} } }
+      ],
+      "fieldConfig": { "overrides": [
+        { "matcher": { "id": "byName", "options": "Outcome" }, "properties": 
[{ "id": "custom.cellOptions", "value": { "type": "color-text" } }, { "id": 
"mappings", "value": [{ "type": "value", "options": { "succeeded": { "color": 
"green", "text": "Succeeded" }, "permanently_failed": { "color": "red", "text": 
"Failed" }, "retry_scheduled": { "color": "orange", "text": "Retried" } } }] }] 
}
+      ] },
+      "datasource": { "type": "prometheus", "uid": "PBFA97CFB590B2093" }
+    }
+  ],
+  "refresh": "10s",
+  "schemaVersion": 39,
+  "tags": ["custos", "amie", "access-ci"],
+  "templating": { "list": [] },
+  "time": { "from": "now-1h", "to": "now" },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "ACCESS CI — AMIE Packet Processing",
+  "uid": "custos-amie-overview",
+  "version": 1
+}
diff --git a/compose/grafana/provisioning/dashboards/dashboards.yml b/compose/grafana/provisioning/dashboards/dashboards.yml
new file mode 100644
index 000000000..3b928441c
--- /dev/null
+++ b/compose/grafana/provisioning/dashboards/dashboards.yml
@@ -0,0 +1,12 @@
+apiVersion: 1
+
+providers:
+  - name: 'Custos'
+    orgId: 1
+    folder: 'Custos'
+    type: file
+    disableDeletion: false
+    editable: true
+    options:
+      path: /var/lib/grafana/dashboards
+      foldersFromFilesStructure: false
diff --git a/compose/grafana/provisioning/datasources/prometheus.yml b/compose/grafana/provisioning/datasources/prometheus.yml
new file mode 100644
index 000000000..bb009bb21
--- /dev/null
+++ b/compose/grafana/provisioning/datasources/prometheus.yml
@@ -0,0 +1,9 @@
+apiVersion: 1
+
+datasources:
+  - name: Prometheus
+    type: prometheus
+    access: proxy
+    url: http://prometheus:9090
+    isDefault: true
+    editable: false
diff --git a/compose/prometheus/prometheus.yml b/compose/prometheus/prometheus.yml
new file mode 100644
index 000000000..dbeb9c5d1
--- /dev/null
+++ b/compose/prometheus/prometheus.yml
@@ -0,0 +1,18 @@
+global:
+  scrape_interval: 15s
+  evaluation_interval: 15s
+
+scrape_configs:
+  - job_name: 'access-ci-service'
+    metrics_path: '/actuator/prometheus'
+    static_configs:
+      - targets: ['host.docker.internal:8083']
+        labels:
+          service: 'access-ci-service'
+
+  - job_name: 'custos-signer'
+    metrics_path: '/metrics'
+    static_configs:
+      - targets: ['host.docker.internal:8084']
+        labels:
+          service: 'custos-signer'
diff --git a/pom.xml b/pom.xml
index 707937e04..a3b6f42cd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -141,6 +141,11 @@
                 <artifactId>jakarta.ws.rs-api</artifactId>
                 <version>${jakarta.ws.version}</version>
             </dependency>
+            <dependency>
+                <groupId>net.logstash.logback</groupId>
+                <artifactId>logstash-logback-encoder</artifactId>
+                <version>${logstash.logback.version}</version>
+            </dependency>
         </dependencies>
     </dependencyManagement>
 
@@ -204,6 +209,7 @@
 
         <ssh.username>ubuntu</ssh.username>
         <jakarta.ws.version>4.0.0</jakarta.ws.version>
+        <logstash.logback.version>7.4</logstash.logback.version>
         
<custos.dist.name>apache-airavata-custos-${project.version}</custos.dist.name>
         
<access.service.dist.name>apache-airavata-custos-access-ci-service-${project.version}</access.service.dist.name>
     </properties>

Reply via email to