henry3260 commented on code in PR #64523:
URL: https://github.com/apache/airflow/pull/64523#discussion_r3067764098
##########
airflow-core/src/airflow/api_fastapi/common/http_access_log.py:
##########
@@ -23,13 +23,87 @@
from typing import TYPE_CHECKING
import structlog
+from starlette.routing import Match
+
+from airflow._shared.observability.metrics.stats import Stats
if TYPE_CHECKING:
from starlette.types import ASGIApp, Message, Receive, Scope, Send
logger = structlog.get_logger(logger_name="http.access")
_HEALTH_PATHS = frozenset(["/api/v2/monitor/health"])
+_API_PATH_PREFIX_TO_SURFACE = (
+ ("/api/v2", "public"),
+ ("/ui", "ui"),
+)
+
+
+def _get_api_surface(path: str) -> str | None:
+ for prefix, surface in _API_PATH_PREFIX_TO_SURFACE:
+ if path == prefix or path.startswith(f"{prefix}/"):
+ return surface
+ return None
+
+
+def _get_status_family(status_code: int) -> str:
+ if status_code < 100:
+ return "unknown"
+ return f"{status_code // 100}xx"
+
+
+def _get_route_tag(scope: Scope) -> str:
+ router = scope.get("router")
+ routes = getattr(router, "routes", None)
+ partial_route_path: str | None = None
+ if routes:
+ for route in routes:
+ match, _ = route.matches(scope)
+ route_path = getattr(route, "path", None)
+ if not isinstance(route_path, str) or not route_path:
+ continue
+ if match == Match.FULL:
+ return route_path
+ if match == Match.PARTIAL and partial_route_path is None:
+ partial_route_path = route_path
+
+ if partial_route_path:
+ return partial_route_path
+
+ endpoint = scope.get("endpoint")
+ endpoint_name = getattr(endpoint, "__name__", None)
+ if isinstance(endpoint_name, str) and endpoint_name:
+ return endpoint_name
+
+ return "unmatched"
+
+
+def _emit_api_metrics(
+ *,
+ scope: Scope,
+ path: str,
+ method: str,
+ status_code: int,
+ duration_us: int,
+) -> None:
+ api_surface = _get_api_surface(path)
+ if api_surface is None:
+ return
+
+ # Keep tags bounded so API metrics remain usable across supported backends.
+ tags = {
+ "api_surface": api_surface,
+ "method": method or "UNKNOWN",
+ "route": _get_route_tag(scope),
+ "status_code": str(status_code) if status_code else "unknown",
+ "status_family": _get_status_family(status_code),
+ }
+ duration_ms = duration_us / 1000.0
+
+ Stats.incr("api.requests", tags=tags)
+ Stats.timing("api.request.duration", duration_ms, tags=tags)
+ if status_code >= 400:
+ Stats.incr("api.request.errors", tags=tags)
Review Comment:
> We should probably split client vs server error.
>
> Client spamming invalid entries shouldn't inflate the number of 'errors',
those are not really errors.
Yes, agreed. I think `api.request.errors` should count only server errors
(5xx). For client-side failures, `api.requests` with `status_family=4xx` should
already give us the visibility we need.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]