Copilot commented on code in PR #64523:
URL: https://github.com/apache/airflow/pull/64523#discussion_r3066495579
##########
airflow-core/src/airflow/api_fastapi/common/http_access_log.py:
##########
@@ -23,13 +23,87 @@
from typing import TYPE_CHECKING
import structlog
+from starlette.routing import Match
+
+from airflow._shared.observability.metrics.stats import Stats
if TYPE_CHECKING:
from starlette.types import ASGIApp, Message, Receive, Scope, Send
logger = structlog.get_logger(logger_name="http.access")
_HEALTH_PATHS = frozenset(["/api/v2/monitor/health"])
+_API_PATH_PREFIX_TO_SURFACE = (
+ ("/api/v2", "public"),
+ ("/ui", "ui"),
+)
+
+
+def _get_api_surface(path: str) -> str | None:
+ for prefix, surface in _API_PATH_PREFIX_TO_SURFACE:
+ if path == prefix or path.startswith(f"{prefix}/"):
+ return surface
+ return None
+
+
+def _get_status_family(status_code: int) -> str:
+ if status_code < 100:
+ return "unknown"
+ return f"{status_code // 100}xx"
+
+
+def _get_route_tag(scope: Scope) -> str:
+ router = scope.get("router")
+ routes = getattr(router, "routes", None)
+ partial_route_path: str | None = None
+ if routes:
+ for route in routes:
+ match, _ = route.matches(scope)
+ route_path = getattr(route, "path", None)
+ if not isinstance(route_path, str) or not route_path:
+ continue
+ if match == Match.FULL:
+ return route_path
+ if match == Match.PARTIAL and partial_route_path is None:
+ partial_route_path = route_path
+
+ if partial_route_path:
+ return partial_route_path
Review Comment:
This re-runs route matching by iterating over all routes and calling
`route.matches(scope)` for every API request, which can add avoidable overhead.
Starlette typically puts the matched route into the ASGI scope (e.g.,
`scope.get(\"route\")`); prefer using that when present. If you keep the
partial-match fallback, selecting the *most specific* partial match (e.g.,
longest `route_path`) would be more accurate than taking the first partial
match.
##########
shared/observability/src/airflow_shared/observability/metrics/metrics_template.yaml:
##########
@@ -504,9 +504,30 @@ metrics:
legacy_name: "edge_worker.ti.finish.{queue}.{state}.{dag_id}.{task_id}"
name_variables: ["queue", "state", "dag_id", "task_id"]
+ - name: "api.requests"
+ description: "Number of completed REST API requests. Metric with
api_surface, method, route,
+ status_code and status_family tagging."
Review Comment:
These descriptions are split across lines inside a double-quoted scalar.
While YAML allows multiline quoted scalars, this often leads to unexpected
newline/whitespace rendering in generated docs or tooling. Consider switching
to a single-line description or using a folded scalar style (`>-`) for
predictable formatting.
##########
airflow-core/src/airflow/api_fastapi/common/http_access_log.py:
##########
@@ -23,13 +23,87 @@
from typing import TYPE_CHECKING
import structlog
+from starlette.routing import Match
+
+from airflow._shared.observability.metrics.stats import Stats
if TYPE_CHECKING:
from starlette.types import ASGIApp, Message, Receive, Scope, Send
logger = structlog.get_logger(logger_name="http.access")
_HEALTH_PATHS = frozenset(["/api/v2/monitor/health"])
+_API_PATH_PREFIX_TO_SURFACE = (
+ ("/api/v2", "public"),
+ ("/ui", "ui"),
+)
+
+
+def _get_api_surface(path: str) -> str | None:
+ for prefix, surface in _API_PATH_PREFIX_TO_SURFACE:
+ if path == prefix or path.startswith(f"{prefix}/"):
+ return surface
+ return None
+
+
+def _get_status_family(status_code: int) -> str:
+ if status_code < 100:
+ return "unknown"
+ return f"{status_code // 100}xx"
+
+
+def _get_route_tag(scope: Scope) -> str:
+ router = scope.get("router")
+ routes = getattr(router, "routes", None)
+ partial_route_path: str | None = None
+ if routes:
+ for route in routes:
+ match, _ = route.matches(scope)
+ route_path = getattr(route, "path", None)
+ if not isinstance(route_path, str) or not route_path:
+ continue
+ if match == Match.FULL:
+ return route_path
+ if match == Match.PARTIAL and partial_route_path is None:
+ partial_route_path = route_path
+
+ if partial_route_path:
+ return partial_route_path
+
+ endpoint = scope.get("endpoint")
+ endpoint_name = getattr(endpoint, "__name__", None)
+ if isinstance(endpoint_name, str) and endpoint_name:
+ return endpoint_name
+
+ return "unmatched"
+
+
+def _emit_api_metrics(
+ *,
+ scope: Scope,
+ path: str,
+ method: str,
+ status_code: int,
+ duration_us: int,
+) -> None:
+ api_surface = _get_api_surface(path)
+ if api_surface is None:
+ return
+
+ # Keep tags bounded so API metrics remain usable across supported backends.
+ tags = {
+ "api_surface": api_surface,
+ "method": method or "UNKNOWN",
+ "route": _get_route_tag(scope),
+ "status_code": str(status_code) if status_code else "unknown",
+ "status_family": _get_status_family(status_code),
+ }
+ duration_ms = duration_us / 1000.0
+
+ Stats.incr("api.requests", tags=tags)
+ Stats.timing("api.request.duration", duration_ms, tags=tags)
Review Comment:
`api.request.errors` is incremented for any `status_code >= 400`, which
includes expected client errors (4xx) such as 404. The metric name/description
currently reads like it might be server-side failures only; please clarify the
definition (e.g., '4xx/5xx responses') in the metrics template and/or add an
inline comment here, or change the threshold to `>= 500` if the intent is to
capture only server errors.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]