bbovenzi commented on code in PR #44332:
URL: https://github.com/apache/airflow/pull/44332#discussion_r1867988633
##########
airflow/ui/openapi-gen/requests/types.gen.ts:
##########
@@ -683,6 +683,45 @@ export type GraphDataResponse = {
export type arrange = "BT" | "LR" | "RL" | "TB";
+/**
+ * DAG Run model for the Grid UI with Task Instances.
+ */
+export type GridDAGRunwithTIs = {
+ run_id: string;
+ queued_at: string | null;
+ start_date: string | null;
+ end_date: string | null;
+ state: string;
+ run_type: string;
+ data_interval_start: string | null;
+ data_interval_end: string | null;
+ version_number: string | null;
+ task_instances: Array<GridTaskInstanceSummary> | null;
+};
+
+/**
+ * Response model for the Grid UI.
+ */
+export type GridResponse = {
+ dag_runs: Array<GridDAGRunwithTIs>;
+};
Review Comment:
Let's remove a nesting step if possible:
`export type GridResponse = Array<GridDAGRunwithTIs>`
##########
airflow/ui/openapi-gen/requests/types.gen.ts:
##########
@@ -683,6 +683,45 @@ export type GraphDataResponse = {
export type arrange = "BT" | "LR" | "RL" | "TB";
+/**
+ * DAG Run model for the Grid UI with Task Instances.
+ */
+export type GridDAGRunwithTIs = {
+ run_id: string;
+ queued_at: string | null;
+ start_date: string | null;
+ end_date: string | null;
+ state: string;
+ run_type: string;
+ data_interval_start: string | null;
+ data_interval_end: string | null;
+ version_number: string | null;
+ task_instances: Array<GridTaskInstanceSummary> | null;
Review Comment:
It would be great if this were never `null` and were just an empty array if
there was no information.
##########
airflow/api_fastapi/core_api/routes/ui/grid.py:
##########
@@ -0,0 +1,324 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import collections
+import itertools
+import operator
+from functools import cache
+
+from fastapi import HTTPException, Request, status
+from sqlalchemy import func, select
+from sqlalchemy.sql.operators import ColumnOperators
+from typing_extensions import Any
+
+from airflow import DAG
+from airflow.api_fastapi.common.db.common import SessionDep, paginated_select
+from airflow.api_fastapi.common.parameters import (
+ OptionalDateTimeQuery,
+ QueryDagRunRunTypesFilter,
+ QueryDagRunStateFilter,
+ QueryLimit,
+ QueryOffset,
+ SortParam,
+)
+from airflow.api_fastapi.common.router import AirflowRouter
+from airflow.api_fastapi.core_api.datamodels.ui.grid import (
+ GridDAGRunwithTIs,
+ GridResponse,
+ GridTaskInstanceSummary,
+)
+from airflow.api_fastapi.core_api.openapi.exceptions import
create_openapi_http_exception_doc
+from airflow.configuration import conf
+from airflow.exceptions import AirflowConfigException
+from airflow.models import DagRun, MappedOperator, TaskInstance
+from airflow.models.baseoperator import BaseOperator
+from airflow.models.taskmap import TaskMap
+from airflow.utils import timezone
+from airflow.utils.state import TaskInstanceState
+from airflow.utils.task_group import MappedTaskGroup, TaskGroup
+
+grid_router = AirflowRouter(prefix="/grid", tags=["Grid"])
+
+
+@grid_router.get(
+ "/{dag_id}",
+ include_in_schema=False,
+ responses=create_openapi_http_exception_doc([status.HTTP_400_BAD_REQUEST,
status.HTTP_404_NOT_FOUND]),
+)
+def grid_data(
+ dag_id: str,
+ run_types: QueryDagRunRunTypesFilter,
+ run_states: QueryDagRunStateFilter,
+ session: SessionDep,
+ offset: QueryOffset,
+ request: Request,
+ num_runs: QueryLimit,
+ base_date: OptionalDateTimeQuery = None,
+ root: str | None = None,
+ filter_upstream: bool = False,
+ filter_downstream: bool = False,
Review Comment:
+1 on moving to a shared param since the grid and graph data endpoints will
be used together in the UI so often.
##########
airflow/api_fastapi/core_api/routes/ui/grid.py:
##########
@@ -0,0 +1,324 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import collections
+import itertools
+import operator
+from functools import cache
+
+from fastapi import HTTPException, Request, status
+from sqlalchemy import func, select
+from sqlalchemy.sql.operators import ColumnOperators
+from typing_extensions import Any
+
+from airflow import DAG
+from airflow.api_fastapi.common.db.common import SessionDep, paginated_select
+from airflow.api_fastapi.common.parameters import (
+ OptionalDateTimeQuery,
+ QueryDagRunRunTypesFilter,
+ QueryDagRunStateFilter,
+ QueryLimit,
+ QueryOffset,
+ SortParam,
+)
+from airflow.api_fastapi.common.router import AirflowRouter
+from airflow.api_fastapi.core_api.datamodels.ui.grid import (
+ GridDAGRunwithTIs,
+ GridResponse,
+ GridTaskInstanceSummary,
+)
+from airflow.api_fastapi.core_api.openapi.exceptions import
create_openapi_http_exception_doc
+from airflow.configuration import conf
+from airflow.exceptions import AirflowConfigException
+from airflow.models import DagRun, MappedOperator, TaskInstance
+from airflow.models.baseoperator import BaseOperator
+from airflow.models.taskmap import TaskMap
+from airflow.utils import timezone
+from airflow.utils.state import TaskInstanceState
+from airflow.utils.task_group import MappedTaskGroup, TaskGroup
+
+grid_router = AirflowRouter(prefix="/grid", tags=["Grid"])
+
+
+@grid_router.get(
+ "/{dag_id}",
+ include_in_schema=False,
+ responses=create_openapi_http_exception_doc([status.HTTP_400_BAD_REQUEST,
status.HTTP_404_NOT_FOUND]),
+)
+def grid_data(
+ dag_id: str,
+ run_types: QueryDagRunRunTypesFilter,
+ run_states: QueryDagRunStateFilter,
+ session: SessionDep,
+ offset: QueryOffset,
+ request: Request,
+ num_runs: QueryLimit,
+ base_date: OptionalDateTimeQuery = None,
+ root: str | None = None,
+ filter_upstream: bool = False,
+ filter_downstream: bool = False,
+) -> GridResponse:
+ """Return grid data."""
+ ## Database calls to retrieve the DAG Runs and Task Instances and validate
the data
+ dag: DAG = request.app.state.dag_bag.get_dag(dag_id)
+ if not dag:
+ raise HTTPException(status.HTTP_404_NOT_FOUND, f"Dag with id {dag_id}
was not found")
+
+ if root:
+ dag = dag.partial_subset(
+ task_ids_or_regex=root, include_upstream=filter_upstream,
include_downstream=filter_downstream
+ )
+
+ current_time = timezone.utcnow()
+ # Retrieve, sort and encode the previous DAG Runs
+ base_query = (
+ select(
+ DagRun.run_id,
+ DagRun.queued_at,
+ DagRun.start_date,
+ DagRun.end_date,
+ DagRun.state,
+ DagRun.run_type,
+ DagRun.data_interval_start,
+ DagRun.data_interval_end,
+ DagRun.dag_version_id.label("version_number"),
+ )
+ .select_from(DagRun)
+ .where(DagRun.dag_id == dag.dag_id, DagRun.logical_date <=
func.coalesce(base_date, current_time))
+ .order_by(DagRun.id.desc())
+ )
+
+ def get_dag_run_sort_param():
+ """Get the Sort Param for the DAG Run."""
+
+ def _get_run_ordering_expr(name: str) -> ColumnOperators:
+ """Get the Run Ordering Expression."""
+ expr = DagRun.__mapper__.columns[name]
+ # Data interval columns are NULL for runs created before 2.3, but
SQL's
+ # NULL-sorting logic would make those old runs always appear
first. In a
+ # perfect world we'd want to sort by ``get_run_data_interval()``,
but that's
+ # not efficient, so instead the columns are coalesced into
logical_date,
+ # which is good enough in most cases.
+ if name in ("data_interval_start", "data_interval_end"):
+ expr = func.coalesce(expr, DagRun.logical_date)
+ return expr.desc()
+
+ ordering_expression = (_get_run_ordering_expr(name) for name in
dag.timetable.run_ordering)
+ # create SortParam with ordering_expression and DagRun.id.desc()
+ return ordering_expression
+
+ dag_runs_select_filter, _ = paginated_select(
+ statement=base_query.order_by(*get_dag_run_sort_param(),
DagRun.id.desc()),
+ filters=[
+ run_types,
+ run_states,
+ ],
+ order_by=None,
+ offset=offset,
+ limit=num_runs,
+ )
+
+ dag_runs = session.execute(dag_runs_select_filter)
+
+ # Check if there are any DAG Runs with given criteria to eliminate
unnecessary queries/errors
+ if not dag_runs:
+ return GridResponse(dag_runs=[])
+
+ # Retrieve, sort and encode the Task Instances
+ tis_of_dag_runs, _ = paginated_select(
+ statement=select(
+ TaskInstance.run_id,
+ TaskInstance.task_id,
+ TaskInstance.try_number,
+ TaskInstance.state,
+ TaskInstance.start_date,
+ TaskInstance.end_date,
+ TaskInstance.queued_dttm.label("queued_dttm"),
+ )
+ .join(TaskInstance.task_instance_note, isouter=True)
+ .where(TaskInstance.dag_id == dag.dag_id),
+ filters=[],
+ order_by=SortParam(allowed_attrs=["task_id", "run_id"],
model=TaskInstance).dynamic_depends(
+ "task_id"
+ )(),
+ offset=offset,
+ limit=None,
+ )
+
+ task_instances = session.execute(tis_of_dag_runs)
+
+ @cache
+ def get_task_group_children_getter() -> operator.methodcaller:
+ """Get the Task Group Children Getter for the DAG."""
+ sort_order = conf.get("webserver", "grid_view_sorting_order",
fallback="topological")
+ if sort_order == "topological":
+ return operator.methodcaller("topological_sort")
+ if sort_order == "hierarchical_alphabetical":
+ return operator.methodcaller("hierarchical_alphabetical_sort")
+ raise AirflowConfigException(f"Unsupported grid_view_sorting_order:
{sort_order}")
+
+ @cache
+ def get_task_group_map() -> dict[str, dict[str, Any]]:
+ """Get the Task Group Map for the DAG."""
+ task_nodes = {}
+
+ def _fill_task_group_map(
+ task_node: BaseOperator | MappedTaskGroup | TaskMap | None,
+ parent_node: BaseOperator | MappedTaskGroup | TaskMap | None,
+ ):
+ """Recursively fill the Task Group Map."""
+ if task_node is None:
+ return
+ if isinstance(task_node, MappedOperator):
+ task_nodes[task_node.node_id] = {
+ "is_group": False,
Review Comment:
Actually, a task group can be mapped, so I think we need an additional check
here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]