pierrejeambrun commented on code in PR #64845: URL: https://github.com/apache/airflow/pull/64845#discussion_r3079702909
########## airflow-core/src/airflow/api_fastapi/common/cursors.py: ########## @@ -0,0 +1,143 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Cursor-based (keyset) pagination helpers. + +:meta private: +""" + +from __future__ import annotations + +import base64 +import uuid as uuid_mod +from typing import Any + +import msgspec +from fastapi import HTTPException, status +from sqlalchemy import and_, or_ +from sqlalchemy.sql import Select +from sqlalchemy.sql.elements import ColumnElement +from sqlalchemy.sql.sqltypes import Uuid + +from airflow.api_fastapi.common.parameters import SortParam + + +def _b64url_decode_padded(token: str) -> bytes: + padding = 4 - (len(token) % 4) + if padding != 4: + token = token + ("=" * padding) + return base64.urlsafe_b64decode(token.encode("ascii")) + + +def _nonstrict_bound(col: ColumnElement, value: Any, is_desc: bool) -> ColumnElement[bool]: + """Inclusive range edge on the leading column at each nesting level (``>=`` / ``<=``).""" + return col <= value if is_desc else col >= value + + +def _strict_bound(col: ColumnElement, value: Any, is_desc: bool) -> ColumnElement[bool]: + """Strict inequality for ``or_`` branches (``<`` / ``>``).""" + return col < value if is_desc else col > value + + +def _nested_keyset_predicate( + resolved: list[tuple[str, ColumnElement, bool]], values: list[Any] +) -> ColumnElement[bool]: + """ + Keyset predicate for rows strictly after the cursor in ``ORDER BY`` order. + + Uses nested ``and_(non-strict, or_(strict, ...))`` so leading sort keys use + inclusive range bounds and inner branches use strict inequalities—friendly + for composite index range scans. Logically equivalent to an OR-of-prefix- + equalities formulation. + """ + n = len(resolved) + _, col, is_desc = resolved[n - 1] + inner: ColumnElement[bool] = _strict_bound(col, values[n - 1], is_desc) + for i in range(n - 2, -1, -1): + _, col_i, is_desc_i = resolved[i] + inner = and_( + _nonstrict_bound(col_i, values[i], is_desc_i), + or_(_strict_bound(col_i, values[i], is_desc_i), inner), + ) + return inner + + +def _coerce_value(column: ColumnElement, value: Any) -> Any: + """Normalize decoded values for SQL bind parameters (e.g. UUID columns).""" + if value is None or not isinstance(value, str): + return value + ctype = getattr(column, "type", None) + if isinstance(ctype, Uuid): + try: + return uuid_mod.UUID(value) + except ValueError: + return value Review Comment: Done ########## airflow-core/src/airflow/api_fastapi/core_api/routes/public/task_instances.py: ########## @@ -470,13 +472,26 @@ def get_task_instances( ], readable_ti_filter: ReadableTIFilterDep, session: SessionDep, + cursor: str | None = Query( + None, + description="Cursor for keyset-based pagination (mutually exclusive with offset). " + "Pass an empty string for the first page, then use ``next_cursor`` from the response.", + ), ) -> TaskInstanceCollectionResponse: """ Get list of task instances. - This endpoint allows specifying `~` as the dag_id, dag_run_id to retrieve Task Instances for all DAGs - and DAG runs. + This endpoint allows specifying `~` as the dag_id, dag_run_id + to retrieve task instances for all DAGs and DAG runs. + + Supports two pagination modes: + + **Offset (default):** use `limit` and `offset` query parameters. Returns `total_entries`. + + **Cursor:** pass `cursor` (empty string for the first page, then `next_cursor` from the response). + When `cursor` is provided, `offset` is ignored and `total_entries` is not returned. """ + use_cursor = cursor is not None Review Comment: Done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
