This is an automated email from the ASF dual-hosted git repository.
kaxilnaik pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new d6364992b8a Allow dropping `_xcom_archive` table via CLI (#44291)
d6364992b8a is described below
commit d6364992b8a48c907ac119fb21900e47d4adaf3c
Author: Kaxil Naik <[email protected]>
AuthorDate: Fri Nov 22 22:53:08 2024 +0000
Allow dropping `_xcom_archive` table via CLI (#44291)
This table was created to avoid data loss (in
https://github.com/apache/airflow/pull/44166) when upgrading from AF 2 to AF 3
if a user had pickled values in the XCom table.
- Introduced `ARCHIVED_TABLES_FROM_DB_MIGRATIONS` to track tables created
during database migrations, such as `_xcom_archive`.
- Added `_xcom_archive` to the db cleanup `config_list` for handling its
records based on `timestamp`.
- Added support in `airflow db drop-archived` to drop `_xcom_archive`.
---
airflow/utils/db_cleanup.py | 16 ++++++++++++++--
newsfragments/aip-72.significant.rst | 6 +++++-
2 files changed, 19 insertions(+), 3 deletions(-)
diff --git a/airflow/utils/db_cleanup.py b/airflow/utils/db_cleanup.py
index 9f0f8d63fe1..f71caf06ac8 100644
--- a/airflow/utils/db_cleanup.py
+++ b/airflow/utils/db_cleanup.py
@@ -53,6 +53,10 @@ if TYPE_CHECKING:
logger = logging.getLogger(__name__)
ARCHIVE_TABLE_PREFIX = "_airflow_deleted__"
+# Archived tables created by DB migrations
+ARCHIVED_TABLES_FROM_DB_MIGRATIONS = [
+ "_xcom_archive" # Table created by the AF 2 -> 3.0.0 migration when the
XComs had pickled values
+]
@dataclass
@@ -116,6 +120,7 @@ config_list: list[_TableConfig] = [
_TableConfig(table_name="task_instance_history",
recency_column_name="start_date"),
_TableConfig(table_name="task_reschedule",
recency_column_name="start_date"),
_TableConfig(table_name="xcom", recency_column_name="timestamp"),
+ _TableConfig(table_name="_xcom_archive", recency_column_name="timestamp"),
_TableConfig(table_name="callback_request",
recency_column_name="created_at"),
_TableConfig(table_name="celery_taskmeta",
recency_column_name="date_done"),
_TableConfig(table_name="celery_tasksetmeta",
recency_column_name="date_done"),
@@ -380,13 +385,20 @@ def _effective_table_names(*, table_names: list[str] |
None) -> tuple[set[str],
def _get_archived_table_names(table_names: list[str] | None, session: Session)
-> list[str]:
inspector = inspect(session.bind)
- db_table_names = [x for x in inspector.get_table_names() if
x.startswith(ARCHIVE_TABLE_PREFIX)]
+ db_table_names = [
+ x
+ for x in inspector.get_table_names()
+ if x.startswith(ARCHIVE_TABLE_PREFIX) or x in
ARCHIVED_TABLES_FROM_DB_MIGRATIONS
+ ]
effective_table_names, _ = _effective_table_names(table_names=table_names)
# Filter out tables that don't start with the archive prefix
archived_table_names = [
table_name
for table_name in db_table_names
- if any("__" + x + "__" in table_name for x in effective_table_names)
+ if (
+ any("__" + x + "__" in table_name for x in effective_table_names)
+ or table_name in ARCHIVED_TABLES_FROM_DB_MIGRATIONS
+ )
]
return archived_table_names
diff --git a/newsfragments/aip-72.significant.rst
b/newsfragments/aip-72.significant.rst
index 9fc34004de7..e43e0c2f86c 100644
--- a/newsfragments/aip-72.significant.rst
+++ b/newsfragments/aip-72.significant.rst
@@ -30,4 +30,8 @@ As part of this change the following breaking changes have
occurred:
The ``value`` field in the XCom table has been changed to a ``JSON`` type
via DB migration. The XCom records that
contains pickled data are archived in the ``_xcom_archive`` table. You can
safely drop this table if you don't need
- the data anymore.
+ the data anymore. To drop the table, you can use the following command or
manually drop the table from the database.
+
+ .. code-block:: bash
+
+ airflow db drop-archived -t "_xcom_archive"