This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch 3.2.0-docs in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 4a211675c3223085be06e3c4f23de76b8a81e255 Author: Jarek Potiuk <[email protected]> AuthorDate: Mon Apr 6 16:34:01 2026 +0200 Add prek hook to validate security doc constants against config.yml New hook `check-security-doc-constants` validates that: - [section] option references in security RST files match config.yml - AIRFLOW__X__Y env var references correspond to real config options - Default values in doc tables match config.yml defaults - Sensitive config variables are listed (warning, not error, since the list is documented as non-exhaustive) Loads both airflow-core config.yml and provider.yaml files to cover all config sections (including celery, sentry, workers, etc.). Runs automatically when config.yml or security RST docs are modified. --- airflow-core/.pre-commit-config.yaml | 10 + scripts/ci/prek/check_security_doc_constants.py | 296 ++++++++++++++++++++++++ 2 files changed, 306 insertions(+) diff --git a/airflow-core/.pre-commit-config.yaml b/airflow-core/.pre-commit-config.yaml index d995d79d3c1..f29bc1c5a2e 100644 --- a/airflow-core/.pre-commit-config.yaml +++ b/airflow-core/.pre-commit-config.yaml @@ -271,6 +271,16 @@ repos: require_serial: true pass_filenames: false files: ^src/airflow/config_templates/config\.yml$ + - id: check-security-doc-constants + name: Check security docs match config.yml constants + entry: ../scripts/ci/prek/check_security_doc_constants.py + language: python + pass_filenames: false + files: > + (?x) + ^src/airflow/config_templates/config\.yml$| + ^docs/security/jwt_token_authentication\.rst$| + ^docs/security/security_model\.rst$ - id: check-airflow-version-checks-in-core language: pygrep name: No AIRFLOW_V_* imports in airflow-core diff --git a/scripts/ci/prek/check_security_doc_constants.py b/scripts/ci/prek/check_security_doc_constants.py new file mode 100755 index 00000000000..41c26db0a6f --- /dev/null +++ b/scripts/ci/prek/check_security_doc_constants.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# /// script +# requires-python = ">=3.10,<3.11" +# dependencies = [ +# "pyyaml>=6.0.3", +# "rich>=13.6.0", +# ] +# /// +""" +Validate that security documentation references to config.yml options stay in sync. + +Checks performed: + 1. Every ``[section] option`` reference in the security RST files corresponds to an + actual option in config.yml. + 2. Default values quoted in the docs match the defaults in config.yml. + 3. The sensitive-variable list in security_model.rst includes all config options + marked ``sensitive: true`` in config.yml that have an ``AIRFLOW__`` env-var form. + 4. ``[section] option`` references use correct section names. +""" + +from __future__ import annotations + +import re +import sys +from pathlib import Path + +import yaml +from rich.console import Console + +sys.path.insert(0, str(Path(__file__).parent.resolve())) + +from common_prek_utils import AIRFLOW_ROOT_PATH + +console = Console(color_system="standard", width=200) + +CONFIG_YML = AIRFLOW_ROOT_PATH / "airflow-core" / "src" / "airflow" / "config_templates" / "config.yml" +PROVIDERS_ROOT = AIRFLOW_ROOT_PATH / "providers" + +SECURITY_DOCS = [ + AIRFLOW_ROOT_PATH / "airflow-core" / "docs" / "security" / "jwt_token_authentication.rst", + AIRFLOW_ROOT_PATH / "airflow-core" / "docs" / "security" / "security_model.rst", +] + +# Pattern to match ``[section] option_name`` references in RST +SECTION_OPTION_RE = re.compile(r"``\[(\w+)\]\s+(\w+)``") + +# Pattern to match default value claims like "default: VALUE" or "Default VALUE" or "(default VALUE)" +# in table rows like "- 86400 (24h)" preceded by a config option +DEFAULT_IN_TABLE_RE = re.compile( + r"``\[(\w+)\]\s+(\w+)``.*?(?:default[:\s]+|Default[:\s]+)(\S+)", re.IGNORECASE +) + +# Pattern to match AIRFLOW__SECTION__OPTION env var references +ENV_VAR_RE = re.compile(r"``(AIRFLOW__\w+)``") + +# Map section+option to the AIRFLOW__ env var form +SECTION_OPT_TO_ENV = re.compile(r"AIRFLOW__([A-Z_]+)__([A-Z_]+)") + + +def option_to_env_var(section: str, option: str) -> str: + """Convert a config section+option to its AIRFLOW__ env var form.""" + return f"AIRFLOW__{section.upper()}__{option.upper()}" + + +def load_config() -> dict: + """Load config.yml and provider.yaml files to get all config sections/options.""" + with open(CONFIG_YML) as f: + config = yaml.safe_load(f) + + # Also load provider.yaml files which define config sections under "config:" key + # (e.g., [celery], [sentry], [workers]) + for provider_yaml in PROVIDERS_ROOT.glob("*/provider.yaml"): + with open(provider_yaml) as f: + provider_data = yaml.safe_load(f) + if provider_data and "config" in provider_data: + for section_name, section_data in provider_data["config"].items(): + if isinstance(section_data, dict) and section_name not in config: + config[section_name] = section_data + + return config + + +def get_all_options(config: dict) -> dict[tuple[str, str], dict]: + """Return a dict of (section, option) -> option_config for all config options.""" + result = {} + for section_name, section_data in config.items(): + if not isinstance(section_data, dict) or "options" not in section_data: + continue + for option_name, option_config in section_data["options"].items(): + if isinstance(option_config, dict): + result[(section_name, option_name)] = option_config + return result + + +def get_sensitive_env_vars(all_options: dict[tuple[str, str], dict]) -> set[str]: + """Return set of AIRFLOW__X__Y env vars for all sensitive config options.""" + result = set() + for (section, option), config in all_options.items(): + if config.get("sensitive"): + result.add(option_to_env_var(section, option)) + return result + + +def check_option_references(doc_path: Path, all_options: dict[tuple[str, str], dict]) -> list[str]: + """Check that all [section] option references in the doc exist in config.yml.""" + errors = [] + content = doc_path.read_text() + + for line_num, line in enumerate(content.splitlines(), 1): + for match in SECTION_OPTION_RE.finditer(line): + section = match.group(1) + option = match.group(2) + if (section, option) not in all_options: + # Check if the section exists at all + section_exists = any(s == section for s, _ in all_options) + if section_exists: + errors.append( + f"{doc_path.name}:{line_num}: Option ``[{section}] {option}`` not found in config.yml" + ) + else: + errors.append( + f"{doc_path.name}:{line_num}: Section ``[{section}]`` not found in config.yml" + ) + return errors + + +def check_env_var_references(doc_path: Path, all_options: dict[tuple[str, str], dict]) -> list[str]: + """Check that AIRFLOW__X__Y env var references correspond to real config options.""" + errors = [] + content = doc_path.read_text() + + for line_num, line in enumerate(content.splitlines(), 1): + for match in ENV_VAR_RE.finditer(line): + env_var = match.group(1) + m = SECTION_OPT_TO_ENV.match(env_var) + if not m: + continue + section = m.group(1).lower() + option = m.group(2).lower() + if (section, option) not in all_options: + # Check if the section exists + section_exists = any(s == section for s, _ in all_options) + if section_exists: + errors.append( + f"{doc_path.name}:{line_num}: Env var ``{env_var}`` references " + f"option [{section}] {option} which is not in config.yml" + ) + else: + errors.append( + f"{doc_path.name}:{line_num}: Env var ``{env_var}`` references " + f"section [{section}] which is not in config.yml" + ) + return errors + + +def check_sensitive_vars_listed(security_model_path: Path, sensitive_env_vars: set[str]) -> list[str]: + """ + Check that security_model.rst lists all sensitive config vars (as env vars). + + Returns warnings (printed but not counted as errors) since the doc explicitly + states the list is non-exhaustive. + """ + content = security_model_path.read_text() + + # Find the env vars that ARE listed in the doc + listed_env_vars = set() + for match in ENV_VAR_RE.finditer(content): + env_var = match.group(1) + if env_var.startswith("AIRFLOW__"): + listed_env_vars.add(env_var) + + # Print warnings for missing sensitive vars (not errors — the list is non-exhaustive) + missing = sorted(sensitive_env_vars - listed_env_vars) + if missing: + console.print() + console.print( + f" [yellow]⚠[/] {security_model_path.name}: The following sensitive config variables " + f"are not mentioned in the deployment hardening section (the list is documented as " + f"non-exhaustive, so these are warnings, not errors):" + ) + for env_var in missing: + console.print(f" [yellow]- {env_var}[/]") + + # No errors returned — these are warnings only + return [] + + +def check_defaults_in_tables(doc_path: Path, all_options: dict[tuple[str, str], dict]) -> list[str]: + """ + Check default values in RST table rows match config.yml. + + Looks for patterns like: + * - ``[section] option`` + - 86400 (24h) + in list-table blocks. + """ + errors = [] + content = doc_path.read_text() + lines = content.splitlines() + + # Simple heuristic: find table rows with a config reference followed by a default value row + i = 0 + while i < len(lines): + line = lines[i] + match = SECTION_OPTION_RE.search(line) + if match and "* -" in line: + section = match.group(1) + option = match.group(2) + # Next non-empty line starting with "- " or " -" is the value + j = i + 1 + while j < len(lines) and not lines[j].strip(): + j += 1 + if j < len(lines) and lines[j].strip().startswith("-"): + value_line = lines[j].strip().lstrip("- ").strip() + key = (section, option) + if key in all_options: + config_default = str(all_options[key].get("default", "~")) + # Extract numeric part from doc value like "86400 (24h)" -> "86400" + doc_value = value_line.split()[0] if value_line else "" + # Strip surrounding backticks + doc_value = doc_value.strip("`") + # Normalize: remove quotes from config default + config_default_clean = config_default.strip('"').strip("'") + if ( + doc_value + and config_default_clean + and config_default_clean not in ("~", "None", "none", "") + and doc_value != config_default_clean + # Don't flag if the doc value is a human-readable form + and not doc_value.startswith("Auto") + and not doc_value.startswith("None") + and doc_value != "``GUESS``" + ): + errors.append( + f"{doc_path.name}:{j + 1}: Default for [{section}] {option} is " + f"'{doc_value}' in docs but '{config_default_clean}' in config.yml" + ) + i += 1 + + return errors + + +def main() -> int: + config = load_config() + all_options = get_all_options(config) + sensitive_env_vars = get_sensitive_env_vars(all_options) + + all_errors: list[str] = [] + + for doc_path in SECURITY_DOCS: + if not doc_path.exists(): + console.print(f" [yellow]⚠[/] {doc_path.name} not found, skipping") + continue + + all_errors.extend(check_option_references(doc_path, all_options)) + all_errors.extend(check_env_var_references(doc_path, all_options)) + all_errors.extend(check_defaults_in_tables(doc_path, all_options)) + + # Check sensitive vars are listed in security_model.rst + security_model = AIRFLOW_ROOT_PATH / "airflow-core" / "docs" / "security" / "security_model.rst" + if security_model.exists(): + all_errors.extend(check_sensitive_vars_listed(security_model, sensitive_env_vars)) + + if all_errors: + console.print() + for error in all_errors: + console.print(f" [red]✗[/] {error}") + console.print() + console.print(f"[red]Security doc constants check failed with {len(all_errors)} error(s).[/]") + console.print( + "[yellow]Fix the documentation to match config.yml, or update config.yml if the docs are correct.[/]" + ) + return 1 + + console.print("[green]Security doc constants check passed.[/]") + return 0 + + +if __name__ == "__main__": + sys.exit(main())
