Copilot commented on code in PR #64012: URL: https://github.com/apache/airflow/pull/64012#discussion_r3025331773
########## scripts/ci/prek/breeze_context.py: ########## @@ -0,0 +1,281 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Breeze context detection and command selection. + +This module exists for agent workflows that must reliably choose between: + +- "host" execution (commands intended to run on the contributor machine) +- "breeze" execution (commands intended to run inside the Breeze container) + +Architecture +------------ +Agents call :meth:`BreezieContext.get_command` with a skill id and parameters. +The helper detects the current runtime context with :meth:`BreezieContext.is_in_breeze`, +optionally using a manual override for debugging/testing. + +Detection strategy (priority) +------------------------------ +1. ``BREEZE_HOME`` environment variable +2. ``/.dockerenv`` (Docker container marker) +3. ``/opt/airflow`` directory presence (Breeze install path) +4. Default to ``host`` + +Testing +------- +This module is covered by unit tests: +- `scripts/tests/test_breeze_context.py` +- `scripts/tests/test_edge_cases.py` + +For manual debugging you can run: +`python3 scripts/ci/prek/breeze_context.py --force-context host` +or +`python3 scripts/ci/prek/breeze_context.py --force-context breeze` +""" + +from __future__ import annotations + +import argparse +import logging +import os +from typing import Any, TypedDict + +logger = logging.getLogger(__name__) + + +class SkillParam(TypedDict): + required: bool + + +class SkillDef(TypedDict): + host: str + breeze: str | None + preferred: str + params: dict[str, SkillParam] + + +class BreezieContext: + """Detect whether code is running in Breeze container or on host.""" + + @staticmethod + def is_in_breeze(force_context: str | None = None) -> bool: + """ + Detect whether the current process is running inside Breeze. + + Parameters + ---------- + force_context: + Optional manual override. + - ``"breeze"`` forces Breeze context. + - ``"host"`` forces host context. + - ``None`` uses auto-detection. + + Returns + ------- + bool + ``True`` when running in Breeze, otherwise ``False``. + + Raises + ------ + ValueError + If ``force_context`` is not one of ``"host"`` or ``"breeze"``. + """ + if force_context is not None: + normalized = force_context.strip().lower() + if normalized == "breeze": + logger.info("Context forced to breeze via --force-context") + return True + if normalized == "host": + logger.info("Context forced to host via --force-context") + return False + raise ValueError(f"Invalid force_context={force_context!r}. Expected 'host' or 'breeze'.") + + # 1) BREEZE_HOME environment variable (explicit marker) + try: + breeze_home = os.environ.get("BREEZE_HOME") + except Exception as e: # pragma: no cover (defensive) + logger.exception("Failed to read BREEZE_HOME from environment: %s", e) + breeze_home = None + if breeze_home: + logger.debug("Detected Breeze via BREEZE_HOME=%r", breeze_home) + return True + + dockerenv_marker = "/.dockerenv" + opt_airflow_dir = "/opt/airflow" + + # 2) /.dockerenv (Docker container marker) + try: + if os.path.exists(dockerenv_marker): + logger.debug("Detected Breeze via %s marker", dockerenv_marker) + return True + except OSError as e: # pragma: no cover (defensive) + logger.warning("Failed checking %s: %s", dockerenv_marker, e) + + # 3) /opt/airflow (Breeze standard installation path) + try: + if os.path.isdir(opt_airflow_dir): + logger.debug("Detected Breeze via %s directory", opt_airflow_dir) + return True + except OSError as e: # pragma: no cover (defensive) + logger.warning("Failed checking %s: %s", opt_airflow_dir, e) + + # Optional: log SSH hint without changing behavior. + try: + ssh_connection = os.environ.get("SSH_CONNECTION") + except Exception: # pragma: no cover (defensive) + ssh_connection = None + if ssh_connection: + logger.debug("SSH_CONNECTION detected; defaulting to host unless Breeze markers exist") + + # 5) Default: assume host + logger.debug("No Breeze markers detected; assuming host execution") + return False + + @staticmethod + def get_command(skill_id: str, *, force_context: str | None = None, **kwargs) -> str: + """ + Generate the appropriate command for the given skill and context. + + Args: + skill_id: Unique skill identifier (e.g., "run-unit-tests") + **kwargs: Skill-specific parameters + + Returns: + Command string to execute + + Raises: + ValueError: If ``skill_id`` is unknown or required parameters are missing. + """ + is_breeze = BreezieContext.is_in_breeze(force_context=force_context) + context = "breeze" if is_breeze else "host" + + # Define all available skills + skills: dict[str, Any] = { + "stage-changes": { + # git add operates on the host working tree — not inside the container. + # This is explicitly a host-only operation per the contributor workflow. + "host": "git add {path}", + "breeze": None, + "preferred": "host", + "params": {"path": {"required": True}}, + }, + "run-static-checks": { + # prek is available on both host and inside Breeze; same command in both contexts. + "host": "prek run {module}", + "breeze": "prek run {module}", + "preferred": "host", + "params": {"module": {"required": False}}, + }, + "run-unit-tests": { + "host": "uv run --project airflow pytest {test_path}", + "breeze": "breeze exec pytest {test_path}", + "preferred": "host", + "params": {"test_path": {"required": True}}, + }, + } + + # Validate skill exists + if skill_id not in skills: + available = ", ".join(sorted(skills.keys())) + raise ValueError(f"Skill not found: {skill_id}. Available skills: {available}") + + skill: SkillDef = skills[skill_id] + + # Validate required parameters + params = skill["params"] + for param_name, param_info in params.items(): + if param_info["required"] and param_name not in kwargs: + raise ValueError(f"Missing required parameter: {param_name} for skill {skill_id!r}") + + # Choose context; guard host-only skills + if context == "breeze": + command_template = skill["breeze"] + else: + command_template = skill["host"] + if command_template is None: + raise ValueError( + f"Skill {skill_id!r} is host-only and cannot be run inside Breeze. " + f"Run this command from your host terminal before switching to Breeze." + ) + + # Substitute parameters + command: str = command_template + for param_name, param_value in kwargs.items(): + placeholder = "{" + param_name + "}" + command = command.replace(placeholder, str(param_value)) Review Comment: Direct string replacement into shell command templates does not safely handle paths with spaces/quotes (and can enable command injection if any values are untrusted). Use shell-escaping (e.g., `shlex.quote`) per substituted parameter, or return an argv list and execute with `subprocess.run(..., shell=False)` so paths like `tests/unit/my file's test.py` remain a single argument. ########## scripts/ci/prek/breeze_context.py: ########## @@ -0,0 +1,281 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Breeze context detection and command selection. + +This module exists for agent workflows that must reliably choose between: + +- "host" execution (commands intended to run on the contributor machine) +- "breeze" execution (commands intended to run inside the Breeze container) + +Architecture +------------ +Agents call :meth:`BreezieContext.get_command` with a skill id and parameters. +The helper detects the current runtime context with :meth:`BreezieContext.is_in_breeze`, +optionally using a manual override for debugging/testing. + +Detection strategy (priority) +------------------------------ +1. ``BREEZE_HOME`` environment variable +2. ``/.dockerenv`` (Docker container marker) +3. ``/opt/airflow`` directory presence (Breeze install path) +4. Default to ``host`` + +Testing +------- +This module is covered by unit tests: +- `scripts/tests/test_breeze_context.py` +- `scripts/tests/test_edge_cases.py` + +For manual debugging you can run: +`python3 scripts/ci/prek/breeze_context.py --force-context host` +or +`python3 scripts/ci/prek/breeze_context.py --force-context breeze` +""" + +from __future__ import annotations + +import argparse +import logging +import os +from typing import Any, TypedDict + +logger = logging.getLogger(__name__) + + +class SkillParam(TypedDict): + required: bool + + +class SkillDef(TypedDict): + host: str + breeze: str | None + preferred: str + params: dict[str, SkillParam] + + +class BreezieContext: Review Comment: The class name `BreezieContext` is inconsistent with the module name `breeze_context.py` and the terminology used elsewhere (“Breeze”). Consider renaming to `BreezeContext` for clarity, and keep `BreezieContext = BreezeContext` as a compatibility alias if needed. ########## scripts/ci/prek/validate_skills.py: ########## @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Validate Agent Skills adherence to the agentskills.io standard. + +This script is used by the project's prek/pre-commit pipeline to ensure that: + +- All skills in `.agents/skills` follow the valid directory structure +- SKILL.md files have proper YAML frontmatter with `name` and `description` +""" + +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path + +try: + import yaml +except ImportError: + import sys + + print("PyYAML is required for validate_skills.py. Install with `pip install pyyaml`.") + sys.exit(1) + +logger = logging.getLogger(__name__) + Review Comment: There’s a redundant `import sys` inside the `except ImportError` block (it’s already imported at module scope), and `logger` is defined but unused. Removing the duplicate import and either using `logger` or dropping it keeps the script simpler. ```suggestion print("PyYAML is required for validate_skills.py. Install with `pip install pyyaml`.") sys.exit(1) ``` ########## scripts/ci/prek/breeze_context.py: ########## @@ -0,0 +1,281 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Breeze context detection and command selection. + +This module exists for agent workflows that must reliably choose between: + +- "host" execution (commands intended to run on the contributor machine) +- "breeze" execution (commands intended to run inside the Breeze container) + +Architecture +------------ +Agents call :meth:`BreezieContext.get_command` with a skill id and parameters. +The helper detects the current runtime context with :meth:`BreezieContext.is_in_breeze`, +optionally using a manual override for debugging/testing. + +Detection strategy (priority) +------------------------------ +1. ``BREEZE_HOME`` environment variable +2. ``/.dockerenv`` (Docker container marker) +3. ``/opt/airflow`` directory presence (Breeze install path) +4. Default to ``host`` + +Testing +------- +This module is covered by unit tests: +- `scripts/tests/test_breeze_context.py` +- `scripts/tests/test_edge_cases.py` + +For manual debugging you can run: +`python3 scripts/ci/prek/breeze_context.py --force-context host` +or +`python3 scripts/ci/prek/breeze_context.py --force-context breeze` +""" + +from __future__ import annotations + +import argparse +import logging +import os +from typing import Any, TypedDict + +logger = logging.getLogger(__name__) + + +class SkillParam(TypedDict): + required: bool + + +class SkillDef(TypedDict): + host: str + breeze: str | None + preferred: str + params: dict[str, SkillParam] + + +class BreezieContext: + """Detect whether code is running in Breeze container or on host.""" + + @staticmethod + def is_in_breeze(force_context: str | None = None) -> bool: + """ + Detect whether the current process is running inside Breeze. + + Parameters + ---------- + force_context: + Optional manual override. + - ``"breeze"`` forces Breeze context. + - ``"host"`` forces host context. + - ``None`` uses auto-detection. + + Returns + ------- + bool + ``True`` when running in Breeze, otherwise ``False``. + + Raises + ------ + ValueError + If ``force_context`` is not one of ``"host"`` or ``"breeze"``. + """ + if force_context is not None: + normalized = force_context.strip().lower() + if normalized == "breeze": + logger.info("Context forced to breeze via --force-context") + return True + if normalized == "host": + logger.info("Context forced to host via --force-context") + return False + raise ValueError(f"Invalid force_context={force_context!r}. Expected 'host' or 'breeze'.") + + # 1) BREEZE_HOME environment variable (explicit marker) + try: + breeze_home = os.environ.get("BREEZE_HOME") + except Exception as e: # pragma: no cover (defensive) + logger.exception("Failed to read BREEZE_HOME from environment: %s", e) + breeze_home = None + if breeze_home: + logger.debug("Detected Breeze via BREEZE_HOME=%r", breeze_home) + return True + + dockerenv_marker = "/.dockerenv" + opt_airflow_dir = "/opt/airflow" + + # 2) /.dockerenv (Docker container marker) + try: + if os.path.exists(dockerenv_marker): + logger.debug("Detected Breeze via %s marker", dockerenv_marker) + return True + except OSError as e: # pragma: no cover (defensive) + logger.warning("Failed checking %s: %s", dockerenv_marker, e) + + # 3) /opt/airflow (Breeze standard installation path) + try: + if os.path.isdir(opt_airflow_dir): + logger.debug("Detected Breeze via %s directory", opt_airflow_dir) + return True + except OSError as e: # pragma: no cover (defensive) + logger.warning("Failed checking %s: %s", opt_airflow_dir, e) + + # Optional: log SSH hint without changing behavior. + try: + ssh_connection = os.environ.get("SSH_CONNECTION") + except Exception: # pragma: no cover (defensive) + ssh_connection = None + if ssh_connection: + logger.debug("SSH_CONNECTION detected; defaulting to host unless Breeze markers exist") + + # 5) Default: assume host + logger.debug("No Breeze markers detected; assuming host execution") + return False + + @staticmethod + def get_command(skill_id: str, *, force_context: str | None = None, **kwargs) -> str: + """ + Generate the appropriate command for the given skill and context. + + Args: + skill_id: Unique skill identifier (e.g., "run-unit-tests") + **kwargs: Skill-specific parameters + + Returns: + Command string to execute + + Raises: + ValueError: If ``skill_id`` is unknown or required parameters are missing. + """ + is_breeze = BreezieContext.is_in_breeze(force_context=force_context) + context = "breeze" if is_breeze else "host" + + # Define all available skills + skills: dict[str, Any] = { + "stage-changes": { + # git add operates on the host working tree — not inside the container. + # This is explicitly a host-only operation per the contributor workflow. + "host": "git add {path}", + "breeze": None, + "preferred": "host", + "params": {"path": {"required": True}}, + }, + "run-static-checks": { + # prek is available on both host and inside Breeze; same command in both contexts. + "host": "prek run {module}", + "breeze": "prek run {module}", + "preferred": "host", + "params": {"module": {"required": False}}, + }, + "run-unit-tests": { + "host": "uv run --project airflow pytest {test_path}", + "breeze": "breeze exec pytest {test_path}", + "preferred": "host", + "params": {"test_path": {"required": True}}, + }, + } + + # Validate skill exists + if skill_id not in skills: + available = ", ".join(sorted(skills.keys())) + raise ValueError(f"Skill not found: {skill_id}. Available skills: {available}") + + skill: SkillDef = skills[skill_id] + + # Validate required parameters + params = skill["params"] + for param_name, param_info in params.items(): + if param_info["required"] and param_name not in kwargs: + raise ValueError(f"Missing required parameter: {param_name} for skill {skill_id!r}") + + # Choose context; guard host-only skills + if context == "breeze": + command_template = skill["breeze"] + else: + command_template = skill["host"] + if command_template is None: + raise ValueError( + f"Skill {skill_id!r} is host-only and cannot be run inside Breeze. " + f"Run this command from your host terminal before switching to Breeze." + ) + + # Substitute parameters + command: str = command_template + for param_name, param_value in kwargs.items(): + placeholder = "{" + param_name + "}" + command = command.replace(placeholder, str(param_value)) + + # Handle optional parameters that weren't provided + command = command.replace("{--target module}", "").replace("{module}", "") Review Comment: Optional-parameter handling is currently ad-hoc (e.g., `{--target module}` doesn’t appear in templates, and removing `{module}` can leave trailing whitespace like `prek run `). Prefer constructing commands from structured parts (base + optional args) so omission doesn’t require placeholder cleanup and produces stable output. ########## scripts/tests/test_edge_cases.py: ########## @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Additional edge-case tests for Breeze context.""" + +from __future__ import annotations + +import os +from unittest.mock import patch + +from ci.prek.breeze_context import BreezieContext + + +class TestEdgeCases: + """Edge cases around context detection.""" + + def test_context_detects_ssh_host(self) -> None: + """Should default to host when Breeze markers are absent even over SSH.""" + with patch.dict(os.environ, {"SSH_CONNECTION": "1"}, clear=True): + with patch("os.path.exists", return_value=False): + with patch("os.path.isdir", return_value=False): + assert BreezieContext.is_in_breeze() is False + + def test_context_detects_symlink_marker(self) -> None: + """If dockerenv marker exists (even via symlink), detect Breeze.""" + with patch.dict(os.environ, {}, clear=True): + with patch("os.path.exists") as mock_exists: + mock_exists.return_value = True + with patch("os.path.isdir", return_value=False): + assert BreezieContext.is_in_breeze() is True + + def test_parameter_substitution_handles_quotes_spaces(self) -> None: + """Command template substitution should preserve special characters.""" + test_path = "tests/unit/my file's test.py" + cmd = BreezieContext.get_command("run-unit-tests", test_path=test_path) + assert test_path in cmd Review Comment: This test currently only asserts substring inclusion, but the real failure mode with spaces/quotes is incorrect shell tokenization. Strengthen the assertion to verify the generated command properly quotes/escapes the path (or, if you switch to argv-list output, assert the path is a single argument element). ########## scripts/ci/prek/breeze_context.py: ########## @@ -0,0 +1,281 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Breeze context detection and command selection. + +This module exists for agent workflows that must reliably choose between: + +- "host" execution (commands intended to run on the contributor machine) +- "breeze" execution (commands intended to run inside the Breeze container) + +Architecture +------------ +Agents call :meth:`BreezieContext.get_command` with a skill id and parameters. +The helper detects the current runtime context with :meth:`BreezieContext.is_in_breeze`, +optionally using a manual override for debugging/testing. + +Detection strategy (priority) +------------------------------ +1. ``BREEZE_HOME`` environment variable +2. ``/.dockerenv`` (Docker container marker) +3. ``/opt/airflow`` directory presence (Breeze install path) +4. Default to ``host`` + +Testing +------- +This module is covered by unit tests: +- `scripts/tests/test_breeze_context.py` +- `scripts/tests/test_edge_cases.py` + +For manual debugging you can run: +`python3 scripts/ci/prek/breeze_context.py --force-context host` +or +`python3 scripts/ci/prek/breeze_context.py --force-context breeze` +""" + +from __future__ import annotations + +import argparse +import logging +import os +from typing import Any, TypedDict + +logger = logging.getLogger(__name__) + + +class SkillParam(TypedDict): + required: bool + + +class SkillDef(TypedDict): + host: str + breeze: str | None + preferred: str + params: dict[str, SkillParam] + + +class BreezieContext: + """Detect whether code is running in Breeze container or on host.""" + + @staticmethod + def is_in_breeze(force_context: str | None = None) -> bool: + """ + Detect whether the current process is running inside Breeze. + + Parameters + ---------- + force_context: + Optional manual override. + - ``"breeze"`` forces Breeze context. + - ``"host"`` forces host context. + - ``None`` uses auto-detection. + + Returns + ------- + bool + ``True`` when running in Breeze, otherwise ``False``. + + Raises + ------ + ValueError + If ``force_context`` is not one of ``"host"`` or ``"breeze"``. + """ + if force_context is not None: + normalized = force_context.strip().lower() + if normalized == "breeze": + logger.info("Context forced to breeze via --force-context") + return True + if normalized == "host": + logger.info("Context forced to host via --force-context") + return False + raise ValueError(f"Invalid force_context={force_context!r}. Expected 'host' or 'breeze'.") + + # 1) BREEZE_HOME environment variable (explicit marker) + try: + breeze_home = os.environ.get("BREEZE_HOME") + except Exception as e: # pragma: no cover (defensive) + logger.exception("Failed to read BREEZE_HOME from environment: %s", e) + breeze_home = None + if breeze_home: + logger.debug("Detected Breeze via BREEZE_HOME=%r", breeze_home) + return True + + dockerenv_marker = "/.dockerenv" + opt_airflow_dir = "/opt/airflow" + + # 2) /.dockerenv (Docker container marker) + try: + if os.path.exists(dockerenv_marker): + logger.debug("Detected Breeze via %s marker", dockerenv_marker) + return True + except OSError as e: # pragma: no cover (defensive) + logger.warning("Failed checking %s: %s", dockerenv_marker, e) + + # 3) /opt/airflow (Breeze standard installation path) + try: + if os.path.isdir(opt_airflow_dir): + logger.debug("Detected Breeze via %s directory", opt_airflow_dir) + return True + except OSError as e: # pragma: no cover (defensive) + logger.warning("Failed checking %s: %s", opt_airflow_dir, e) + + # Optional: log SSH hint without changing behavior. + try: + ssh_connection = os.environ.get("SSH_CONNECTION") + except Exception: # pragma: no cover (defensive) + ssh_connection = None + if ssh_connection: + logger.debug("SSH_CONNECTION detected; defaulting to host unless Breeze markers exist") + + # 5) Default: assume host + logger.debug("No Breeze markers detected; assuming host execution") + return False + + @staticmethod + def get_command(skill_id: str, *, force_context: str | None = None, **kwargs) -> str: + """ + Generate the appropriate command for the given skill and context. + + Args: + skill_id: Unique skill identifier (e.g., "run-unit-tests") + **kwargs: Skill-specific parameters + + Returns: + Command string to execute + + Raises: + ValueError: If ``skill_id`` is unknown or required parameters are missing. + """ + is_breeze = BreezieContext.is_in_breeze(force_context=force_context) + context = "breeze" if is_breeze else "host" + + # Define all available skills + skills: dict[str, Any] = { + "stage-changes": { + # git add operates on the host working tree — not inside the container. + # This is explicitly a host-only operation per the contributor workflow. + "host": "git add {path}", + "breeze": None, + "preferred": "host", + "params": {"path": {"required": True}}, + }, + "run-static-checks": { + # prek is available on both host and inside Breeze; same command in both contexts. + "host": "prek run {module}", + "breeze": "prek run {module}", + "preferred": "host", + "params": {"module": {"required": False}}, + }, Review Comment: Optional-parameter handling is currently ad-hoc (e.g., `{--target module}` doesn’t appear in templates, and removing `{module}` can leave trailing whitespace like `prek run `). Prefer constructing commands from structured parts (base + optional args) so omission doesn’t require placeholder cleanup and produces stable output. ########## scripts/ci/prek/validate_skills.py: ########## @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Validate Agent Skills adherence to the agentskills.io standard. + +This script is used by the project's prek/pre-commit pipeline to ensure that: + +- All skills in `.agents/skills` follow the valid directory structure +- SKILL.md files have proper YAML frontmatter with `name` and `description` +""" + +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path + +try: + import yaml +except ImportError: + import sys + + print("PyYAML is required for validate_skills.py. Install with `pip install pyyaml`.") + sys.exit(1) + +logger = logging.getLogger(__name__) + +SKILLS_DIR = Path(".agents/skills") + + +def validate_skill(skill_dir: Path) -> list[str]: + """Validate a single skill directory and return a list of errors.""" + errors = [] + + skill_md = skill_dir / "SKILL.md" + if not skill_md.exists(): + errors.append(f"Missing SKILL.md in {skill_dir}") + return errors + + content = skill_md.read_text(encoding="utf-8") + + # Strip leading HTML comment blocks (e.g. SPDX license and markdownlint + # pragmas inserted/maintained by hooks) before checking for YAML frontmatter. + stripped = content.lstrip() + while stripped.startswith("<!--"): + end_comment = stripped.find("-->") + if end_comment == -1: + break + stripped = stripped[end_comment + 3 :].lstrip() + + if not stripped.startswith("---\n"): + errors.append(f"Missing YAML frontmatter at start of {skill_md}") + return errors + + parts = stripped.split("---\n") + if len(parts) < 3: + errors.append(f"Incomplete YAML frontmatter in {skill_md}") + return errors Review Comment: Frontmatter detection is newline-sensitive (`---\\n`) and may fail on CRLF (`---\\r\\n`), causing false negatives on Windows checkouts. Normalize newlines before parsing (e.g., replace `\\r\\n` with `\\n`) or use a line-based/regex approach that recognizes `---` delimiters regardless of newline style. ########## scripts/ci/prek/validate_skills.py: ########## @@ -0,0 +1,122 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Validate Agent Skills adherence to the agentskills.io standard. + +This script is used by the project's prek/pre-commit pipeline to ensure that: + +- All skills in `.agents/skills` follow the valid directory structure +- SKILL.md files have proper YAML frontmatter with `name` and `description` +""" + +from __future__ import annotations + +import argparse +import logging +import sys +from pathlib import Path + +try: + import yaml +except ImportError: + import sys + + print("PyYAML is required for validate_skills.py. Install with `pip install pyyaml`.") + sys.exit(1) + +logger = logging.getLogger(__name__) + Review Comment: There’s a redundant `import sys` inside the `except ImportError` block (it’s already imported at module scope), and `logger` is defined but unused. Removing the duplicate import and either using `logger` or dropping it keeps the script simpler. ```suggestion import sys from pathlib import Path try: import yaml except ImportError: print("PyYAML is required for validate_skills.py. Install with `pip install pyyaml`.") sys.exit(1) ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
