github-advanced-security[bot] commented on code in PR #52581:
URL: https://github.com/apache/airflow/pull/52581#discussion_r2175465435


##########
airflow-core/src/airflow/utils/serve_logs.py:
##########
@@ -43,8 +45,95 @@
 logger = logging.getLogger(__name__)
 
 
+def _validate_log_file_path(filename: str, log_directory: str) -> Path:
+    """
+    Validate that the requested file path is within the log directory and safe 
to serve.
+
+    :param filename: The requested filename from the URL path
+    :param log_directory: The base log directory path
+
+    :returns: Path: The validated file path
+    """
+    # URL decode the filename to handle encoded path traversal attempts
+    try:
+        decoded_filename = urllib.parse.unquote(filename)
+    except Exception as e:
+        logger.warning("Failed to URL decode filename '%s': %s", filename, e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid URL encoding in filename",
+        )
+
+    # Check for control characters and other invalid characters
+    if not decoded_filename or any(ord(c) < 32 for c in decoded_filename):
+        logger.warning(
+            "Invalid characters detected in filename: '%s' (decoded: '%s')",
+            filename,
+            decoded_filename,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid characters in filename",
+        )
+
+    # Check for null bytes specifically (additional security check)
+    if "\x00" in decoded_filename:
+        logger.warning(
+            "Null byte detected in filename: '%s' (decoded: '%s')",
+            filename,
+            decoded_filename,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Null byte in filename",
+        )
+
+    try:
+        # Normalize the log directory path and join the decoded filename with 
the log directory
+        log_dir = Path(log_directory).resolve()
+        file_path = log_dir / decoded_filename
+
+        # Resolve the full path to handle any symbolic links or relative 
components
+        resolved_path = file_path.resolve()

Review Comment:
   ## Uncontrolled data used in path expression
   
   This path depends on a [user-provided value](1).
   
   [Show more 
details](https://github.com/apache/airflow/security/code-scanning/492)



##########
airflow-core/src/airflow/utils/serve_logs.py:
##########
@@ -43,8 +45,95 @@
 logger = logging.getLogger(__name__)
 
 
+def _validate_log_file_path(filename: str, log_directory: str) -> Path:
+    """
+    Validate that the requested file path is within the log directory and safe 
to serve.
+
+    :param filename: The requested filename from the URL path
+    :param log_directory: The base log directory path
+
+    :returns: Path: The validated file path
+    """
+    # URL decode the filename to handle encoded path traversal attempts
+    try:
+        decoded_filename = urllib.parse.unquote(filename)
+    except Exception as e:
+        logger.warning("Failed to URL decode filename '%s': %s", filename, e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid URL encoding in filename",
+        )
+
+    # Check for control characters and other invalid characters
+    if not decoded_filename or any(ord(c) < 32 for c in decoded_filename):
+        logger.warning(
+            "Invalid characters detected in filename: '%s' (decoded: '%s')",
+            filename,
+            decoded_filename,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid characters in filename",
+        )
+
+    # Check for null bytes specifically (additional security check)
+    if "\x00" in decoded_filename:
+        logger.warning(
+            "Null byte detected in filename: '%s' (decoded: '%s')",
+            filename,
+            decoded_filename,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Null byte in filename",
+        )
+
+    try:
+        # Normalize the log directory path and join the decoded filename with 
the log directory
+        log_dir = Path(log_directory).resolve()
+        file_path = log_dir / decoded_filename
+
+        # Resolve the full path to handle any symbolic links or relative 
components
+        resolved_path = file_path.resolve()
+
+        # Ensure the resolved path is within the log directory (prevents path 
traversal)
+        if not resolved_path.is_relative_to(log_dir):
+            logger.warning(
+                "Path traversal attempt detected. Requested path '%s' 
(decoded: '%s') resolves to '%s' "
+                "which is outside the log directory '%s'",
+                filename,
+                decoded_filename,
+                resolved_path,
+                log_dir,
+            )
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail="Access denied: path outside log directory",
+            )
+
+        # Additional security check: ensure it's a regular file (not a 
directory or special file)
+        if resolved_path.exists() and not resolved_path.is_file():

Review Comment:
   ## Uncontrolled data used in path expression
   
   This path depends on a [user-provided value](1).
   
   [Show more 
details](https://github.com/apache/airflow/security/code-scanning/493)



##########
airflow-core/src/airflow/utils/serve_logs.py:
##########
@@ -119,25 +210,39 @@
                 get_docs_url("configurations-ref.html#secret-key"),
                 exc_info=True,
             )
-            abort(403)
+            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, 
detail="Expired signature")
         except InvalidIssuedAtError:
             logger.warning(
-                "The request was issues in the future. Make sure that all 
components "
+                "The request was issued in the future. Make sure that all 
components "
                 "in your system have synchronized clocks. "
                 "See more at %s",
                 get_docs_url("configurations-ref.html#secret-key"),
                 exc_info=True,
             )
-            abort(403)
+            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, 
detail="Token issued in future")
         except Exception:
             logger.warning("Unknown error", exc_info=True)
-            abort(403)
+            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, 
detail="Authentication failed")
+
+    fastapi_app = FastAPI(
+        dependencies=[Depends(validate_jwt_token)],
+    )
+
+    @fastapi_app.get("/log/{filename:path}")
+    def serve_logs_handler(filename: str):
+        # Validate the file path for security
+        file_path = _validate_log_file_path(filename, log_directory)
+
+        if not file_path.exists():
+            raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, 
detail="Log file not found")
 
-    @flask_app.route("/log/<path:filename>")
-    def serve_logs_view(filename):
-        return send_from_directory(log_directory, filename, 
mimetype="application/json", as_attachment=False)
+        return FileResponse(
+            path=file_path,

Review Comment:
   ## Uncontrolled data used in path expression
   
   This path depends on a [user-provided value](1).
   
   [Show more 
details](https://github.com/apache/airflow/security/code-scanning/497)



##########
airflow-core/src/airflow/utils/serve_logs.py:
##########
@@ -119,25 +210,39 @@
                 get_docs_url("configurations-ref.html#secret-key"),
                 exc_info=True,
             )
-            abort(403)
+            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, 
detail="Expired signature")
         except InvalidIssuedAtError:
             logger.warning(
-                "The request was issues in the future. Make sure that all 
components "
+                "The request was issued in the future. Make sure that all 
components "
                 "in your system have synchronized clocks. "
                 "See more at %s",
                 get_docs_url("configurations-ref.html#secret-key"),
                 exc_info=True,
             )
-            abort(403)
+            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, 
detail="Token issued in future")
         except Exception:
             logger.warning("Unknown error", exc_info=True)
-            abort(403)
+            raise HTTPException(status_code=status.HTTP_403_FORBIDDEN, 
detail="Authentication failed")
+
+    fastapi_app = FastAPI(
+        dependencies=[Depends(validate_jwt_token)],
+    )
+
+    @fastapi_app.get("/log/{filename:path}")
+    def serve_logs_handler(filename: str):
+        # Validate the file path for security
+        file_path = _validate_log_file_path(filename, log_directory)
+
+        if not file_path.exists():

Review Comment:
   ## Uncontrolled data used in path expression
   
   This path depends on a [user-provided value](1).
   
   [Show more 
details](https://github.com/apache/airflow/security/code-scanning/496)



##########
airflow-core/src/airflow/utils/serve_logs.py:
##########
@@ -43,8 +45,95 @@
 logger = logging.getLogger(__name__)
 
 
+def _validate_log_file_path(filename: str, log_directory: str) -> Path:
+    """
+    Validate that the requested file path is within the log directory and safe 
to serve.
+
+    :param filename: The requested filename from the URL path
+    :param log_directory: The base log directory path
+
+    :returns: Path: The validated file path
+    """
+    # URL decode the filename to handle encoded path traversal attempts
+    try:
+        decoded_filename = urllib.parse.unquote(filename)
+    except Exception as e:
+        logger.warning("Failed to URL decode filename '%s': %s", filename, e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid URL encoding in filename",
+        )
+
+    # Check for control characters and other invalid characters
+    if not decoded_filename or any(ord(c) < 32 for c in decoded_filename):
+        logger.warning(
+            "Invalid characters detected in filename: '%s' (decoded: '%s')",
+            filename,
+            decoded_filename,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid characters in filename",
+        )
+
+    # Check for null bytes specifically (additional security check)
+    if "\x00" in decoded_filename:
+        logger.warning(
+            "Null byte detected in filename: '%s' (decoded: '%s')",
+            filename,
+            decoded_filename,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Null byte in filename",
+        )
+
+    try:
+        # Normalize the log directory path and join the decoded filename with 
the log directory
+        log_dir = Path(log_directory).resolve()
+        file_path = log_dir / decoded_filename
+
+        # Resolve the full path to handle any symbolic links or relative 
components
+        resolved_path = file_path.resolve()
+
+        # Ensure the resolved path is within the log directory (prevents path 
traversal)
+        if not resolved_path.is_relative_to(log_dir):
+            logger.warning(
+                "Path traversal attempt detected. Requested path '%s' 
(decoded: '%s') resolves to '%s' "
+                "which is outside the log directory '%s'",
+                filename,
+                decoded_filename,
+                resolved_path,
+                log_dir,
+            )
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail="Access denied: path outside log directory",
+            )
+
+        # Additional security check: ensure it's a regular file (not a 
directory or special file)
+        if resolved_path.exists() and not resolved_path.is_file():

Review Comment:
   ## Uncontrolled data used in path expression
   
   This path depends on a [user-provided value](1).
   
   [Show more 
details](https://github.com/apache/airflow/security/code-scanning/494)



##########
airflow-core/src/airflow/utils/serve_logs.py:
##########
@@ -43,8 +45,95 @@
 logger = logging.getLogger(__name__)
 
 
+def _validate_log_file_path(filename: str, log_directory: str) -> Path:
+    """
+    Validate that the requested file path is within the log directory and safe 
to serve.
+
+    :param filename: The requested filename from the URL path
+    :param log_directory: The base log directory path
+
+    :returns: Path: The validated file path
+    """
+    # URL decode the filename to handle encoded path traversal attempts
+    try:
+        decoded_filename = urllib.parse.unquote(filename)
+    except Exception as e:
+        logger.warning("Failed to URL decode filename '%s': %s", filename, e)
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid URL encoding in filename",
+        )
+
+    # Check for control characters and other invalid characters
+    if not decoded_filename or any(ord(c) < 32 for c in decoded_filename):
+        logger.warning(
+            "Invalid characters detected in filename: '%s' (decoded: '%s')",
+            filename,
+            decoded_filename,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Invalid characters in filename",
+        )
+
+    # Check for null bytes specifically (additional security check)
+    if "\x00" in decoded_filename:
+        logger.warning(
+            "Null byte detected in filename: '%s' (decoded: '%s')",
+            filename,
+            decoded_filename,
+        )
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="Null byte in filename",
+        )
+
+    try:
+        # Normalize the log directory path and join the decoded filename with 
the log directory
+        log_dir = Path(log_directory).resolve()
+        file_path = log_dir / decoded_filename
+
+        # Resolve the full path to handle any symbolic links or relative 
components
+        resolved_path = file_path.resolve()
+
+        # Ensure the resolved path is within the log directory (prevents path 
traversal)
+        if not resolved_path.is_relative_to(log_dir):
+            logger.warning(
+                "Path traversal attempt detected. Requested path '%s' 
(decoded: '%s') resolves to '%s' "
+                "which is outside the log directory '%s'",
+                filename,
+                decoded_filename,
+                resolved_path,
+                log_dir,
+            )
+            raise HTTPException(
+                status_code=status.HTTP_403_FORBIDDEN,
+                detail="Access denied: path outside log directory",
+            )
+
+        # Additional security check: ensure it's a regular file (not a 
directory or special file)
+        if resolved_path.exists() and not resolved_path.is_file():
+            logger.warning(
+                "Attempt to access non-file resource: '%s' (type: %s)",
+                resolved_path,
+                "directory" if resolved_path.is_dir() else "special file",

Review Comment:
   ## Uncontrolled data used in path expression
   
   This path depends on a [user-provided value](1).
   
   [Show more 
details](https://github.com/apache/airflow/security/code-scanning/495)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to