bito-code-review[bot] commented on code in PR #35021: URL: https://github.com/apache/superset/pull/35021#discussion_r2873640697
########## superset/views/redirect.py: ########## @@ -0,0 +1,74 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +View that handles external-link redirects with a warning page. + +Links in alert/report emails are rewritten to point here. Internal links +redirect immediately; external links are shown to the user for confirmation +via the React ``RedirectWarning`` page. +""" + +import logging + +from flask import abort, redirect, request +from flask_appbuilder import expose + +from superset import is_feature_enabled +from superset.superset_typing import FlaskResponse +from superset.utils.link_redirect import is_safe_redirect_url +from superset.views.base import BaseSupersetView + +logger = logging.getLogger(__name__) + +DANGEROUS_SCHEMES: frozenset[str] = frozenset( + ("javascript:", "data:", "vbscript:", "file:") +) + + +class RedirectView(BaseSupersetView): + """ + Warning page for external links found in alert/report emails. + + This endpoint is publicly accessible (no authentication required) + because email recipients may not have an active Superset session. + """ + + route_base = "/redirect" + + @expose("/") + def redirect_warning(self) -> FlaskResponse: + """Validate the target URL and either redirect or show the warning page.""" + if not is_feature_enabled("ALERT_REPORTS"): + abort(404) + + target_url = request.args.get("url", "").strip() + + if not target_url: + abort(400, description="Missing URL parameter") + + # Block dangerous schemes (check the decoded value) + if target_url.lower().startswith(tuple(DANGEROUS_SCHEMES)): Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Improve URL scheme validation</b></div> <div id="fix"> The check for dangerous schemes uses startswith, which may miss edge cases or be imprecise. Parse the URL with urlparse and check parsed.scheme.lower() in DANGEROUS_SCHEMES (updated to scheme names without colons). This aligns with how is_safe_redirect_url validates schemes and improves security. </div> <details> <summary> <b>Code suggestion</b> </summary> <blockquote>Check the AI-generated fix before applying</blockquote> <div id="code"> ``` - import logging + import logging + from urllib.parse import urlparse @@ -37,1 +38,1 @@ - DANGEROUS_SCHEMES: frozenset[str] = frozenset( - ("javascript:", "data:", "vbscript:", "file:") - ) + DANGEROUS_SCHEMES: frozenset[str] = frozenset( + ("javascript", "data", "vbscript", "file") + ) @@ -64,1 +65,3 @@ - # Block dangerous schemes (check the decoded value) - if target_url.lower().startswith(tuple(DANGEROUS_SCHEMES)): + # Block dangerous schemes (check the decoded value) + parsed = urlparse(target_url) + if parsed.scheme.lower() in DANGEROUS_SCHEMES: ``` </div> </details> </div> <small><i>Code Review Run #f8bf0f</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## superset-frontend/src/pages/RedirectWarning/utils.ts: ########## @@ -0,0 +1,96 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +const TRUSTED_URLS_KEY = 'superset_trusted_urls'; +const MAX_TRUSTED_URLS = 100; +const ALLOWED_SCHEMES = ['http:', 'https:']; + +/** + * Normalize a URL for comparison (origin + path without trailing slash + search). + */ +function normalizeUrl(url: string): string { + try { + const parsed = new URL(url); + return parsed.origin + parsed.pathname.replace(/\/$/, '') + parsed.search; + } catch { + return url; + } +} + +/** + * Return true if the URL scheme is safe for navigation. + * Blocks javascript:, data:, vbscript:, file:, etc. + */ +export function isAllowedScheme(url: string): boolean { + try { + const parsed = new URL(url); + return ALLOWED_SCHEMES.includes(parsed.protocol); + } catch { + // relative URLs or unparseable — allow (they'll resolve against current origin) + return true; + } +} + +/** + * Read the target URL from the current page's query string. + * + * URLSearchParams.get() already percent-decodes the value, so we must NOT + * call decodeURIComponent again (doing so would allow double-encoded + * payloads like `javascript%253Aalert(1)` to bypass scheme checks). + */ +export function getTargetUrl(): string { + const params = new URLSearchParams(window.location.search); + const url = params.get('url') ?? ''; + return url.trim(); +} + +function getTrustedUrls(): string[] { + try { + const stored = localStorage.getItem(TRUSTED_URLS_KEY); + if (!stored) return []; + const parsed: unknown = JSON.parse(stored); + return Array.isArray(parsed) ? (parsed as string[]) : []; + } catch { + return []; + } +} + +function saveTrustedUrls(urls: string[]): void { + const limited = + urls.length > MAX_TRUSTED_URLS ? urls.slice(-MAX_TRUSTED_URLS) : urls; + try { + localStorage.setItem(TRUSTED_URLS_KEY, JSON.stringify(limited)); + } catch { + // Ignore storage errors (private browsing, quota exceeded, etc.) + } +} + +export function isUrlTrusted(url: string): boolean { + const normalized = normalizeUrl(url); + return getTrustedUrls().some(t => normalizeUrl(t) === normalized); +} + +export function trustUrl(url: string): void { + const normalized = normalizeUrl(url); + const trusted = getTrustedUrls(); + if (!trusted.some(t => normalizeUrl(t) === normalized)) { + trusted.push(url); + saveTrustedUrls(trusted); + } +} Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Missing test coverage for security functions</b></div> <div id="fix"> This new utils.ts file implements security-critical URL handling for external redirects, but lacks unit tests. The functions manage trusted URL storage and scheme validation to prevent open redirect vulnerabilities. Without tests, potential bugs in normalization or storage logic could introduce security risks. Add Jest tests to verify correct behavior across all code paths. </div> </div> <small><i>Code Review Run #f8bf0f</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them ########## superset/utils/link_redirect.py: ########## @@ -0,0 +1,149 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Utilities for processing links in alert/report emails. + +External links are rewritten to go through a redirect warning page so that +recipients see a confirmation before navigating to an external site. +""" + +import logging +import re +from urllib.parse import quote, urlparse + +from flask import current_app + +logger = logging.getLogger(__name__) + +# Matches href="..." in anchor tags (both single and double quotes) +_HREF_RE = re.compile( + r"""(<a\s[^>]*?href\s*=\s*)(["'])(.*?)\2""", + re.IGNORECASE | re.DOTALL, +) + + +def _get_base_hosts() -> set[str]: + """Return the set of hosts that are considered internal (lower-cased).""" + hosts: set[str] = set() + for key in ("WEBDRIVER_BASEURL_USER_FRIENDLY", "WEBDRIVER_BASEURL"): + url = current_app.config.get(key, "") + if url: + parsed = urlparse(url) + if parsed.scheme and parsed.netloc: + hosts.add(parsed.netloc.lower()) + return hosts + + +def _get_redirect_base() -> str: + """Return the base URL used to build redirect links.""" + for key in ("WEBDRIVER_BASEURL_USER_FRIENDLY", "WEBDRIVER_BASEURL"): + url = current_app.config.get(key, "") + if url: + return url.rstrip("/") + return "" + + +def _is_external(href: str, base_hosts: set[str]) -> bool: + """Return True if *href* points to an external host.""" + parsed = urlparse(href) + # Only rewrite http(s) links with a host that differs from ours + if parsed.scheme not in ("http", "https"): + return False + return bool(parsed.netloc) and parsed.netloc.lower() not in base_hosts + + +def _replace_href( + match: re.Match[str], + base_hosts: set[str], + redirect_base: str, +) -> str: + """Regex replacer: rewrite external hrefs to go through the redirect page.""" + prefix, quote_char, href = match.group(1), match.group(2), match.group(3) + href = href.strip() + + # Don't double-redirect + if "/redirect/" in href: + return match.group(0) + + if not _is_external(href, base_hosts): + return match.group(0) + + redirect_url = f"{redirect_base}/redirect/?url={quote(href, safe='')}" + return f"{prefix}{quote_char}{redirect_url}{quote_char}" + + +def process_html_links(html_content: str) -> str: + """ + Rewrite external links in *html_content* to go through the redirect page. + + Internal links (matching the configured base URL hosts) are left untouched. + """ + if not html_content or not html_content.strip(): + return html_content + + if not current_app.config.get("ALERT_REPORTS_ENABLE_LINK_REDIRECT", True): + return html_content + + base_hosts = _get_base_hosts() + if not base_hosts: + logger.warning("No base URL configured, skipping link redirect processing") + return html_content + + redirect_base = _get_redirect_base() + if not redirect_base: + return html_content + + try: + return _HREF_RE.sub( + lambda m: _replace_href(m, base_hosts, redirect_base), + html_content, + ) + except Exception: Review Comment: <div> <div id="suggestion"> <div id="issue"><b>Avoid catching blind generic Exception</b></div> <div id="fix"> Replace the broad `Exception` catch with specific exception types (e.g., `re.error`, `ValueError`, `AttributeError`) to improve error handling clarity and avoid masking unexpected errors. </div> <details> <summary> <b>Code suggestion</b> </summary> <blockquote>Check the AI-generated fix before applying</blockquote> <div id="code"> ````suggestion except (re.error, ValueError, AttributeError): ```` </div> </details> </div> <small><i>Code Review Run #f8bf0f</i></small> </div> --- Should Bito avoid suggestions like this for future reviews? (<a href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>) - [ ] Yes, avoid them -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
