This is an automated email from the ASF dual-hosted git repository.
chia7712 pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/kafka.git
The following commit(s) were added to refs/heads/trunk by this push:
new 7b5d4cacedc KAFKA-20362 Fall back to GitHub handle when reviewer email
is unavailable (#22108)
7b5d4cacedc is described below
commit 7b5d4cacedc6da352f402e621681055ea3fc1823
Author: Ming-Yen Chung <[email protected]>
AuthorDate: Wed Apr 22 21:35:01 2026 +0800
KAFKA-20362 Fall back to GitHub handle when reviewer email is unavailable
(#22108)
Fix https://github.com/apache/kafka/pull/21928#issuecomment-4284041251.
When a reviewer has "Keep my email addresses private" enabled, their
real email is unreachable through either the commits API (GitHub
rewrites the author email to the `@users.noreply.github.com` form on
squash-merge) or the GitHub user profile (which typically returns null).
The previous script wrote that noreply email, or a
`{login}@email-not-found` placeholder when both tiers missed, into the
`Reviewers:` trailer -- neither identifies the reviewer.
This PR adds two fallbacks, in order:
1. **Tier 3 -- past Reviewers: trailers in `git log`.** Once a reviewer
has been credited with a real email in any earlier merged PR, match
by name and reuse that email. This makes real-email attribution
"sticky" even after a reviewer later enables email privacy on
GitHub.
2. **`Name (@login)` fallback.** When no usable email can be resolved
from any tier, fall back to the GitHub handle, which is stable,
public, and unambiguously identifies the reviewer.
Reviewers whose real email is recoverable from any tier continue to use
the existing `Name <email>` form.
Kafka committers typically count reviewer contributions by grepping `git
log` for the reviewer's name, e.g., `git log | grep -i Reviewer | grep
-i "Ming-Yen Chung" | wc -l`. Since the name portion is preserved in
both `<email>` and `(@login)` forms, this change does not affect those
counts.
**For first-time reviewers who want to be credited by email instead of
`(@login)` (subsequent PRs are handled automatically by Tier 3):**
- **Tier 2 (works immediately, recommended):** set **Settings → Profile
→ Public email**. Verify with `gh api "users/<login>"` -- the `email`
field should be non-null.
- **Tier 1 (only after your first merged commit in apache/kafka):**
disable **Settings → Emails → Keep my email addresses private** before
committing. Verify with
`gh api "repos/apache/kafka/commits?author=<login>&per_page=1"` --
returns `[]` if you have no commits in the repo yet, and the author
email is the noreply form if privacy was on at commit time.
Reviewers: Chia-Ping Tsai <[email protected]>
---
.github/scripts/pr-format.py | 73 +++++++++++++++++++++++++++++++++-----------
1 file changed, 56 insertions(+), 17 deletions(-)
diff --git a/.github/scripts/pr-format.py b/.github/scripts/pr-format.py
index 3bc911c0634..aab44e3e08d 100644
--- a/.github/scripts/pr-format.py
+++ b/.github/scripts/pr-format.py
@@ -107,14 +107,24 @@ def split_paragraphs(text: str):
def resolve_reviewer(login: str) -> tuple:
"""Map a GitHub login to (name, email).
- Tries the repo commit history first, then falls back to the GitHub user
profile.
- If the display name is unavailable, the login is used as the name.
- If the email is unavailable, '{login}@email-not-found' is used as a
placeholder.
+ Tries three tiers in order: repo commit history, GitHub user profile,
+ and past `Reviewers:` trailers in git log (matched by name).
+ Noreply emails (@users.noreply.github.com) are treated as missing since
+ they are GitHub privacy placeholders that do not identify the reviewer.
+ Returns (name, None) when no usable email is found; the caller falls
+ back to the '(@login)' form in the Reviewers trailer.
"""
+ def _usable_email(e):
+ if not e or e.endswith("@users.noreply.github.com"):
+ return None
+ return e
+
name = None
email = None
- # Tier 1: find from repo commit history
+ # Tier 1: find from repo commit history. Misses when the reviewer has no
+ # merged commit in apache/kafka, or had "Keep my email private" enabled
+ # at commit time (GitHub rewrites the author to the noreply form).
try:
cmd = f"gh api repos/apache/kafka/commits?author={login}&per_page=1"
p = subprocess.run(shlex.split(cmd), capture_output=True, text=True)
@@ -123,11 +133,12 @@ def resolve_reviewer(login: str) -> tuple:
if commits:
author = commits[0].get("commit", {}).get("author", {})
name = author.get("name")
- email = author.get("email")
+ email = _usable_email(author.get("email"))
except Exception as e:
logger.debug(f"Failed to resolve {login} from commit history: {e}")
- # Tier 2: GitHub user profile
+ # Tier 2: GitHub user profile. Only exposes an email when the reviewer
+ # has set a Public email in their profile settings.
if not name or not email:
try:
cmd = f"gh api users/{login}"
@@ -137,23 +148,47 @@ def resolve_reviewer(login: str) -> tuple:
if not name:
name = user.get("name")
if not email:
- email = user.get("email")
+ email = _usable_email(user.get("email"))
except Exception as e:
logger.debug(f"Failed to resolve {login} from GitHub profile: {e}")
+ # Tier 3: past Reviewers: trailers in git log, matched by name. Catches
+ # pure reviewers (no commits in apache/kafka, no public profile email)
+ # who have been credited with a real email in an earlier merged PR.
+ # git log is newest-first, so the first usable match is the most recent.
+ if name and not email:
+ try:
+ p = subprocess.run(
+ ["git", "log",
+
"--pretty=format:%(trailers:key=Reviewers,valueonly=true,unfold=true)"],
+ capture_output=True, text=True,
+ )
+ if p.returncode == 0:
+ pattern = re.compile(rf"{re.escape(name)}\s*<([^>]+)>")
+ for line in p.stdout.splitlines():
+ for m in pattern.finditer(line):
+ candidate = _usable_email(m.group(1))
+ if candidate:
+ email = candidate
+ break
+ if email:
+ break
+ except Exception as e:
+ logger.debug(f"Failed to resolve {login} from past Reviewers
trailers: {e}")
+
if not name:
name = login
- if not email:
- email = f"{login}@email-not-found"
-
return (name, email)
-def already_exists(email: str, existing_reviewers: List[str]) -> bool:
- """Check if a reviewer with the given email is already in the existing
reviewers list."""
- existing_emails = re.findall(r'<(.+?)>', ", ".join(existing_reviewers))
- return email.lower() in [e.lower() for e in existing_emails]
+def already_exists(identity: str, existing_reviewers: List[str]) -> bool:
+ """Check if a reviewer identity is already in the existing reviewers list.
+
+ identity is the delimited token that uniquely identifies a reviewer, either
+ '<email>' (for the email form) or '(@login)' (for the login fallback).
+ """
+ return identity.lower() in ", ".join(existing_reviewers).lower()
def update_reviewers_trailer(body: str, trailer: str) -> str:
@@ -207,11 +242,15 @@ if __name__ == "__main__":
reviewer_login = get_env("REVIEWER_LOGIN")
pr_author = (gh_json.get("author") or {}).get("login")
if reviewer_login and reviewer_login != pr_author:
- existing_reviewers = parse_trailers(title, body).get("Reviewers", [])
name, email = resolve_reviewer(reviewer_login)
- if not already_exists(email, existing_reviewers):
+ if email:
+ identity = f"<{email}>"
+ else:
+ identity = f"(@{reviewer_login})"
+ resolved = f"{name} {identity}"
+ existing_reviewers = parse_trailers(title, body).get("Reviewers", [])
+ if not already_exists(identity, existing_reviewers):
existing_value = ", ".join(existing_reviewers)
- resolved = f"{name} <{email}>"
new_value = f"{existing_value}, {resolved}" if existing_value else
resolved
body = update_reviewers_trailer(body, f"Reviewers: {new_value}")