Ma77Ball commented on code in PR #5359:
URL: https://github.com/apache/texera/pull/5359#discussion_r3362393929
##########
.github/workflows/comment-commands.yml:
##########
@@ -137,6 +145,150 @@ jobs:
return;
}
+ // Find a reviewer for the PR by running `git blame` on each
+ // changed file, picking the most recent commit author for
+ // that file, and turning that commit into a GitHub login.
+ // Returns an array of unique logins.
+ async function getReviewersFromBlame() {
+ const { data: pull } = await github.rest.pulls.get({
+ owner, repo, pull_number,
+ });
+
+ // The default checkout targets the repo's default branch, so
+ // pull.base.sha may not be reachable when the PR targets a
+ // release or divergent branch. Fetch the base ref explicitly
+ // so git blame <base.sha> always works.
+ try {
+ execFileSync('git', ['fetch', 'origin', pull.base.ref], {
encoding: 'utf8' });
+ } catch (e) {
+ core.warning(`git fetch for base ref ${pull.base.ref} failed:
${e.message}`);
+ }
+
+ // List files changed in the pull request (paginated).
+ const files = await github.paginate(github.rest.pulls.listFiles,
{
+ owner, repo, pull_number, per_page: 100,
+ });
+
+ // Parse the `git blame -p` output to find, for each file,
+ // the commit (SHA) that was the most recent author-time.
+ function latestBlameCommit(blameOutput) { // Returns the { sha, authorTime } block with the greatest author-time, or null if none was parsed.
+ let latest = null; // Best candidate so far: { sha, authorTime }.
+ let current = null; // Block being parsed; authorTime stays null until an author-time line is seen.
+
+ function finalizeCurrent() { // Promotes `current` to `latest` when its timestamp is strictly newer.
+ if (!current || current.authorTime == null) return; // Skip blocks that never received an author-time.
+ if (!latest || current.authorTime > latest.authorTime) { // Strict `>`: on a tie the earlier-seen block is kept.
+ latest = current;
+ }
+ }
+
+ for (const line of blameOutput.split(/\r?\n/)) {
+ // A header line (a hex/`^` token followed by three integers) starts a new block.
+ const header =
line.match(/^([0-9a-f^]+)\s+\d+\s+\d+\s+\d+$/);
+ if (header) {
+ finalizeCurrent(); // Close out the previous block before starting the next one.
+ current = { sha: header[1].replace(/^\^/, ''), authorTime:
null }; // A leading `^` on the captured token is stripped from the SHA.
+ continue;
+ }
+
+ // `author-time` lines carry a run of digits, stored as a Number on the current block.
+ const authorTime = line.match(/^author-time\s+(\d+)$/);
+ if (authorTime && current) { // Ignore author-time lines seen before any header.
+ current.authorTime = Number(authorTime[1]);
+ }
+ }
+
+ finalizeCurrent(); // The final block has no following header to trigger finalization.
+ return latest;
+ }
+
+ // Fetch collaborators once as a fallback pool for files whose
+ // blamed commit has no linked GitHub login.
+ let collaboratorPool = [];
+ try {
+ const collabs = await
github.paginate(github.rest.repos.listCollaborators, {
+ owner, repo, per_page: 100,
+ });
+ collaboratorPool = collabs
+ .filter(u => u.type !== 'Bot' && u.login.toLowerCase() !==
author.toLowerCase())
+ .map(u => u.login);
+ } catch (e) {
+ core.warning(`Could not fetch collaborators for fallback:
${e.message}`);
+ }
+
+ // Count how many changed files each candidate most recently
+ // touched. Top MAX_REVIEWERS avoids the GitHub 422 that
+ // rejects >15 reviewers and keeps noise low.
+ const MAX_REVIEWERS = 2;
+ const reviewerCounts = new Map(); // login -> file count
+ for (const { filename } of files) {
+ let blameOutput;
+ try {
+ // Run blame at the PR base commit so we attribute
+ // existing lines correctly (not the PR tip).
+ blameOutput = execFileSync('git', ['blame', '-p',
pull.base.sha, '--', filename], { encoding: 'utf8' });
+ } catch (e) {
+ core.warning(`git blame on ${filename} at ${pull.base.sha}
failed: ${e.message}`);
+ continue;
+ }
+
+ const latest = latestBlameCommit(blameOutput);
+ if (!latest) {
+ core.warning(`Could not determine a blamed commit for
${filename}; skipping.`);
+ continue;
+ }
+
+ let commit;
+ try {
+ ({ data: commit } = await github.rest.repos.getCommit({
owner, repo, ref: latest.sha }));
+ } catch (e) {
+ core.warning(`Commit lookup for ${latest.sha} from
${filename} failed: ${e.message}`);
+ continue;
+ }
+
+ // Prefer the GitHub-linked author/committer; fall back to a
+ // random collaborator if the commit has no linked GitHub login
+ // (e.g., authored by email only).
+ let login = commit.author?.login ?? commit.committer?.login;
+ if (!login) {
+ if (!collaboratorPool.length) {
+ core.warning(`Commit ${latest.sha} from ${filename} has no
GitHub user and no collaborator fallback; skipping.`);
+ continue;
+ }
+ login = collaboratorPool[Math.floor(Math.random() *
collaboratorPool.length)];
+ core.info(`Commit ${latest.sha} from ${filename} has no
GitHub user; falling back to random collaborator ${login}.`);
+ }
Review Comment:
The random fallback pick gets counted in `reviewerCounts`, so it can outrank
a real blame author from another file. Don't count random picks: use real
committers whenever any file has one, and fall back to a single random
collaborator only when no file produced a real committer.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]