This is an automated email from the ASF dual-hosted git repository.

lhotari pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/pulsar-site.git


The following commit(s) were added to refs/heads/main by this push:
     new 8b83055e87e Fix site-publisher to see all changes since last successful publish
8b83055e87e is described below

commit 8b83055e87eb7b3a7c04bdb5d4ef9f3ec0ed3e9c
Author: Lari Hotari <[email protected]>
AuthorDate: Tue Apr 28 13:37:44 2026 +0300

    Fix site-publisher to see all changes since last successful publish
    
    The CI workflow checks out main with fetch-depth=2, so site-publisher's
    previous `git diff HEAD~1 HEAD` lost any commits stacked on top of each
    other between publishes, and a `data/release-pulsar.js`-only change
    never re-triggered the affected versioned-docs builds (e.g. f15a213).
    
    - Track the published source-repo SHA in `.publish-ref` at the root of
      asf-site-next, committed atomically with the published content.
    - Use the GitHub compare API to enumerate changed files and commit
      messages for the whole range. Fall back to a full rebuild on missing
      ref / 404 / truncated response (>=300 files or >=250 commits).
    - When `data/release-pulsar.js` is in the diff, parse both versions of
      the file and rebuild every versioned-docs branch whose entry-set
      changed (catches new patches under existing vtags).
    - Replace scripts/split-version-build.sh + scripts/split-version.js
      with a Python orchestrator (tools/pytools/lib/execute/version_build.py)
      that does the same per-version yarn build / stash / restore loop.
    - Honor BUILD_ALL_VERSION=1 and BUILD_VERSIONS=... in any commit
      message across the publish range, not just HEAD.
    - Use the workflow's default GITHUB_TOKEN (permissions: contents: write)
      for both API calls and the asf-site-next push; drop PULSARBOT_TOKEN.
---
 .github/workflows/ci-build-site.yml        |   5 +-
 docusaurus.config.ts                       |   9 +-
 scripts/split-version-build.sh             |  77 ---------
 scripts/split-version.js                   |   8 -
 tools/pytools/bin/site-publisher.py        |  11 +-
 tools/pytools/lib/execute/changed_files.py | 248 +++++++++++++++++++++++++++++
 tools/pytools/lib/execute/site_builder.py  |  41 ++---
 tools/pytools/lib/execute/site_uploader.py |  13 +-
 tools/pytools/lib/execute/version_build.py | 148 +++++++++++++++++
 9 files changed, 444 insertions(+), 116 deletions(-)

diff --git a/.github/workflows/ci-build-site.yml b/.github/workflows/ci-build-site.yml
index 164503434da..6b01af534f9 100644
--- a/.github/workflows/ci-build-site.yml
+++ b/.github/workflows/ci-build-site.yml
@@ -31,6 +31,8 @@ jobs:
     name: Build and publish pulsar website-next
     runs-on: ubuntu-latest
     timeout-minutes: 600
+    permissions:
+      contents: write
     steps:
       - uses: actions/checkout@v6
         with:
@@ -39,7 +41,6 @@ jobs:
         with:
           ref: 'asf-site-next'
           path: tmp/asf-site-next
-          token: ${{ secrets.PULSARBOT_TOKEN }}
       - name: Install poetry
         run: pipx install poetry
       - uses: actions/setup-python@v6
@@ -53,6 +54,8 @@ jobs:
       - run: corepack enable
       - name: Update generated docs
         working-directory: tools/pytools
+        env:
+          GITHUB_TOKEN: ${{ github.token }}
         run: |
           poetry install
           poetry run bin/site-publisher.py --site-path=$GITHUB_WORKSPACE/tmp/asf-site-next
diff --git a/docusaurus.config.ts b/docusaurus.config.ts
index c67e02c6073..70ad60d2657 100644
--- a/docusaurus.config.ts
+++ b/docusaurus.config.ts
@@ -34,10 +34,11 @@ try {
 // Versioned legacy URLs like /docs/<version>/client-libraries-<slug> are NOT
 // included here on purpose: plugin-client-redirects generates a stub HTML file
 // at every `from` path, which would create build/docs/<version>/client-libraries-*/
-// directories in every yarn build. The CI split-version-build.sh script (which
-// builds each version separately and then mv's build-<v>/<v> into build/docs/)
-// would then fail because build/docs/<v>/ is non-empty from those stubs. Those
-// versioned URLs are handled exclusively by static/.htaccess in production.
+// directories in every yarn build. The CI per-version build orchestrator
+// (tools/pytools/lib/execute/version_build.py) builds each version separately
+// and then folds build-<v>/<v> back into build/docs/; it would fail because
+// build/docs/<v>/ is non-empty from those stubs. Those versioned URLs are
+// handled exclusively by static/.htaccess in production.
 function clientLibrariesLegacyRedirects() {
   const slugs = [
     "java", "java-setup", "java-initialize", "java-use", "java-tracing",
diff --git a/scripts/split-version-build.sh b/scripts/split-version-build.sh
deleted file mode 100755
index 3f3fb4cd55c..00000000000
--- a/scripts/split-version-build.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-
-set -x -e
-
-node scripts/split-version.js
-
-latest=$(cat scripts/.latest)
-buildVersion="next"
-
-echo "changed files: "
-echo "$@"
-
-_build() {
-    yarn build
-}
-
-_buildVersion() {
-    if [[ $buildVersion = "next" ]]; then
-        echo "..." "${buildVersion}" "and" "$latest" "begin build..."
-        echo "[\"current\", \"${latest}\"]" >.build-versions.json
-    else
-        echo "..." "$buildVersion" "begin build..."
-        echo "[\"${buildVersion}\"]" >.build-versions.json
-    fi
-
-    _build
-
-    if [[ $buildVersion != "$latest" ]]; then
-        mkdir -p "build-${buildVersion}/${buildVersion}"
-        mkdir -p build-assets
-        cp -r "build/docs/${buildVersion}/"* 
"build-${buildVersion}/${buildVersion}"
-        cp -r build/assets/* build-assets/
-        rm -rf "build/docs/${buildVersion}"
-    fi
-    echo "..." "$buildVersion" "build done..."
-}
-
-COMMIT_MSG=$(git show -s --format="%s %B")
-FORCE_BUILD_ALL_VERSION=$(echo "$COMMIT_MSG" | sed -n 
's/.*BUILD_ALL_VERSION=\([0-1]*\).*/\1/p')
-FORCE_BUILD_VERSIONS=$(echo "$COMMIT_MSG" | sed -n 
's/.*BUILD_VERSIONS=\([0-9\.x,]*\).*/\1/p')
-if [[ $FORCE_BUILD_VERSIONS =~ ^[0-9\.x,]+$ ]]; then
-    SUPPLEMENT_VERSIONS=$FORCE_BUILD_VERSIONS
-else
-    SUPPLEMENT_VERSIONS=""
-fi
-
-CURRENT_HOUR=$(date +%H)
-CURRENT_HOUR=${CURRENT_HOUR#0}
-echo "CURRENT_HOUR: ${CURRENT_HOUR}"
-
-if [[ "$FORCE_BUILD_ALL_VERSION" == "1" ]] || [[ "$FORCE_BUILD_ALL_VERSION" == 
"0" ]]; then
-    BUILD_ALL_VERSION="$FORCE_BUILD_ALL_VERSION"
-    echo "force build all versions"
-fi
-
-# Build only the versions that has changed and build next version that has any 
changed
-while read -r version; do
-    buildVersion=$version
-    if [[ "$*" == *versioned_docs/version-"$version"* || $buildVersion == 
"next" || $BUILD_ALL_VERSION == "1" || $BUILD_VERSION == *"$buildVersion"* || 
$SUPPLEMENT_VERSIONS == *"$buildVersion"* ]]; then
-        _buildVersion
-    else
-        echo "..." "$buildVersion" "no change, skip"
-    fi
-done <scripts/.versions
-
-while read -r version; do
-    if [ -d "build-$version" ]; then
-        mv -f "build-$version/"* build/docs
-        rm -rf "build-$version"
-    fi
-done <scripts/.versions
-
-cp -r build-assets/* build/assets/
-rm -rf build-assets
-cp static/.htaccess build/
-
-echo "$BUILD_ALL_VERSION" >scripts/.build
diff --git a/scripts/split-version.js b/scripts/split-version.js
deleted file mode 100644
index fb61a57770f..00000000000
--- a/scripts/split-version.js
+++ /dev/null
@@ -1,8 +0,0 @@
-const fs = require("fs");
-const path = require("path");
-const _ = require("lodash");
-let versions = require("../versions.json");
-const latestVersion = versions.shift();
-versions = versions.concat(["next"]);
-fs.writeFileSync(path.join(__dirname, ".versions"), versions.join("\n") + 
"\n");
-fs.writeFileSync(path.join(__dirname, ".latest"), latestVersion + "\n");
diff --git a/tools/pytools/bin/site-publisher.py b/tools/pytools/bin/site-publisher.py
index c13a03d0508..c1f33a024d6 100755
--- a/tools/pytools/bin/site-publisher.py
+++ b/tools/pytools/bin/site-publisher.py
@@ -17,6 +17,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import os
 import tempfile
 from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter
 from pathlib import Path
@@ -42,8 +43,10 @@ if __name__ == '__main__':
         else:
             site = Path(args.site_path)
 
-        commit = run_pipe(git, 'rev-parse', '--short', 'HEAD', 
cwd=root_path()).read().strip()
-        msg = f'Site updated at revision {commit}'
+        head_sha = run_pipe(git, 'rev-parse', 'HEAD', 
cwd=root_path()).read().strip()
+        short_sha = head_sha[:12]
+        msg = f'Site updated at revision {short_sha}'
+        token = os.getenv('GITHUB_TOKEN')
 
-        site_builder.execute(site)
-        site_uploader.execute(args.push, msg, site, branch)
+        site_builder.execute(site, head_sha, token)
+        site_uploader.execute(args.push, msg, site, branch, head_sha)
diff --git a/tools/pytools/lib/execute/changed_files.py b/tools/pytools/lib/execute/changed_files.py
new file mode 100644
index 00000000000..de20c620f05
--- /dev/null
+++ b/tools/pytools/lib/execute/changed_files.py
@@ -0,0 +1,248 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import base64
+import json
+import os
+import re
+from collections import defaultdict
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Dict, Iterable, List, Optional, Set, Tuple
+
+import requests
+
+from constant import site_path
+
+REPO = 'apache/pulsar-site'
+API_ROOT = 'https://api.github.com'
+PUBLISH_REF_FILE = '.publish-ref'
+RELEASE_PULSAR_PATH = 'data/release-pulsar.js'
+VTAG_VERSION_RE = re.compile(r'^\d+\.\d+\.x$')
+
+# GitHub's compare API caps responses at 300 files / 250 commits. Beyond those
+# thresholds the result is silently truncated, so we fall back to a full 
rebuild.
+COMPARE_FILE_CAP = 300
+COMPARE_COMMIT_CAP = 250
+
+BUILD_ALL_RE = re.compile(r'BUILD_ALL_VERSION=([01])')
+BUILD_VERSIONS_RE = re.compile(r'BUILD_VERSIONS=([0-9.x,]+)')
+
+
+@dataclass
+class ChangeSet:
+    files: List[str] = field(default_factory=list)
+    build_all: bool = False
+    force_versions: Set[str] = field(default_factory=set)
+
+
+def _auth_headers(token: str) -> dict:
+    return {
+        'Authorization': f'Bearer {token}',
+        'Accept': 'application/vnd.github+json',
+        'X-GitHub-Api-Version': '2022-11-28',
+    }
+
+
+def _read_publish_ref(asf_site: Path) -> Optional[str]:
+    ref_file = asf_site / PUBLISH_REF_FILE
+    if not ref_file.is_file():
+        return None
+    sha = ref_file.read_text().strip()
+    return sha or None
+
+
+def _read_known_versions() -> Set[str]:
+    """Return the set of versioned-docs branches from versions.json (the 
canonical source)."""
+    return set(json.loads((site_path() / 'versions.json').read_text()))
+
+
+def _fetch_release_pulsar_entries(sha: str, token: str) -> Optional[List[dict]]:
+    """Fetch and parse data/release-pulsar.js at the given SHA into a list of 
entry dicts."""
+    url = f'{API_ROOT}/repos/{REPO}/contents/{RELEASE_PULSAR_PATH}'
+    try:
+        resp = requests.get(url, headers=_auth_headers(token), params={'ref': 
sha}, timeout=30)
+        resp.raise_for_status()
+        payload = resp.json()
+    except (requests.RequestException, ValueError) as e:
+        print(f'failed to fetch {RELEASE_PULSAR_PATH}@{sha}: {e}')
+        return None
+
+    encoded = payload.get('content')
+    if not encoded:
+        print(f'no content field for {RELEASE_PULSAR_PATH}@{sha}')
+        return None
+    try:
+        body = base64.b64decode(encoded).decode('utf-8')
+    except (ValueError, UnicodeDecodeError) as e:
+        print(f'failed to decode {RELEASE_PULSAR_PATH}@{sha}: {e}')
+        return None
+
+    # The file is `module.exports = [...]` where the array is valid JSON.
+    start = body.find('[')
+    end = body.rfind(']')
+    if start < 0 or end < 0 or end <= start:
+        print(f'could not locate JSON array in {RELEASE_PULSAR_PATH}@{sha}')
+        return None
+    try:
+        entries = json.loads(body[start:end + 1])
+    except json.JSONDecodeError as e:
+        print(f'failed to parse {RELEASE_PULSAR_PATH}@{sha} as JSON: {e}')
+        return None
+    if not isinstance(entries, list):
+        return None
+    return entries
+
+
+def _group_by_vtag(entries: List[dict]) -> Dict[str, Set[Tuple]]:
+    """Group entries by vtag. Each entry becomes a frozen tuple of sorted 
items so we can
+    diff them as set members. Vtags that aren't of the form X.Y.x are skipped 
— they refer
+    to legacy per-patch versions for which there's no versioned_docs/version-* 
directory."""
+    grouped: Dict[str, Set[Tuple]] = defaultdict(set)
+    for entry in entries:
+        vtag = entry.get('vtag')
+        if not isinstance(vtag, str) or not VTAG_VERSION_RE.match(vtag):
+            continue
+        grouped[vtag].add(tuple(sorted(entry.items())))
+    return grouped
+
+
+def _release_pulsar_synthetic_paths(
+    base_sha: str, head_sha: str, token: str
+) -> Tuple[List[str], bool]:
+    """Return (synthetic_paths, ok). When ok is False, caller should escalate 
to build_all.
+
+    The returned paths are pseudo-arguments — never written to disk. They're 
appended to
+    the file list whose substring matcher fires the corresponding 
versioned-docs build."""
+    base_entries = _fetch_release_pulsar_entries(base_sha, token)
+    head_entries = _fetch_release_pulsar_entries(head_sha, token)
+    if base_entries is None or head_entries is None:
+        return [], False
+
+    base_grouped = _group_by_vtag(base_entries)
+    head_grouped = _group_by_vtag(head_entries)
+
+    affected = {
+        vtag for vtag in base_grouped.keys() | head_grouped.keys()
+        if base_grouped.get(vtag, set()) != head_grouped.get(vtag, set())
+    }
+    if not affected:
+        return [], True
+
+    known = _read_known_versions()
+    rebuild = sorted(affected & known)
+    skipped = sorted(affected - known)
+    if rebuild:
+        print(f'release-pulsar.js changes → rebuild versions: {rebuild}')
+    if skipped:
+        print(f'release-pulsar.js changes (unknown versions, skipped): 
{skipped}')
+    return [
+        f'versioned_docs/version-{vtag}/' for vtag in rebuild
+    ], True
+
+
+def _parse_commit_directives(messages: Iterable[str]) -> Tuple[bool, Set[str]]:
+    """Scan commit messages for BUILD_ALL_VERSION=1 / BUILD_VERSIONS=... 
directives.
+
+    Returns (build_all, force_versions). Mirrors the regex behavior of the 
previous
+    scripts/split-version-build.sh, but evaluates every commit in the range — 
not just HEAD."""
+    build_all = False
+    force_versions: Set[str] = set()
+    for msg in messages:
+        if not msg:
+            continue
+        if any(m.group(1) == '1' for m in BUILD_ALL_RE.finditer(msg)):
+            build_all = True
+        for match in BUILD_VERSIONS_RE.finditer(msg):
+            for v in match.group(1).split(','):
+                v = v.strip()
+                if v:
+                    force_versions.add(v)
+    return build_all, force_versions
+
+
+def full_build_paths() -> List[str]:
+    return [
+        f'versioned_docs/version-{v}/'
+        for v in sorted(_read_known_versions())
+    ]
+
+
+def compute_changed_files(
+    asf_site: Path,
+    head_sha: str,
+    token: Optional[str],
+) -> ChangeSet:
+    """Compute the change set since the last successful publish.
+
+    `build_all=True` signals the caller to rebuild every versioned-docs branch 
in
+    versions.json. `force_versions` carries any explicit BUILD_VERSIONS=... 
selectors
+    found in commit messages between .publish-ref and head_sha."""
+    base_sha = _read_publish_ref(asf_site)
+    if base_sha is None:
+        print(f'{PUBLISH_REF_FILE} missing → full rebuild')
+        return ChangeSet(build_all=True)
+
+    if not token:
+        if os.getenv('GITHUB_ACTIONS'):
+            print('GITHUB_TOKEN not set in CI → full rebuild')
+        else:
+            print('no GITHUB_TOKEN (local run) → full rebuild')
+        return ChangeSet(build_all=True)
+
+    if base_sha == head_sha:
+        print(f'{PUBLISH_REF_FILE} already at {head_sha} → no rebuild needed')
+        return ChangeSet()
+
+    url = f'{API_ROOT}/repos/{REPO}/compare/{base_sha}...{head_sha}'
+    try:
+        resp = requests.get(url, headers=_auth_headers(token), timeout=30)
+        if resp.status_code == 404:
+            print(f'compare {base_sha}...{head_sha} returned 404 → full 
rebuild')
+            return ChangeSet(build_all=True)
+        resp.raise_for_status()
+        payload = resp.json()
+    except (requests.RequestException, ValueError) as e:
+        print(f'compare API failed: {e} → full rebuild')
+        return ChangeSet(build_all=True)
+
+    files = payload.get('files') or []
+    total_commits = payload.get('total_commits', 0)
+    if len(files) >= COMPARE_FILE_CAP or total_commits >= COMPARE_COMMIT_CAP:
+        print(
+            f'compare result truncated (files={len(files)}, 
commits={total_commits}) → full rebuild'
+        )
+        return ChangeSet(build_all=True)
+
+    messages = [c.get('commit', {}).get('message', '') for c in 
payload.get('commits') or []]
+    build_all, force_versions = _parse_commit_directives(messages)
+    if build_all:
+        print('BUILD_ALL_VERSION=1 found in a commit message → full rebuild')
+    if force_versions:
+        print(f'BUILD_VERSIONS commit directives → also build: 
{sorted(force_versions)}')
+
+    changed = [f['filename'] for f in files if f.get('filename')]
+    print(f'compare {base_sha[:12]}...{head_sha[:12]} → {len(changed)} changed 
file(s)')
+
+    if RELEASE_PULSAR_PATH in changed:
+        synthetic, ok = _release_pulsar_synthetic_paths(base_sha, head_sha, 
token)
+        if not ok:
+            print(f'failed to resolve {RELEASE_PULSAR_PATH} vtag diff → full 
rebuild')
+            return ChangeSet(build_all=True)
+        changed.extend(synthetic)
+
+    return ChangeSet(files=changed, build_all=build_all, force_versions=force_versions)
diff --git a/tools/pytools/lib/execute/site_builder.py b/tools/pytools/lib/execute/site_builder.py
index 14fd2a19633..fa57aaa9746 100644
--- a/tools/pytools/lib/execute/site_builder.py
+++ b/tools/pytools/lib/execute/site_builder.py
@@ -16,43 +16,46 @@
 # under the License.
 
 import shutil
-import tempfile
 from pathlib import Path
+from typing import Optional
 
 from command import find_command, run
 from constant import site_path
+from execute import version_build
+from execute.changed_files import ChangeSet, compute_changed_files, full_build_paths
 
 
-def execute(asf_site: Path):
-    # 1. Get modified files
-    git = find_command('git', msg="git is required")
-    with tempfile.TemporaryFile('w+') as f:
-        run(git, 'diff', '--name-only', 'HEAD~1', 'HEAD', stdout=f, 
cwd=site_path())
-        f.seek(0)
-        modified_files = f.read().splitlines()
-    for file in modified_files:
-        print(f"{file} was modified")
+def execute(asf_site: Path, head_sha: str, token: Optional[str]):
+    # Compute the change set vs. the last successful publish (.publish-ref in 
asf-site-next).
+    # The CI workflow checks out main with fetch-depth=2, so a local `git 
diff` cannot span
+    # more than the most recent commit; the GitHub compare API is the source 
of truth.
+    change_set = compute_changed_files(asf_site, head_sha, token)
+    if change_set.build_all:
+        change_set = ChangeSet(
+            files=full_build_paths(),
+            build_all=True,
+            force_versions=change_set.force_versions,
+        )
+    for file in change_set.files:
+        print(f'{file} was modified')
+    if change_set.force_versions:
+        print(f'forced versions from commit messages: 
{sorted(change_set.force_versions)}')
 
-    # # 2. Install and build
-    yarn = find_command('yarn', msg="yarn is required")
-    bash = find_command('bash', msg="bash is required")
+    yarn = find_command('yarn', msg='yarn is required')
     run(yarn, 'install', cwd=site_path())
-    run(bash, 'scripts/split-version-build.sh', *modified_files, 
cwd=site_path())
+    version_build.execute(change_set)
     # Expand @pulsar:...@ tokens and rewrite `pathname:///` in the Docsify
     # reference site (build/reference/), which Docusaurus copies verbatim
     # from static/ and so isn't touched by the markdown preprocessor pipeline.
     run(yarn, 'process-reference-markdown', cwd=site_path())
     latest_content = site_path() / 'build'
 
-    # 3. Publish content to asf-site-next branch
     published_content = asf_site / 'content'
     if not published_content.exists():
         published_content.mkdir(parents=True, exist_ok=True)
 
-    is_build_all = (site_path() / 'scripts' / '.build').read_text().strip()
-    is_build_all = is_build_all == "1"
-    print(f'is_build_all: {is_build_all}')
-    if is_build_all:
+    print(f'is_build_all: {change_set.build_all}')
+    if change_set.build_all:
         whitelist = ['api', 'charts']
         old_files = [f for f in published_content.glob('*') if f.name not in 
whitelist]
         print(f'clean all the old content: {list(map(str, old_files))}')
diff --git a/tools/pytools/lib/execute/site_uploader.py b/tools/pytools/lib/execute/site_uploader.py
index 37331c47f70..692e44cd19e 100755
--- a/tools/pytools/lib/execute/site_uploader.py
+++ b/tools/pytools/lib/execute/site_uploader.py
@@ -44,9 +44,16 @@ def _should_push(mode: Mode) -> bool:
     return result
 
 
-def _do_push(msg: str, site: Path, branch: str):
+def _do_push(msg: str, site: Path, branch: str, head_sha: str):
     git = find_command('git', msg="git is required")
 
+    # Persist the source-repo SHA we just published so the next run can compute
+    # the changed-files set against this point. Written before `git add -A` so
+    # it lands in the same commit as the published content. If `git push` later
+    # fails, this local file is discarded along with the unpushed commit — the
+    # next CI run re-clones asf-site-next and reads the previous .publish-ref.
+    (site / '.publish-ref').write_text(head_sha + '\n')
+
     run(git, 'add', '-A', '.', cwd=site)
     changed = run(git, 'diff-index', '--quiet', 'HEAD', codes={0, 1}, 
cwd=site).returncode
     print(f'changed: {changed}')
@@ -68,9 +75,9 @@ def _do_push(msg: str, site: Path, branch: str):
         run(git, 'push', 'origin', branch, cwd=site)
 
 
-def execute(mode: Mode, msg: str, site: Path, branch: str):
+def execute(mode: Mode, msg: str, site: Path, branch: str, head_sha: str):
     if _should_push(mode):
-        _do_push(msg, site, branch)
+        _do_push(msg, site, branch, head_sha)
     else:  # show changes
         git = find_command('git', msg="git is required")
         with tempfile.TemporaryFile('w+') as f:
diff --git a/tools/pytools/lib/execute/version_build.py b/tools/pytools/lib/execute/version_build.py
new file mode 100644
index 00000000000..132a516f57f
--- /dev/null
+++ b/tools/pytools/lib/execute/version_build.py
@@ -0,0 +1,148 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Per-version Docusaurus build orchestrator.
+
+Replaces the legacy ``scripts/split-version-build.sh`` + 
``scripts/split-version.js``
+pair: reads versions.json directly, decides which versions to build based on 
the
+ChangeSet, runs ``yarn build`` once per included version, and assembles
+``build/docs/`` from the per-version outputs."""
+
+import json
+import shutil
+from pathlib import Path
+from typing import List
+
+from command import find_command, run
+from constant import site_path
+from execute.changed_files import ChangeSet
+
+
+def _should_build(bv: str, change_set: ChangeSet) -> bool:
+    if bv == 'next':
+        return True
+    if change_set.build_all:
+        return True
+    if bv in change_set.force_versions:
+        return True
+    needle = f'versioned_docs/version-{bv}/'
+    return any(needle in f for f in change_set.files)
+
+
+def _write_build_versions(site: Path, build_versions: List[str]) -> None:
+    (site / '.build-versions.json').write_text(json.dumps(build_versions) + 
'\n')
+
+
+def _move_tree(src: Path, dst: Path) -> None:
+    """Replace dst with src. dst is removed first if it exists."""
+    if dst.exists():
+        if dst.is_dir():
+            shutil.rmtree(dst)
+        else:
+            dst.unlink()
+    dst.parent.mkdir(parents=True, exist_ok=True)
+    shutil.move(str(src), str(dst))
+
+
+def _merge_tree(src: Path, dst: Path) -> None:
+    """Copy src/* into dst/, creating dst if needed. Files in dst are 
overwritten."""
+    dst.mkdir(parents=True, exist_ok=True)
+    for child in src.iterdir():
+        target = dst / child.name
+        if child.is_dir():
+            shutil.copytree(child, target, dirs_exist_ok=True)
+        else:
+            shutil.copy2(child, target)
+
+
+def execute(change_set: ChangeSet) -> None:
+    """Build every required version into a single ``build/`` tree.
+
+    The flow mirrors what split-version-build.sh did, just in Python:
+    1. Iterate ``versions[1:] + ["next"]`` (latest is built as part of "next").
+    2. For each included version, write ``.build-versions.json`` and run 
``yarn build``.
+       Each yarn build OVERWRITES ``build/`` — so we stash per-version output 
to
+       ``build-{v}/`` and accumulated assets to ``build-assets/`` between runs.
+    3. After the loop, fold every ``build-{v}/*`` back into ``build/docs/`` and
+       merge ``build-assets/*`` into ``build/assets/``. Copy 
``static/.htaccess``
+       into ``build/`` (Docusaurus copies the rest of static/ but skips 
dot-files).
+
+    The ``next`` iteration always runs (with ``buildVersions=["current", 
latest]``);
+    its output places the latest version's docs under ``build/docs/{latest}/`` 
and
+    the "current" docs under ``build/docs/next/`` (per 
``versionsMap.current.path``
+    in docusaurus.config.ts)."""
+    site = site_path()
+    yarn = find_command('yarn', msg='yarn is required')
+
+    versions: List[str] = json.loads((site / 'versions.json').read_text())
+    if not versions:
+        raise RuntimeError('versions.json is empty')
+    latest = versions[0]
+    iter_versions = versions[1:] + ['next']
+
+    build_dir = site / 'build'
+    assets_stash = site / 'build-assets'
+
+    for bv in iter_versions:
+        if not _should_build(bv, change_set):
+            print(f'... {bv} no change, skip')
+            continue
+
+        if bv == 'next':
+            print(f'... {bv} and {latest} begin build...')
+            _write_build_versions(site, ['current', latest])
+        else:
+            print(f'... {bv} begin build...')
+            _write_build_versions(site, [bv])
+
+        run(yarn, 'build', cwd=site)
+
+        # bv == latest never happens here (latest is excluded from 
iter_versions),
+        # so we always stash. Stashing the per-version subtree lets the next 
yarn
+        # build overwrite build/ without losing what we just produced.
+        built_subdir = build_dir / 'docs' / bv
+        if not built_subdir.is_dir():
+            raise RuntimeError(f'expected {built_subdir} after yarn build, not 
found')
+        stash_root = site / f'build-{bv}'
+        stash_subdir = stash_root / bv
+        if stash_root.exists():
+            shutil.rmtree(stash_root)
+        stash_subdir.parent.mkdir(parents=True, exist_ok=True)
+        shutil.move(str(built_subdir), str(stash_subdir))
+
+        assets_src = build_dir / 'assets'
+        if assets_src.is_dir():
+            _merge_tree(assets_src, assets_stash)
+
+        print(f'... {bv} build done...')
+
+    # Fold per-version stashes back under build/docs/.
+    for bv in iter_versions:
+        stash_root = site / f'build-{bv}'
+        if not stash_root.is_dir():
+            continue
+        for child in stash_root.iterdir():
+            _move_tree(child, build_dir / 'docs' / child.name)
+        shutil.rmtree(stash_root)
+
+    if assets_stash.is_dir():
+        _merge_tree(assets_stash, build_dir / 'assets')
+        shutil.rmtree(assets_stash)
+
+    htaccess_src = site / 'static' / '.htaccess'
+    if htaccess_src.is_file():
+        shutil.copy2(htaccess_src, build_dir / '.htaccess')

Reply via email to