This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 3575360736 GH-47081: [Release] Verify reproducible source build explicitly (#47082)
3575360736 is described below

commit 35753607366d60b2e8aaf2858d12a91940d65426
Author: Sutou Kouhei <[email protected]>
AuthorDate: Sun Jul 13 20:29:11 2025 +0900

    GH-47081: [Release] Verify reproducible source build explicitly (#47082)
    
    ### Rationale for this change
    
    There are 2 problems on verification of reproducible source archive:
    
    1. CI on macOS isn't prepared correctly
    2. Some verification environments may not have required tools
    
    FYI: We need the following to check reproducible build on macOS:
    
    * Ensure using apache/arrow for `GITHUB_REPOSITORY`
      * `GITHUB_REPOSITORY` is defined automatically on GitHub Actions. Our Crossbow based verification job has `GITHUB_REPOSITORY=ursacomputing/crossbow` by default.
    * GNU tar
    * GNU gzip
    
    ### What changes are included in this PR?
    
    For the problem1:
    * Set `GITHUB_REPOSITORY` explicitly
    * Install GNU gzip (GNU tar is already installed)
    
    For the problem2:
    * Add `TEST_SOURCE_REPRODUCIBLE` that is `0` by default
    * Set `TEST_SOURCE_REPRODUCIBLE=1` on CI
    * At least one PMC member must set `TEST_SOURCE_REPRODUCIBLE=1` on release verification
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    No.
    * GitHub Issue: #47081
    
    Authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 dev/release/verify-release-candidate.sh           | 30 +++++++++++++++++------
 dev/tasks/verify-rc/github.linux.amd64.docker.yml |  3 +++
 dev/tasks/verify-rc/github.macos.yml              | 17 ++++++++++++-
 3 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh
index 28e4ed247d..b38da36c38 100755
--- a/dev/release/verify-release-candidate.sh
+++ b/dev/release/verify-release-candidate.sh
@@ -789,14 +789,6 @@ ensure_source_directory() {
     if [ ! -d "${ARROW_SOURCE_DIR}" ]; then
       pushd $ARROW_TMPDIR
       fetch_archive ${dist_name}
-      git clone https://github.com/${GITHUB_REPOSITORY}.git arrow
-      pushd arrow
-      dev/release/utils-create-release-tarball.sh ${VERSION} ${RC_NUMBER}
-      if ! cmp ${dist_name}.tar.gz ../${dist_name}.tar.gz; then
-        echo "Source archive isn't reproducible"
-        return 1
-      fi
-      popd
       tar xf ${dist_name}.tar.gz
       popd
     fi
@@ -845,6 +837,27 @@ test_source_distribution() {
 
   pushd $ARROW_SOURCE_DIR
 
+  if [ "${SOURCE_KIND}" = "tarball" ] && [ "${TEST_SOURCE_REPRODUCIBLE}" -gt 0 ]; then
+    pushd ..
+    git clone "https://github.com/${GITHUB_REPOSITORY}.git" arrow
+    pushd arrow
+    dev/release/utils-create-release-tarball.sh "${VERSION}" "${RC_NUMBER}"
+    tarball="apache-arrow-${VERSION}.tar.gz"
+    if ! cmp "${tarball}" "../${tarball}"; then
+      echo "Source archive isn't reproducible"
+      if ! tar --version | grep --quiet --fixed GNU && \
+          ! gtar --version | grep --quiet --fixed GNU; then
+        echo "We need GNU tar to verify reproducible build"
+      fi
+      if ! gzip --version | grep --quiet --fixed GNU; then
+        echo "We need GNU gzip to verify reproducible build"
+      fi
+      return 1
+    fi
+    popd
+    popd
+  fi
+
   if [ ${TEST_CSHARP} -gt 0 ]; then
     test_csharp
   fi
@@ -1033,6 +1046,7 @@ test_wheels() {
 : ${TEST_YUM:=${TEST_BINARIES}}
 
 # Source verification tasks
+: ${TEST_SOURCE_REPRODUCIBLE:=0}
 : ${TEST_CPP:=${TEST_SOURCE}}
 : ${TEST_CSHARP:=${TEST_SOURCE}}
 : ${TEST_GLIB:=${TEST_SOURCE}}
diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml
index 97eecd2d28..dd5fd9f891 100644
--- a/dev/tasks/verify-rc/github.linux.amd64.docker.yml
+++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml
@@ -52,6 +52,9 @@ jobs:
             {% endif %}
             -e VERIFY_RC="{{ rc|default("") }}" \
             -e TEST_DEFAULT=0 \
+            {% if target == "cpp" %}
+            -e TEST_SOURCE_REPRODUCIBLE=1 \
+            {% endif %}
             -e TEST_{{ target|upper }}=1 \
             {{ distro }}-verify-rc
 
diff --git a/dev/tasks/verify-rc/github.macos.yml b/dev/tasks/verify-rc/github.macos.yml
index 93486f4604..315083543c 100644
--- a/dev/tasks/verify-rc/github.macos.yml
+++ b/dev/tasks/verify-rc/github.macos.yml
@@ -59,6 +59,9 @@ jobs:
           if [ -x "${pkgconf}" ]; then
             echo "PKG_CONFIG=${pkgconf}" >> $GITHUB_ENV
           fi
+
+          # For reproducible source archive verification
+          brew install gzip
       {% endif %}
 
       - uses: actions/setup-java@v2
@@ -91,6 +94,18 @@ jobs:
           USE_CONDA: 1
         {% else %}
           GTest_SOURCE: SYSTEM
+          {% if target == "cpp" %}
+          TEST_SOURCE_REPRODUCIBLE: 1
+          {% endif %}
         {% endif %}
         run: |
-          arrow/dev/release/verify-release-candidate.sh {{ release|default("") }} {{ rc|default("") }}
+          version={{ release|default("") }}
+          rc={{ rc|default("") }}
+          if [ -n "${version}" ] && [ -n "${rc}" ]; then
+            args=("${version}" "${rc}")
+            GITHUB_REPOSITORY=apache/arrow
+          else
+            args=()
+            GITHUB_REPOSITORY={{ arrow.github_repo }}
+          fi
+          arrow/dev/release/verify-release-candidate.sh "${args[@]}"

Reply via email to