bobbai00 commented on code in PR #4387:
URL: https://github.com/apache/texera/pull/4387#discussion_r3143199294


##########
.github/workflows/check-binary-licenses.yml:
##########
@@ -0,0 +1,165 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Check binary dependency licenses
+
+on:
+  pull_request:
+    paths:
+      - '**/build.sbt'
+      - 'project/plugins.sbt'
+      - 'project/AddMetaInfLicenseFiles.scala'
+      - 'frontend/package.json'
+      - 'frontend/yarn.lock'
+      - 'frontend/custom-webpack.config.js'
+      - 'amber/requirements.txt'
+      - 'amber/operator-requirements.txt'
+      - 'LICENSE-binary'
+      - 'NOTICE-binary'
+      - 'bin/licensing/**'
+      - '.github/workflows/check-binary-licenses.yml'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+jobs:
+  check-jvm-deps:
+    name: Check JVM dependencies
+    runs-on: ubuntu-22.04
+    env:
+      JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M 
-Dfile.encoding=UTF-8
+      JVM_OPTS:  -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M 
-Dfile.encoding=UTF-8
+    services:
+      postgres:
+        image: postgres
+        env:
+          POSTGRES_PASSWORD: postgres
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd="pg_isready -U postgres"
+          --health-interval=10s
+          --health-timeout=5s
+          --health-retries=5
+    steps:
+      - uses: actions/checkout@v5
+      - uses: actions/setup-java@v5
+        with:
+          distribution: temurin
+          java-version: 11
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+      - uses: sbt/setup-sbt@508b753e53cb6095967669e0911487d2b9bc9f41 # v1.1.22
+      - uses: coursier/cache-action@4e2615869d13561d626ed48655e1a39e5b192b3c # 
v6.4.9
+        with:
+          extraSbtFiles: '["*.sbt", "project/**.{scala,sbt}", 
"project/build.properties" ]'
+      - name: Create databases (required by sbt dist wiring)
+        run: |
+          psql -h localhost -U postgres -f sql/texera_ddl.sql
+          psql -h localhost -U postgres -f sql/iceberg_postgres_catalog.sql
+          psql -h localhost -U postgres -f sql/texera_lakefs.sql
+        env:
+          PGPASSWORD: postgres
+      - name: Build distributable bundles
+        # Build every dist-producing module so the union of bundled jars can
+        # be diffed against LICENSE-binary.
+        run: sbt 'clean; ConfigService/dist; AccessControlService/dist; 
FileService/dist; ComputingUnitManagingService/dist; 
WorkflowCompilingService/dist; WorkflowExecutionService/dist'
+      - name: Unzip each dist
+        run: |
+          mkdir -p /tmp/dists
+          for zip in \
+            config-service/target/universal/config-service-*.zip \
+            
access-control-service/target/universal/access-control-service-*.zip \
+            file-service/target/universal/file-service-*.zip \
+            
computing-unit-managing-service/target/universal/computing-unit-managing-service-*.zip
 \
+            
workflow-compiling-service/target/universal/workflow-compiling-service-*.zip \
+            amber/target/universal/amber-*.zip; do
+              unzip -q "$zip" -d /tmp/dists/
+          done
+      - name: Check bundled jars against LICENSE-binary
+        run: |
+          ./bin/licensing/check_binary_deps.py jar \
+            /tmp/dists/config-service-*/lib \
+            /tmp/dists/access-control-service-*/lib \
+            /tmp/dists/file-service-*/lib \
+            /tmp/dists/computing-unit-managing-service-*/lib \
+            /tmp/dists/workflow-compiling-service-*/lib \
+            /tmp/dists/amber-*/lib
+
+  check-npm-deps:
+    name: Check npm dependencies
+    # Mirrors the frontend job in .github/workflows/github-action-build.yml.
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - uses: actions/setup-node@v5
+        with:
+          node-version: 18
+          architecture: x64
+      - uses: actions/cache@v4
+        with:
+          path: |
+            frontend/.yarn/cache
+            frontend/.yarn/unplugged
+            frontend/.yarn/install-state.gz
+          key: ${{ runner.os }}-x64-18-yarn-cache-v2-${{ 
hashFiles('**/yarn.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-x64-18-yarn-cache-v2-
+      - name: Prepare Yarn 4.14.1
+        run: corepack enable && corepack prepare yarn@4.14.1 --activate
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+      - name: Install frontend dependencies
+        timeout-minutes: 20
+        run: yarn --cwd frontend install --immutable --inline-builds 
--network-timeout=100000
+      - name: Production build (emits 3rdpartylicenses.json)
+        # build:ci is the same `ng build --configuration=production` pipeline

Review Comment:
   These comments are also redundant



##########
frontend/custom-webpack.config.js:
##########
@@ -37,4 +39,31 @@ module.exports = {
       },
     },
   },
+  plugins: [
+    // Emit a machine-readable manifest of bundled npm packages with versions
+    // and SPDX license ids. Consumed by bin/licensing/check_binary_deps.py to

Review Comment:
   No need to mention how this result is consumed. Remove these 
redundant comments.



##########
bin/licensing/check_binary_deps.py:
##########
@@ -0,0 +1,256 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Verify the prose in LICENSE-binary matches actually-bundled third-party

Review Comment:
   Simplify these comments. They should just include a functionality description 
and that's it. No need to explain details like ground truth and so on.



##########
.github/workflows/check-binary-licenses.yml:
##########
@@ -0,0 +1,165 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Check binary dependency licenses
+
+on:
+  pull_request:
+    paths:
+      - '**/build.sbt'
+      - 'project/plugins.sbt'
+      - 'project/AddMetaInfLicenseFiles.scala'
+      - 'frontend/package.json'
+      - 'frontend/yarn.lock'
+      - 'frontend/custom-webpack.config.js'
+      - 'amber/requirements.txt'
+      - 'amber/operator-requirements.txt'
+      - 'LICENSE-binary'
+      - 'NOTICE-binary'
+      - 'bin/licensing/**'
+      - '.github/workflows/check-binary-licenses.yml'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+jobs:
+  check-jvm-deps:
+    name: Check JVM dependencies
+    runs-on: ubuntu-22.04
+    env:
+      JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M 
-Dfile.encoding=UTF-8
+      JVM_OPTS:  -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M 
-Dfile.encoding=UTF-8
+    services:
+      postgres:
+        image: postgres
+        env:
+          POSTGRES_PASSWORD: postgres
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd="pg_isready -U postgres"
+          --health-interval=10s
+          --health-timeout=5s
+          --health-retries=5
+    steps:
+      - uses: actions/checkout@v5
+      - uses: actions/setup-java@v5
+        with:
+          distribution: temurin
+          java-version: 11
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+      - uses: sbt/setup-sbt@508b753e53cb6095967669e0911487d2b9bc9f41 # v1.1.22
+      - uses: coursier/cache-action@4e2615869d13561d626ed48655e1a39e5b192b3c # 
v6.4.9
+        with:
+          extraSbtFiles: '["*.sbt", "project/**.{scala,sbt}", 
"project/build.properties" ]'
+      - name: Create databases (required by sbt dist wiring)
+        run: |
+          psql -h localhost -U postgres -f sql/texera_ddl.sql
+          psql -h localhost -U postgres -f sql/iceberg_postgres_catalog.sql
+          psql -h localhost -U postgres -f sql/texera_lakefs.sql
+        env:
+          PGPASSWORD: postgres
+      - name: Build distributable bundles
+        # Build every dist-producing module so the union of bundled jars can
+        # be diffed against LICENSE-binary.
+        run: sbt 'clean; ConfigService/dist; AccessControlService/dist; 
FileService/dist; ComputingUnitManagingService/dist; 
WorkflowCompilingService/dist; WorkflowExecutionService/dist'
+      - name: Unzip each dist
+        run: |
+          mkdir -p /tmp/dists
+          for zip in \
+            config-service/target/universal/config-service-*.zip \
+            
access-control-service/target/universal/access-control-service-*.zip \
+            file-service/target/universal/file-service-*.zip \
+            
computing-unit-managing-service/target/universal/computing-unit-managing-service-*.zip
 \
+            
workflow-compiling-service/target/universal/workflow-compiling-service-*.zip \
+            amber/target/universal/amber-*.zip; do
+              unzip -q "$zip" -d /tmp/dists/
+          done
+      - name: Check bundled jars against LICENSE-binary
+        run: |
+          ./bin/licensing/check_binary_deps.py jar \
+            /tmp/dists/config-service-*/lib \
+            /tmp/dists/access-control-service-*/lib \
+            /tmp/dists/file-service-*/lib \
+            /tmp/dists/computing-unit-managing-service-*/lib \
+            /tmp/dists/workflow-compiling-service-*/lib \
+            /tmp/dists/amber-*/lib
+
+  check-npm-deps:
+    name: Check npm dependencies
+    # Mirrors the frontend job in .github/workflows/github-action-build.yml.

Review Comment:
   The comments here are redundant.



##########
.github/workflows/check-binary-licenses.yml:
##########
@@ -0,0 +1,165 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Check binary dependency licenses
+
+on:
+  pull_request:
+    paths:

Review Comment:
   We should always run this CI for all changes. Can we avoid scoping it to these 
paths?



##########
bin/licensing/collect_binary_licenses.sh:
##########
@@ -0,0 +1,150 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Extract META-INF/LICENSE* and META-INF/NOTICE* from every .jar in <jar-dir>
+# and emit LICENSE-skeleton / NOTICE-skeleton for curating LICENSE-binary /
+# NOTICE-binary by hand. Skeletons are raw concatenations, not shippable.
+#
+# JVM jars only. Frontend (npm) is covered by the Angular CLI's
+# 3rdpartylicenses.txt; Python is covered by pip-licenses.
+#
+# Usage:
+#   ./bin/licensing/collect_binary_licenses.sh <jar-dir> [<out-dir>]
+#
+# Example:
+#   sbt 'project WorkflowExecutionService' dist
+#   unzip amber/target/universal/amber-*.zip -d /tmp/dist
+#   ./bin/licensing/collect_binary_licenses.sh /tmp/dist/amber-*/lib 
/tmp/license-staging
+

Review Comment:
   It seems this file is no longer needed.



##########
.github/workflows/check-binary-licenses.yml:
##########
@@ -0,0 +1,165 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Check binary dependency licenses
+
+on:
+  pull_request:
+    paths:
+      - '**/build.sbt'
+      - 'project/plugins.sbt'
+      - 'project/AddMetaInfLicenseFiles.scala'
+      - 'frontend/package.json'
+      - 'frontend/yarn.lock'
+      - 'frontend/custom-webpack.config.js'
+      - 'amber/requirements.txt'
+      - 'amber/operator-requirements.txt'
+      - 'LICENSE-binary'
+      - 'NOTICE-binary'
+      - 'bin/licensing/**'
+      - '.github/workflows/check-binary-licenses.yml'
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+jobs:
+  check-jvm-deps:
+    name: Check JVM dependencies
+    runs-on: ubuntu-22.04
+    env:
+      JAVA_OPTS: -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M 
-Dfile.encoding=UTF-8
+      JVM_OPTS:  -Xms2048M -Xmx2048M -Xss6M -XX:ReservedCodeCacheSize=256M 
-Dfile.encoding=UTF-8
+    services:
+      postgres:
+        image: postgres
+        env:
+          POSTGRES_PASSWORD: postgres
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd="pg_isready -U postgres"
+          --health-interval=10s
+          --health-timeout=5s
+          --health-retries=5
+    steps:
+      - uses: actions/checkout@v5
+      - uses: actions/setup-java@v5
+        with:
+          distribution: temurin
+          java-version: 11
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+      - uses: sbt/setup-sbt@508b753e53cb6095967669e0911487d2b9bc9f41 # v1.1.22
+      - uses: coursier/cache-action@4e2615869d13561d626ed48655e1a39e5b192b3c # 
v6.4.9
+        with:
+          extraSbtFiles: '["*.sbt", "project/**.{scala,sbt}", 
"project/build.properties" ]'
+      - name: Create databases (required by sbt dist wiring)
+        run: |
+          psql -h localhost -U postgres -f sql/texera_ddl.sql
+          psql -h localhost -U postgres -f sql/iceberg_postgres_catalog.sql
+          psql -h localhost -U postgres -f sql/texera_lakefs.sql
+        env:
+          PGPASSWORD: postgres
+      - name: Build distributable bundles
+        # Build every dist-producing module so the union of bundled jars can
+        # be diffed against LICENSE-binary.
+        run: sbt 'clean; ConfigService/dist; AccessControlService/dist; 
FileService/dist; ComputingUnitManagingService/dist; 
WorkflowCompilingService/dist; WorkflowExecutionService/dist'
+      - name: Unzip each dist
+        run: |
+          mkdir -p /tmp/dists
+          for zip in \
+            config-service/target/universal/config-service-*.zip \
+            
access-control-service/target/universal/access-control-service-*.zip \
+            file-service/target/universal/file-service-*.zip \
+            
computing-unit-managing-service/target/universal/computing-unit-managing-service-*.zip
 \
+            
workflow-compiling-service/target/universal/workflow-compiling-service-*.zip \
+            amber/target/universal/amber-*.zip; do
+              unzip -q "$zip" -d /tmp/dists/
+          done
+      - name: Check bundled jars against LICENSE-binary
+        run: |
+          ./bin/licensing/check_binary_deps.py jar \
+            /tmp/dists/config-service-*/lib \
+            /tmp/dists/access-control-service-*/lib \
+            /tmp/dists/file-service-*/lib \
+            /tmp/dists/computing-unit-managing-service-*/lib \
+            /tmp/dists/workflow-compiling-service-*/lib \
+            /tmp/dists/amber-*/lib
+
+  check-npm-deps:
+    name: Check npm dependencies
+    # Mirrors the frontend job in .github/workflows/github-action-build.yml.
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - uses: actions/setup-node@v5
+        with:
+          node-version: 18
+          architecture: x64
+      - uses: actions/cache@v4
+        with:
+          path: |
+            frontend/.yarn/cache
+            frontend/.yarn/unplugged
+            frontend/.yarn/install-state.gz
+          key: ${{ runner.os }}-x64-18-yarn-cache-v2-${{ 
hashFiles('**/yarn.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-x64-18-yarn-cache-v2-
+      - name: Prepare Yarn 4.14.1
+        run: corepack enable && corepack prepare yarn@4.14.1 --activate
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+      - name: Install frontend dependencies
+        timeout-minutes: 20
+        run: yarn --cwd frontend install --immutable --inline-builds 
--network-timeout=100000
+      - name: Production build (emits 3rdpartylicenses.json)
+        # build:ci is the same `ng build --configuration=production` pipeline
+        # as build, just routed through nx with a larger heap for CI. The
+        # license-webpack-plugin instance in custom-webpack.config.js writes
+        # frontend/dist/3rdpartylicenses.json with {name, version, license}
+        # per bundled package — bundle-scoped, not node_modules-scoped.
+        run: yarn --cwd frontend run build:ci
+      - name: Check bundled npm packages against LICENSE-binary
+        run: ./bin/licensing/check_binary_deps.py npm 
frontend/dist/3rdpartylicenses.json
+
+  check-python-deps:
+    name: Check Python dependencies
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v5
+      - uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+      - name: Install Python dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r amber/requirements.txt
+          pip install -r amber/operator-requirements.txt
+          pip install pip-licenses
+      - name: Generate pip-licenses manifest
+        # pip-licenses, prettytable, and wcwidth are CI-only helpers (the 
latter

Review Comment:
   These comments are redundant.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to