Copilot commented on code in PR #63676:
URL: https://github.com/apache/doris/pull/63676#discussion_r3302649679


##########
docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh:
##########
@@ -18,6 +18,9 @@
 
 set -e -x
 
+. /mnt/scripts/bootstrap/bootstrap-groups.sh
+BOOTSTRAP_GROUPS="$(bootstrap_normalize_groups "${HIVE_BOOTSTRAP_GROUPS:-}")"

Review Comment:
   Same failure mode as in `prepare-hive-data.sh`: if 
`bootstrap_normalize_groups` errors, the script may continue despite `set -e` 
because the failure occurs inside command substitution. Add an explicit `|| 
exit 1` (or equivalent) to guarantee invalid bootstrap group configurations 
halt the container init.
   



##########
docker/thirdparties/docker-compose/hive/scripts/prepare-hive-data.sh:
##########
@@ -18,110 +18,73 @@ set -eo pipefail
 # under the License.
 
 CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
-# Extract all tar.gz files under the repo
-find ${CUR_DIR}/data -type f -name "*.tar.gz" -print0 | \
-xargs -0 -n1 -P"${LOAD_PARALLEL}" bash -c '
-  f="$0"
-  echo "Extracting hive data $f"
-  dir=$(dirname "$f")
-  tar -xzf "$f" -C "$dir"
-'
+. "${CUR_DIR}/bootstrap/bootstrap-groups.sh"
 
-# download tpch1_data
-if [[ ! -d "${CUR_DIR}/tpch1.db" ]]; then
-    echo "${CUR_DIR}/tpch1.db does not exist"
-    cd ${CUR_DIR}/
-    curl -O 
https://${s3BucketName}.${s3Endpoint}/regression/datalake/pipeline_data/tpch1.db.tar.gz
-    tar -zxf tpch1.db.tar.gz
-    rm -rf tpch1.db.tar.gz
-    cd -
-else
-    echo "${CUR_DIR}/tpch1.db exist, continue !"
+BOOTSTRAP_GROUPS="$(bootstrap_normalize_groups "${HIVE_BOOTSTRAP_GROUPS:-}")"

Review Comment:
   `bootstrap_normalize_groups` can return a non-zero status for unknown 
groups, but assigning via command substitution won’t reliably stop the script 
under `set -e` (bash’s `errexit` is commonly suppressed in command 
substitutions). This can lead to silently continuing with an empty/invalid 
`BOOTSTRAP_GROUPS`. Capture/validate the exit code explicitly (e.g., `... || 
exit 1`) so invalid group inputs fail fast.
   



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcDriverLoader.java:
##########
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverLoader {
+    private static final Logger LOG = 
LogManager.getLogger(JdbcDriverLoader.class);
+    private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new 
ConcurrentHashMap<>();
+    private static final Set<String> REGISTERED_DRIVER_KEYS = 
ConcurrentHashMap.newKeySet();
+
+    private JdbcDriverLoader() {
+    }
+
+    public static String validateDriverChecksum(String driverUrl, String 
expectedChecksum) throws DdlException {
+        String actualChecksum = JdbcResource.computeObjectChecksum(driverUrl);
+        if (StringUtils.isNotBlank(expectedChecksum) && 
!expectedChecksum.equals(actualChecksum)) {
+            throw new DdlException("The provided checksum (" + expectedChecksum
+                    + ") does not match the computed checksum (" + 
actualChecksum
+                    + ") for the driver_url.");
+        }
+        return actualChecksum;
+    }
+
+    public static String registerDriver(String driverUrl, String 
driverClassName, String expectedChecksum,
+            ClassLoader parentClassLoader) {
+        if (StringUtils.isBlank(driverClassName)) {
+            throw new IllegalArgumentException("JDBC driver class is required 
when driver url is specified.");
+        }
+
+        try {
+            validateDriverChecksum(driverUrl, expectedChecksum);
+        } catch (DdlException e) {
+            throw new IllegalArgumentException(e.getMessage(), e);
+        }
+
+        try {
+            String fullDriverUrl = JdbcResource.getFullDriverUrl(driverUrl);

Review Comment:
   Checksum validation is performed on `driverUrl`, but the actual driver is 
loaded from `fullDriverUrl` after normalization via 
`JdbcResource.getFullDriverUrl`. If normalization changes the resolved resource 
(e.g., alternative schemes/canonicalization), you could validate one URL but 
load another. Validate the checksum against the same resolved/normalized URL 
(`fullDriverUrl`) that will be used for the `URLClassLoader`.
   



##########
fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jdbc/JdbcDriverUtils.java:
##########
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.jdbc;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;

Review Comment:
   This introduces a dependency on Log4j 1.x (`org.apache.log4j.Logger`), which 
is EOL and has known security/maintenance issues. If the module supports it, 
switch to the project’s standard logging facade (often SLF4J + Log4j2) to avoid 
carrying Log4j 1.x in new code.



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcDriverLoader.java:
##########
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverLoader {
+    private static final Logger LOG = 
LogManager.getLogger(JdbcDriverLoader.class);
+    private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new 
ConcurrentHashMap<>();
+    private static final Set<String> REGISTERED_DRIVER_KEYS = 
ConcurrentHashMap.newKeySet();
+
+    private JdbcDriverLoader() {
+    }
+
+    public static String validateDriverChecksum(String driverUrl, String 
expectedChecksum) throws DdlException {
+        String actualChecksum = JdbcResource.computeObjectChecksum(driverUrl);
+        if (StringUtils.isNotBlank(expectedChecksum) && 
!expectedChecksum.equals(actualChecksum)) {
+            throw new DdlException("The provided checksum (" + expectedChecksum
+                    + ") does not match the computed checksum (" + 
actualChecksum
+                    + ") for the driver_url.");
+        }
+        return actualChecksum;
+    }
+
+    public static String registerDriver(String driverUrl, String 
driverClassName, String expectedChecksum,
+            ClassLoader parentClassLoader) {
+        if (StringUtils.isBlank(driverClassName)) {
+            throw new IllegalArgumentException("JDBC driver class is required 
when driver url is specified.");
+        }
+
+        try {
+            validateDriverChecksum(driverUrl, expectedChecksum);
+        } catch (DdlException e) {
+            throw new IllegalArgumentException(e.getMessage(), e);
+        }
+
+        try {
+            String fullDriverUrl = JdbcResource.getFullDriverUrl(driverUrl);

Review Comment:
   Checksum validation is performed on `driverUrl`, but the actual driver is 
loaded from `fullDriverUrl` after normalization via 
`JdbcResource.getFullDriverUrl`. If normalization changes the resolved resource 
(e.g., alternative schemes/canonicalization), you could validate one URL but 
load another. Validate the checksum against the same resolved/normalized URL 
(`fullDriverUrl`) that will be used for the `URLClassLoader`.
   



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcDriverLoader.java:
##########
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverLoader {
+    private static final Logger LOG = 
LogManager.getLogger(JdbcDriverLoader.class);
+    private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new 
ConcurrentHashMap<>();
+    private static final Set<String> REGISTERED_DRIVER_KEYS = 
ConcurrentHashMap.newKeySet();
+
+    private JdbcDriverLoader() {
+    }
+
+    public static String validateDriverChecksum(String driverUrl, String 
expectedChecksum) throws DdlException {
+        String actualChecksum = JdbcResource.computeObjectChecksum(driverUrl);
+        if (StringUtils.isNotBlank(expectedChecksum) && 
!expectedChecksum.equals(actualChecksum)) {
+            throw new DdlException("The provided checksum (" + expectedChecksum
+                    + ") does not match the computed checksum (" + 
actualChecksum
+                    + ") for the driver_url.");
+        }
+        return actualChecksum;
+    }
+
+    public static String registerDriver(String driverUrl, String 
driverClassName, String expectedChecksum,
+            ClassLoader parentClassLoader) {
+        if (StringUtils.isBlank(driverClassName)) {
+            throw new IllegalArgumentException("JDBC driver class is required 
when driver url is specified.");
+        }
+
+        try {
+            validateDriverChecksum(driverUrl, expectedChecksum);
+        } catch (DdlException e) {
+            throw new IllegalArgumentException(e.getMessage(), e);
+        }
+
+        try {
+            String fullDriverUrl = JdbcResource.getFullDriverUrl(driverUrl);
+            URL url = new URL(fullDriverUrl);
+            String driverKey = fullDriverUrl + "#" + driverClassName;
+            if (!REGISTERED_DRIVER_KEYS.add(driverKey)) {
+                LOG.info("JDBC driver already registered: {} from {}", 
driverClassName, fullDriverUrl);
+                return fullDriverUrl;
+            }
+            try {
+                ClassLoader classLoader = 
DRIVER_CLASS_LOADER_CACHE.computeIfAbsent(url, u ->
+                        URLClassLoader.newInstance(new URL[] {u}, 
parentClassLoader));

Review Comment:
   Caching `URLClassLoader` instances in a static map without any 
eviction/close path can leak resources (open JAR handles on some platforms, 
class metadata over time, and permanent retention of downloaded drivers). 
Consider adding an explicit unregister/close mechanism (and removing from the 
cache), or use a bounded/weak cache so long-running processes don’t accumulate 
classloaders indefinitely.



##########
fe/fe-core/src/test/java/org/apache/doris/catalog/JdbcDriverLoaderTest.java:
##########
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+import org.apache.doris.common.FeConstants;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+public class JdbcDriverLoaderTest {
+    private static final String DRIVER_BYTES = "doris-jdbc-driver";
+    private static final String DRIVER_CHECKSUM = 
"ef2b3218a863c98bc78fdf447331b206";
+
+    @Test
+    public void testValidateDriverChecksumRejectsMismatch() throws Exception {
+        boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+        FeConstants.runningUnitTest = false;
+        try {
+            String driverUrl = createDriverUrl();
+
+            DdlException exception = Assert.assertThrows(DdlException.class,
+                    () -> JdbcDriverLoader.validateDriverChecksum(driverUrl, 
"bad-checksum"));
+            Assert.assertTrue(exception.getMessage().contains("does not match 
the computed checksum"));
+        } finally {
+            FeConstants.runningUnitTest = oldRunningUnitTest;
+        }
+    }
+
+    @Test
+    public void testRegisterDriverValidatesChecksumBeforeLoadingClass() throws 
Exception {
+        boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+        FeConstants.runningUnitTest = false;
+        try {
+            String driverUrl = createDriverUrl();
+
+            IllegalArgumentException exception = 
Assert.assertThrows(IllegalArgumentException.class,
+                    () -> JdbcDriverLoader.registerDriver(driverUrl,
+                            "org.apache.doris.catalog.NotExistingDriver", 
"bad-checksum",
+                            getClass().getClassLoader()));
+            Assert.assertTrue(exception.getMessage().contains("does not match 
the computed checksum"));
+        } finally {
+            FeConstants.runningUnitTest = oldRunningUnitTest;
+        }
+    }
+
+    @Test
+    public void testValidateDriverChecksumReturnsComputedChecksum() throws 
Exception {
+        boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+        FeConstants.runningUnitTest = false;
+        try {
+            String driverUrl = createDriverUrl();
+
+            Assert.assertEquals(DRIVER_CHECKSUM, 
JdbcDriverLoader.validateDriverChecksum(driverUrl, DRIVER_CHECKSUM));
+            Assert.assertEquals(DRIVER_CHECKSUM, 
JdbcDriverLoader.validateDriverChecksum(driverUrl, ""));
+        } finally {
+            FeConstants.runningUnitTest = oldRunningUnitTest;

Review Comment:
   These tests mutate a global static flag (`FeConstants.runningUnitTest`). If 
the test runner executes tests in parallel, this can cause cross-test 
interference/flakiness. To make this robust, guard the mutation with a shared 
lock (e.g., synchronize on `FeConstants.class` around the whole test body), or 
refactor the production code to avoid relying on mutable global state for 
checksum behavior in tests.
   



##########
fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jdbc/JdbcDriverUtils.java:
##########
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.jdbc;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.net.URLConnection;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverUtils {
+    private static final Logger LOG = Logger.getLogger(JdbcDriverUtils.class);
+    private static final int HTTP_TIMEOUT_MS = 10000;
+    private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new 
ConcurrentHashMap<>();
+    private static final Set<String> REGISTERED_DRIVER_KEYS = 
ConcurrentHashMap.newKeySet();
+
+    private JdbcDriverUtils() {
+    }
+
+    public static String registerDriver(String driverUrl, String 
driverClassName, ClassLoader parentClassLoader) {
+        return registerDriver(driverUrl, driverClassName, "", 
parentClassLoader);
+    }
+
+    public static String registerDriver(String driverUrl, String 
driverClassName, String expectedChecksum,
+            ClassLoader parentClassLoader) {
+        if (StringUtils.isBlank(driverClassName)) {
+            throw new IllegalArgumentException("JDBC driver class is required 
when driver url is specified.");
+        }
+        validateDriverChecksum(driverUrl, expectedChecksum);
+
+        try {
+            URL url = new URL(driverUrl);
+            String driverKey = driverUrl + "#" + driverClassName;
+            if (!REGISTERED_DRIVER_KEYS.add(driverKey)) {
+                LOG.info("JDBC driver already registered: " + driverClassName 
+ " from " + driverUrl);
+                return driverUrl;
+            }
+            try {
+                ClassLoader classLoader = 
DRIVER_CLASS_LOADER_CACHE.computeIfAbsent(url, u ->
+                        URLClassLoader.newInstance(new URL[] {u}, 
parentClassLoader));

Review Comment:
   Same resource-retention issue as the FE loader: caching `URLClassLoader` 
without a removal/close lifecycle can leak JAR file handles and memory. Provide 
a way to close/remove cached loaders when drivers are no longer needed (or use 
a weak/bounded cache) to prevent unbounded growth.



##########
docker/thirdparties/docker-compose/hive/scripts/hive-metastore.sh:
##########
@@ -73,68 +76,99 @@ fi
 hadoop fs -mkdir -p /user/doris/suites/
 
 DATA_DIR="/mnt/scripts/data/"
-find "${DATA_DIR}" -type f -name "run.sh" -print0 | xargs -0 -n 1 -P 
"${LOAD_PARALLEL}" -I {} bash -ec '
-    START_TIME=$(date +%s)
-    bash -e "{}" || (echo "Failed to executing script: {}" && exit 1)
-    END_TIME=$(date +%s)
-    EXECUTION_TIME=$((END_TIME - START_TIME))
-    echo "Script: {} executed in $EXECUTION_TIME seconds"
-'
+run_scripts=()
+while IFS= read -r -d '' run_script; do
+    relative_run_script="${run_script#/mnt/scripts/}"
+    if bootstrap_item_selected "${BOOTSTRAP_GROUPS}" "run_sh" 
"${relative_run_script}"; then
+        run_scripts+=("${run_script}")
+    fi
+done < <(find "${DATA_DIR}" -type f -name "run.sh" -print0)
+
+if (( ${#run_scripts[@]} > 0 )); then
+    printf '%s\0' "${run_scripts[@]}" | xargs -0 -P "${LOAD_PARALLEL}" -I {} 
bash -ec '
+        START_TIME=$(date +%s)
+        bash -e "{}" || (echo "Failed to executing script: {}" && exit 1)
+        END_TIME=$(date +%s)
+        EXECUTION_TIME=$((END_TIME - START_TIME))
+        echo "Script: {} executed in $EXECUTION_TIME seconds"
+    '
+fi
 
 # put data file
 hadoop_put_pids=()
+hadoop_put_paths=()
 hadoop fs -mkdir -p /user/doris/
 
+copy_to_hdfs_if_selected() {
+    local relative_path="$1"
+    local local_path="/mnt/scripts/${relative_path}"
+
+    if ! bootstrap_item_selected "${BOOTSTRAP_GROUPS}" "hdfs_dir" 
"${relative_path}"; then
+        return
+    fi
+
+    if [[ ! -e "${local_path}" ]]; then
+        echo "${local_path} does not exist"
+        exit 1
+    fi
+
+    if [[ -d "${local_path}" && -z "$(ls "${local_path}")" ]]; then
+        echo "${local_path} does not exist"

Review Comment:
   When the directory exists but is empty, the message `does not exist` is 
incorrect and makes troubleshooting harder. Update the message to reflect the 
real condition (e.g., 'is empty' or 'contains no files') so failures are 
actionable.
   



##########
fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jdbc/JdbcDriverUtils.java:
##########
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.jdbc;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.net.URLConnection;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverUtils {
+    private static final Logger LOG = Logger.getLogger(JdbcDriverUtils.class);
+    private static final int HTTP_TIMEOUT_MS = 10000;
+    private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new 
ConcurrentHashMap<>();

Review Comment:
   Same resource-retention issue as the FE loader: caching `URLClassLoader` 
without a removal/close lifecycle can leak JAR file handles and memory. Provide 
a way to close/remove cached loaders when drivers are no longer needed (or use 
a weak/bounded cache) to prevent unbounded growth.



##########
fe/be-java-extensions/java-common/src/main/java/org/apache/doris/common/jdbc/JdbcDriverUtils.java:
##########
@@ -0,0 +1,169 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.jdbc;
+
+import org.apache.commons.codec.binary.Hex;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.net.URLConnection;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverUtils {
+    private static final Logger LOG = Logger.getLogger(JdbcDriverUtils.class);
+    private static final int HTTP_TIMEOUT_MS = 10000;
+    private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new 
ConcurrentHashMap<>();
+    private static final Set<String> REGISTERED_DRIVER_KEYS = 
ConcurrentHashMap.newKeySet();
+
+    private JdbcDriverUtils() {
+    }
+
+    public static String registerDriver(String driverUrl, String 
driverClassName, ClassLoader parentClassLoader) {
+        return registerDriver(driverUrl, driverClassName, "", 
parentClassLoader);
+    }
+
+    public static String registerDriver(String driverUrl, String 
driverClassName, String expectedChecksum,
+            ClassLoader parentClassLoader) {
+        if (StringUtils.isBlank(driverClassName)) {
+            throw new IllegalArgumentException("JDBC driver class is required 
when driver url is specified.");
+        }
+        validateDriverChecksum(driverUrl, expectedChecksum);
+
+        try {
+            URL url = new URL(driverUrl);
+            String driverKey = driverUrl + "#" + driverClassName;
+            if (!REGISTERED_DRIVER_KEYS.add(driverKey)) {
+                LOG.info("JDBC driver already registered: " + driverClassName 
+ " from " + driverUrl);
+                return driverUrl;
+            }
+            try {
+                ClassLoader classLoader = 
DRIVER_CLASS_LOADER_CACHE.computeIfAbsent(url, u ->
+                        URLClassLoader.newInstance(new URL[] {u}, 
parentClassLoader));
+                Class<?> loadedDriverClass = Class.forName(driverClassName, 
true, classLoader);
+                Driver driver = (Driver) 
loadedDriverClass.getDeclaredConstructor().newInstance();
+                DriverManager.registerDriver(new DriverShim(driver));
+                LOG.info("Successfully registered JDBC driver: " + 
driverClassName + " from " + driverUrl);
+                return driverUrl;
+            } catch (ClassNotFoundException e) {
+                REGISTERED_DRIVER_KEYS.remove(driverKey);
+                throw new IllegalArgumentException("Failed to load JDBC driver 
class: " + driverClassName, e);
+            } catch (Exception e) {
+                REGISTERED_DRIVER_KEYS.remove(driverKey);
+                throw new RuntimeException("Failed to register JDBC driver: " 
+ driverClassName, e);
+            }
+        } catch (MalformedURLException e) {
+            throw new IllegalArgumentException("Invalid driver URL: " + 
driverUrl, e);
+        }
+    }
+
+    public static String validateDriverChecksum(String driverUrl, String 
expectedChecksum) {
+        String actualChecksum = computeObjectChecksum(driverUrl);
+        if (StringUtils.isNotBlank(expectedChecksum) && 
!expectedChecksum.equals(actualChecksum)) {
+            throw new IllegalArgumentException("Checksum mismatch for JDBC 
driver. expected: "
+                    + expectedChecksum + ", actual: " + actualChecksum);
+        }
+        return actualChecksum;
+    }
+
+    public static String computeObjectChecksum(String urlStr) {
+        try (InputStream inputStream = getInputStreamFromUrl(urlStr, 
HTTP_TIMEOUT_MS, HTTP_TIMEOUT_MS)) {
+            MessageDigest digest = MessageDigest.getInstance("MD5");

Review Comment:
   Using MD5 for integrity validation is collision-prone and not appropriate if 
this checksum is intended to provide tamper resistance for driver artifacts. 
Prefer SHA-256 (or stronger) and update the expected checksum format 
accordingly; if MD5 is intentionally used only for corruption detection, 
document that this is not a security guarantee.



##########
fe/fe-core/src/main/java/org/apache/doris/catalog/JdbcDriverLoader.java:
##########
@@ -0,0 +1,140 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.sql.Connection;
+import java.sql.Driver;
+import java.sql.DriverManager;
+import java.sql.DriverPropertyInfo;
+import java.sql.SQLException;
+import java.sql.SQLFeatureNotSupportedException;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+
+public class JdbcDriverLoader {
+    private static final Logger LOG = 
LogManager.getLogger(JdbcDriverLoader.class);
+    private static final Map<URL, ClassLoader> DRIVER_CLASS_LOADER_CACHE = new 
ConcurrentHashMap<>();

Review Comment:
   Caching `URLClassLoader` instances in a static map without any 
eviction/close path can leak resources (open JAR handles on some platforms, 
class metadata over time, and permanent retention of downloaded drivers). 
Consider adding an explicit unregister/close mechanism (and removing from the 
cache), or use a bounded/weak cache so long-running processes don’t accumulate 
classloaders indefinitely.



##########
regression-test/pipeline/common/get-hive-bootstrap-groups.sh:
##########
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+set -eo pipefail
+
+usage() {
+    echo "Usage: $0 <hive2|hive3|both|all>" >&2
+    exit 1
+}
+
+case "${1:-}" in
+hive2)
+    echo "common,hive2_only"
+    ;;
+hive3)
+    echo "common,hive3_only"
+    ;;
+both)
+    echo "common,hive2_only,hive3_only"
+    ;;
+all)
+    echo "all"
+    ;;

Review Comment:
   This script hardcodes the same group semantics that are also implemented in 
`docker/.../bootstrap/bootstrap-groups.sh`. Keeping two sources of truth 
increases the risk of divergence (e.g., adding a new group later). Consider 
reusing the shared normalization/merge logic (or a shared list/config) so group 
definitions live in one place.



##########
fe/fe-core/src/test/java/org/apache/doris/catalog/JdbcDriverLoaderTest.java:
##########
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.catalog;
+
+import org.apache.doris.common.DdlException;
+import org.apache.doris.common.FeConstants;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+public class JdbcDriverLoaderTest {
+    private static final String DRIVER_BYTES = "doris-jdbc-driver";
+    private static final String DRIVER_CHECKSUM = 
"ef2b3218a863c98bc78fdf447331b206";
+
+    @Test
+    public void testValidateDriverChecksumRejectsMismatch() throws Exception {
+        boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+        FeConstants.runningUnitTest = false;
+        try {
+            String driverUrl = createDriverUrl();
+
+            DdlException exception = Assert.assertThrows(DdlException.class,
+                    () -> JdbcDriverLoader.validateDriverChecksum(driverUrl, 
"bad-checksum"));
+            Assert.assertTrue(exception.getMessage().contains("does not match 
the computed checksum"));
+        } finally {
+            FeConstants.runningUnitTest = oldRunningUnitTest;
+        }
+    }
+
+    @Test
+    public void testRegisterDriverValidatesChecksumBeforeLoadingClass() throws 
Exception {
+        boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+        FeConstants.runningUnitTest = false;
+        try {
+            String driverUrl = createDriverUrl();
+
+            IllegalArgumentException exception = 
Assert.assertThrows(IllegalArgumentException.class,
+                    () -> JdbcDriverLoader.registerDriver(driverUrl,
+                            "org.apache.doris.catalog.NotExistingDriver", 
"bad-checksum",
+                            getClass().getClassLoader()));
+            Assert.assertTrue(exception.getMessage().contains("does not match 
the computed checksum"));
+        } finally {
+            FeConstants.runningUnitTest = oldRunningUnitTest;
+        }
+    }
+
+    @Test
+    public void testValidateDriverChecksumReturnsComputedChecksum() throws 
Exception {
+        boolean oldRunningUnitTest = FeConstants.runningUnitTest;
+        FeConstants.runningUnitTest = false;
+        try {
+            String driverUrl = createDriverUrl();
+
+            Assert.assertEquals(DRIVER_CHECKSUM, 
JdbcDriverLoader.validateDriverChecksum(driverUrl, DRIVER_CHECKSUM));
+            Assert.assertEquals(DRIVER_CHECKSUM, 
JdbcDriverLoader.validateDriverChecksum(driverUrl, ""));
+        } finally {
+            FeConstants.runningUnitTest = oldRunningUnitTest;
+        }
+    }
+
+    private String createDriverUrl() throws Exception {
+        Path driverPath = Files.createTempFile("doris-jdbc-driver", ".jar");
+        Files.write(driverPath, DRIVER_BYTES.getBytes(StandardCharsets.UTF_8));
+        return "file://" + driverPath.toAbsolutePath();

Review Comment:
   The temp file is never deleted, and the file URL is built via string 
concatenation (which can produce invalid URLs for paths needing escaping). 
Prefer using `driverPath.toUri().toString()` (or `toUri().toURL().toString()`) 
and ensure the temp file is cleaned up (e.g., `deleteOnExit()` or explicit 
deletion in a `finally`) to avoid polluting the test environment.
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to