This is an automated email from the ASF dual-hosted git repository.
Gabriel39 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 44de6fd7a93 [fix](regression) Use in-network MinIO endpoint for paimon
JDBC seed (#64113)
44de6fd7a93 is described below
commit 44de6fd7a930ff162227bd6777ac4ddb8f12b779
Author: Chenjunwei <[email protected]>
AuthorDate: Thu Jul 2 10:35:13 2026 +0800
[fix](regression) Use in-network MinIO endpoint for paimon JDBC seed
(#64113)
### What problem does this PR solve?
Issue Number: None
Related PR: #61932
Problem Summary:
The master external regression suite can hang in
`test_paimon_jdbc_catalog` while running `docker exec ... spark-sql`.
Root cause:
- the Spark seed path used `http://${externalEnvIp}:${minioPort}` as the
MinIO endpoint
- that host-mapped endpoint may be reachable from the host but not from
inside the `spark-iceberg` container
- Spark can then block in repeated S3A metadata retries
- the local `ProcessBuilder` helper waited for the process without consuming
stdout/stderr or enforcing a timeout, so the suite could stay stuck
until the CI job itself timed out
This change makes only the Spark seed command use the paired MinIO
container endpoint when it exists in the Docker network. The Doris
catalog configuration still uses the configured external endpoint. The
command helper now consumes stdout/stderr and applies bounded timeouts
so future command failures surface instead of silently hanging.
### Release note
None
### Check List (For Author)
- Test: Manual test
- Source guard for the Paimon JDBC seed command failed before the change
and passed after the change
- `git diff HEAD~1..HEAD --check --
regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy`
- Attempted `timeout 180 ./run-regression-test.sh --run -d
external_table_p0/paimon -s test_paimon_jdbc_catalog`; the command timed out
during the regression framework's shade/package step, before the suite itself ran
- Behavior changed: Yes (the regression Spark seed step uses the
Docker-network MinIO endpoint when the paired MinIO container exists;
Doris catalog access still uses the configured external endpoint)
- Does this need documentation: No
---
.../paimon/test_paimon_jdbc_catalog.groovy | 50 +++++++++++++++-------
1 file changed, 35 insertions(+), 15 deletions(-)
diff --git
a/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy
b/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy
index 82d8d5b0dfa..3974051f9f6 100644
---
a/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy
+++
b/regression-test/suites/external_table_p0/paimon/test_paimon_jdbc_catalog.groovy
@@ -66,38 +66,58 @@ suite("test_paimon_jdbc_catalog", "p0,external") {
assertTrue(jdbcDriversDir != null && !jdbcDriversDir.isEmpty(),
"jdbc_drivers_dir must be configured")
- def executeCommand = { String cmd, Boolean mustSuc ->
+ def executeCommand = { String cmd, Boolean mustSuc, int timeoutSeconds =
300 ->
+ StringBuilder stdout = new StringBuilder()
+ StringBuilder stderr = new StringBuilder()
try {
logger.info("execute ${cmd}")
- def proc = new ProcessBuilder("/bin/bash", "-c",
cmd).redirectErrorStream(true).start()
- int exitcode = proc.waitFor()
- String output = proc.text
+ def proc = new ProcessBuilder("/bin/bash", "-c", cmd).start()
+ proc.consumeProcessOutput(stdout, stderr)
+ proc.waitForOrKill(timeoutSeconds * 1000)
+ int exitcode = proc.exitValue()
+ String output = stdout.toString()
+ String error = stderr.toString()
if (exitcode != 0) {
- logger.info("exit code: ${exitcode}, output\n: ${output}")
+ logger.info("exit code: ${exitcode}, stdout\n:
${output}\nstderr\n: ${error}")
if (mustSuc) {
- assertTrue(false, "Execute failed: ${cmd}")
+ assertTrue(false, "Execute failed:
${cmd}\nstdout:\n${output}\nstderr:\n${error}")
}
}
return output
} catch (IOException e) {
- assertTrue(false, "Execute timeout: ${cmd}")
+ assertTrue(false, "Execute failed: ${cmd}, err: ${e.message}")
}
}
- executeCommand("mkdir -p ${localDriverDir}", false)
- executeCommand("mkdir -p ${jdbcDriversDir}", true)
+ executeCommand("mkdir -p ${localDriverDir}", false, 60)
+ executeCommand("mkdir -p ${jdbcDriversDir}", true, 60)
if (!new File(localDriverPath).exists()) {
- executeCommand("/usr/bin/curl --max-time 600 ${driverDownloadUrl}
--output ${localDriverPath}", true)
+ executeCommand("/usr/bin/curl --max-time 600 ${driverDownloadUrl}
--output ${localDriverPath}", true, 660)
}
- executeCommand("cp -f ${localDriverPath} ${jdbcDriversDir}/${driverName}",
true)
+ executeCommand("cp -f ${localDriverPath} ${jdbcDriversDir}/${driverName}",
true, 60)
- String sparkContainerName = executeCommand("docker ps --filter
name=spark-iceberg --format {{.Names}}", false)
+ String sparkContainerName = executeCommand("docker ps --filter
name=spark-iceberg --format {{.Names}}", false, 30)
?.trim()
if (sparkContainerName == null || sparkContainerName.isEmpty()) {
logger.info("spark-iceberg container not found, skip this test")
return
}
- executeCommand("docker cp ${localDriverPath}
${sparkContainerName}:${sparkDriverPath}", true)
+ executeCommand("docker cp ${localDriverPath}
${sparkContainerName}:${sparkDriverPath}", true, 60)
+
+ String sparkMinioEndpoint = "http://${externalEnvIp}:${minioPort}"
+ if (sparkContainerName.contains("spark-iceberg")) {
+ String sparkMinioContainerName =
sparkContainerName.replaceFirst("spark-iceberg", "minio")
+ String resolvedSparkMinioContainer = executeCommand(
+ "docker ps --filter name=${sparkMinioContainerName} --format
{{.Names}}",
+ false,
+ 30
+ )?.trim()
+ if (resolvedSparkMinioContainer == sparkMinioContainerName) {
+ // Spark runs inside the docker network and may not be able to
reach the host-mapped MinIO port.
+ sparkMinioEndpoint = "http://${resolvedSparkMinioContainer}:9000"
+ }
+ }
+ logger.info("spark seed minio endpoint: ${sparkMinioEndpoint}")
def sparkPaimonJdbc = { String sqlText ->
String escapedSql = sqlText.replaceAll('"', '\\\\"')
@@ -115,13 +135,13 @@ suite("test_paimon_jdbc_catalog", "p0,external") {
--conf spark.sql.catalog.${sparkSeedCatalogName}.jdbc.user=postgres \
--conf spark.sql.catalog.${sparkSeedCatalogName}.jdbc.password=123456 \
--conf spark.sql.catalog.${sparkSeedCatalogName}.lock.enabled=false \
---conf
spark.sql.catalog.${sparkSeedCatalogName}.s3.endpoint=http://${externalEnvIp}:${minioPort}
\
+--conf
spark.sql.catalog.${sparkSeedCatalogName}.s3.endpoint=${sparkMinioEndpoint} \
--conf spark.sql.catalog.${sparkSeedCatalogName}.s3.access-key=${minioAk} \
--conf spark.sql.catalog.${sparkSeedCatalogName}.s3.secret-key=${minioSk} \
--conf spark.sql.catalog.${sparkSeedCatalogName}.s3.region=us-east-1 \
--conf spark.sql.catalog.${sparkSeedCatalogName}.s3.path.style.access=true \
-e "${escapedSql}" """
- executeCommand(command, true)
+ executeCommand(command, true, 300)
}
def assertSystemTableReadable = { String tableExpr, List<String>
expectedColumns = [], Integer minCount = null ->
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]