This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new c53d9cc3998 [fix](regression) stabilize parquet_meta tvf assertions
across s3 buckets (#60938)
c53d9cc3998 is described below
commit c53d9cc39989a6f4ddf625e883e630158806c862
Author: Chenjunwei <[email protected]>
AuthorDate: Mon Mar 2 18:15:15 2026 +0800
[fix](regression) stabilize parquet_meta tvf assertions across s3 buckets
(#60938)
## Summary
- adjust `test_parquet_meta_tvf` S3-mode checks to compare only stable
columns
- avoid asserting on `file_name` / full S3 URI fields, whose values differ
depending on the bucket each pipeline uses
- update the corresponding `.out` baseline for the changed query
projections
## Why
Different CI pipelines may use different bucket names, which causes
false failures when full URI/file name columns are compared.
## Test
- attempted: `./run-regression-test.sh --run -f
external_table_p0/tvf/test_parquet_meta_tvf -forceGenOut`
- result: in this environment the run failed with an S3 `FORBIDDEN` error
while reading the regression parquet files
---
.../tvf/test_parquet_meta_tvf.out | 44 ++++++++++-----------
.../tvf/test_parquet_meta_tvf.groovy | 45 +++++++++++++++-------
2 files changed, 52 insertions(+), 37 deletions(-)
diff --git
a/regression-test/data/external_table_p0/tvf/test_parquet_meta_tvf.out
b/regression-test/data/external_table_p0/tvf/test_parquet_meta_tvf.out
index 37a5dce0bd4..521286473d4 100644
--- a/regression-test/data/external_table_p0/tvf/test_parquet_meta_tvf.out
+++ b/regression-test/data/external_table_p0/tvf/test_parquet_meta_tvf.out
@@ -1,44 +1,42 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !parquet_metadata_s3 --
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
0 5 4 236 0 4 5 normal_int INT32
1 5 1 \N 1 5 SNAPPY
PLAIN,RLE,RLE_DICTIONARY \N 4 36 61 57
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437741414147526C59326C74595778665932397341
[...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
0 5 4 236 1 65 5 string_col
BYTE_ARRAY \N \N 1 \N a e SNAPPY
PLAIN,RLE,RLE_DICTIONARY \N 65 101 65 61
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437741414147526C59326C7459577866
[...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
0 5 4 236 2 130 5 date_col INT32
2023-01-01 2023-01-05 1 \N 2023-01-01 2023-01-05
SNAPPY PLAIN,RLE,RLE_DICTIONARY \N 130 162 61 57
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437
[...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
0 5 4 236 3 191 5 decimal_col
FIXED_LEN_BYTE_ARRAY 10.00 50.00 1 \N 10.00 50.00 SNAPPY
PLAIN,RLE,RLE_DICTIONARY \N 191 227 65 61
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A41414141415141414141414141414143774
[...]
+0 5 4 236 0 4 5 normal_int INT32
1 5 1 \N 1
+0 5 4 236 1 65 5 string_col
BYTE_ARRAY \N \N 1 \N a
+0 5 4 236 2 130 5 date_col INT32
2023-01-01 2023-01-05 1 \N 2023-01-01
+0 5 4 236 3 191 5 decimal_col
FIXED_LEN_BYTE_ARRAY 10.00 50.00 1 \N 10.00
-- !parquet_metadata_default_mode --
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
0 5 4 236 0 4 5 normal_int INT32
1 5 1 \N 1 5 SNAPPY
PLAIN,RLE,RLE_DICTIONARY \N 4 36 61 57
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437741414147526C59326C74595778665932397341
[...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
0 5 4 236 1 65 5 string_col
BYTE_ARRAY \N \N 1 \N a e SNAPPY
PLAIN,RLE,RLE_DICTIONARY \N 65 101 65 61
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437741414147526C59326C7459577866
[...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
0 5 4 236 2 130 5 date_col INT32
2023-01-01 2023-01-05 1 \N 2023-01-01 2023-01-05
SNAPPY PLAIN,RLE,RLE_DICTIONARY \N 130 162 61 57
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437
[...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
0 5 4 236 3 191 5 decimal_col
FIXED_LEN_BYTE_ARRAY 10.00 50.00 1 \N 10.00 50.00 SNAPPY
PLAIN,RLE,RLE_DICTIONARY \N 191 227 65 61
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A41414141415141414141414141414143774
[...]
+0 5 4 236 0 4 5 normal_int INT32
1 5 1 \N 1
+0 5 4 236 1 65 5 string_col
BYTE_ARRAY \N \N 1 \N a
+0 5 4 236 2 130 5 date_col INT32
2023-01-01 2023-01-05 1 \N 2023-01-01
+0 5 4 236 3 191 5 decimal_col
FIXED_LEN_BYTE_ARRAY 10.00 50.00 1 \N 10.00
-- !parquet_schema --
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
date_col INT32 \N OPTIONAL 0 DATE \N \N
\N DATE
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
decimal_col FIXED_LEN_BYTE_ARRAY 5 OPTIONAL 0 DECIMAL
2 10 \N DECIMAL
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
normal_int INT32 \N OPTIONAL 0 \N \N \N
\N \N
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
string_col BYTE_ARRAY \N OPTIONAL 0 UTF8 \N
\N \N STRING
+date_col INT32 \N OPTIONAL 0 DATE \N \N
\N DATE
+decimal_col FIXED_LEN_BYTE_ARRAY 5 OPTIONAL 0 DECIMAL
2 10 \N DECIMAL
+normal_int INT32 \N OPTIONAL 0 \N \N \N
\N \N
+string_col BYTE_ARRAY \N OPTIONAL 0 UTF8 \N
\N \N STRING
-- !parquet_metadata_empty --
-- !parquet_kv_metadata --
-s3://doris-regression-hk/regression/datalake/pipeline_data/kvmeta.parquet
app kvmeta_test
-s3://doris-regression-hk/regression/datalake/pipeline_data/kvmeta.parquet
note it's ok
-s3://doris-regression-hk/regression/datalake/pipeline_data/kvmeta.parquet
ver v1
+app kvmeta_test
+note it's ok
+ver v1
-- !parquet_file_metadata --
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
parquet-cpp-arrow version 21.0.0 5 1 2 \N \N
+parquet-cpp-arrow version 21.0.0 5 1 2
-- !parquet_file_metadata_s3_glob --
-s3://doris-regression-hk/regression/datalake/pipeline_data/bloommeta.parquet
-s3://doris-regression-hk/regression/datalake/pipeline_data/kvmeta.parquet
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
+3
-- !parquet_bloom_probe --
-s3://doris-regression-hk/regression/datalake/pipeline_data/bloommeta.parquet
0 0
-s3://doris-regression-hk/regression/datalake/pipeline_data/bloommeta.parquet
1 0
-s3://doris-regression-hk/regression/datalake/pipeline_data/bloommeta.parquet
2 1
+0 0
+1 0
+2 1
-- !parquet_bloom_probe_no_bf --
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
0 -1
+0 -1
-- !parquet_mapping --
0 5 4 236 0 4 5 normal_int INT32
1 5 1 \N 1
diff --git
a/regression-test/suites/external_table_p0/tvf/test_parquet_meta_tvf.groovy
b/regression-test/suites/external_table_p0/tvf/test_parquet_meta_tvf.groovy
index 5796fc210fa..1ac07b6463b 100644
--- a/regression-test/suites/external_table_p0/tvf/test_parquet_meta_tvf.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_parquet_meta_tvf.groovy
@@ -33,37 +33,51 @@ suite("test_parquet_meta_tvf",
"p0,external,external_docker,tvf") {
// parquet_metadata (S3)
// Note: Prefer asserting on stable metadata columns; avoid relying on
host-specific/local-only paths.
order_qt_parquet_metadata_s3 """
- select * from parquet_meta(
+ select
+ row_group_id, row_group_num_rows, row_group_num_columns,
row_group_bytes, column_id,
+ file_offset, num_values, path_in_schema, type, stats_min,
stats_max, stats_null_count,
+ stats_distinct_count, stats_min_value
+ from parquet_meta(
"uri" = "${basePath}/meta.parquet",
"s3.access_key" = "${ak}",
"s3.secret_key" = "${sk}",
"endpoint" = "${endpoint}",
"region" = "${region}",
"mode" = "parquet_metadata"
- );
+ )
+ order by row_group_id, column_id;
"""
// default mode: parquet_metadata
order_qt_parquet_metadata_default_mode """
- select * from parquet_meta(
+ select
+ row_group_id, row_group_num_rows, row_group_num_columns,
row_group_bytes, column_id,
+ file_offset, num_values, path_in_schema, type, stats_min,
stats_max, stats_null_count,
+ stats_distinct_count, stats_min_value
+ from parquet_meta(
"uri" = "${basePath}/meta.parquet",
"s3.access_key" = "${ak}",
"s3.secret_key" = "${sk}",
"endpoint" = "${endpoint}",
"region" = "${region}"
- );
+ )
+ order by row_group_id, column_id;
"""
// parquet_schema
order_qt_parquet_schema """
- select * from parquet_meta(
+ select
+ name, type, type_length, repetition_type, num_children,
converted_type,
+ scale, precision, field_id, logical_type
+ from parquet_meta(
"uri" = "${basePath}/meta.parquet",
"s3.access_key" = "${ak}",
"s3.secret_key" = "${sk}",
"endpoint" = "${endpoint}",
"region" = "${region}",
"mode" = "parquet_schema"
- );
+ )
+ order by name;
"""
// empty parquet
@@ -80,19 +94,20 @@ suite("test_parquet_meta_tvf",
"p0,external,external_docker,tvf") {
// kv metadata
order_qt_parquet_kv_metadata """
- select * from parquet_meta(
+ select `key`, `value` from parquet_meta(
"uri" = "${basePath}/kvmeta.parquet",
"s3.access_key" = "${ak}",
"s3.secret_key" = "${sk}",
"endpoint" = "${endpoint}",
"region" = "${region}",
"mode" = "parquet_kv_metadata"
- );
+ )
+ order by `key`;
"""
// file metadata
order_qt_parquet_file_metadata """
- select * from parquet_meta(
+ select created_by, num_rows, num_row_groups, format_version from
parquet_meta(
"uri" = "${basePath}/meta.parquet",
"s3.access_key" = "${ak}",
"s3.secret_key" = "${sk}",
@@ -104,7 +119,7 @@ suite("test_parquet_meta_tvf",
"p0,external,external_docker,tvf") {
// file metadata (S3 glob)
order_qt_parquet_file_metadata_s3_glob """
- select file_name from parquet_meta(
+ select count(*) from parquet_meta(
"uri" = "${basePath}/*meta.parquet",
"s3.access_key" = "${ak}",
"s3.secret_key" = "${sk}",
@@ -116,7 +131,7 @@ suite("test_parquet_meta_tvf",
"p0,external,external_docker,tvf") {
// bloom probe
order_qt_parquet_bloom_probe """
- select * from parquet_meta(
+ select row_group_id, bloom_filter_excludes from parquet_meta(
"uri" = "${basePath}/bloommeta.parquet",
"s3.access_key" = "${ak}",
"s3.secret_key" = "${sk}",
@@ -125,12 +140,13 @@ suite("test_parquet_meta_tvf",
"p0,external,external_docker,tvf") {
"mode" = "parquet_bloom_probe",
"column" = "col",
"value" = 500
- );
+ )
+ order by row_group_id;
"""
// bloom probe: column without bloom filter
order_qt_parquet_bloom_probe_no_bf """
- select * from parquet_meta(
+ select row_group_id, bloom_filter_excludes from parquet_meta(
"uri" = "${basePath}/meta.parquet",
"s3.access_key" = "${ak}",
"s3.secret_key" = "${sk}",
@@ -139,7 +155,8 @@ suite("test_parquet_meta_tvf",
"p0,external,external_docker,tvf") {
"mode" = "parquet_bloom_probe",
"column" = "normal_int",
"value" = 500
- );
+ )
+ order by row_group_id;
"""
// mapping select
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]