This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new c53d9cc3998 [fix](regression) stabilize parquet_meta tvf assertions across s3 buckets (#60938)
c53d9cc3998 is described below

commit c53d9cc39989a6f4ddf625e883e630158806c862
Author: Chenjunwei <[email protected]>
AuthorDate: Mon Mar 2 18:15:15 2026 +0800

    [fix](regression) stabilize parquet_meta tvf assertions across s3 buckets (#60938)
    
    ## Summary
    - Adjust the `test_parquet_meta_tvf` S3-mode checks to compare only stable columns.
    - Avoid asserting on `file_name` / full S3 URI fields, which vary by pipeline bucket.
    - Update the corresponding `.out` baseline for the changed query projections.
    
    ## Why
    Different CI pipelines may use different bucket names, which causes
    false failures when full URI/file name columns are compared.
    
    ## Test
    - Attempted: `./run-regression-test.sh --run -f external_table_p0/tvf/test_parquet_meta_tvf -forceGenOut`
    - In this environment the run failed with an S3 `FORBIDDEN` error while reading the regression parquet files.
---
 .../tvf/test_parquet_meta_tvf.out                  | 44 ++++++++++-----------
 .../tvf/test_parquet_meta_tvf.groovy               | 45 +++++++++++++++-------
 2 files changed, 52 insertions(+), 37 deletions(-)

diff --git a/regression-test/data/external_table_p0/tvf/test_parquet_meta_tvf.out b/regression-test/data/external_table_p0/tvf/test_parquet_meta_tvf.out
index 37a5dce0bd4..521286473d4 100644
--- a/regression-test/data/external_table_p0/tvf/test_parquet_meta_tvf.out
+++ b/regression-test/data/external_table_p0/tvf/test_parquet_meta_tvf.out
@@ -1,44 +1,42 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !parquet_metadata_s3 --
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
0       5       4       236     0       4       5       normal_int      INT32   
1       5       1       \N      1       5       SNAPPY  
PLAIN,RLE,RLE_DICTIONARY        \N      4       36      61      57      
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437741414147526C59326C74595778665932397341
 [...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
0       5       4       236     1       65      5       string_col      
BYTE_ARRAY      \N      \N      1       \N      a       e       SNAPPY  
PLAIN,RLE,RLE_DICTIONARY        \N      65      101     65      61      
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437741414147526C59326C7459577866
 [...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
0       5       4       236     2       130     5       date_col        INT32   
2023-01-01      2023-01-05      1       \N      2023-01-01      2023-01-05      
SNAPPY  PLAIN,RLE,RLE_DICTIONARY        \N      130     162     61      57      
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437
 [...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
0       5       4       236     3       191     5       decimal_col     
FIXED_LEN_BYTE_ARRAY    10.00   50.00   1       \N      10.00   50.00   SNAPPY  
PLAIN,RLE,RLE_DICTIONARY        \N      191     227     65      61      
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A41414141415141414141414141414143774
 [...]
+0      5       4       236     0       4       5       normal_int      INT32   
1       5       1       \N      1
+0      5       4       236     1       65      5       string_col      
BYTE_ARRAY      \N      \N      1       \N      a
+0      5       4       236     2       130     5       date_col        INT32   
2023-01-01      2023-01-05      1       \N      2023-01-01
+0      5       4       236     3       191     5       decimal_col     
FIXED_LEN_BYTE_ARRAY    10.00   50.00   1       \N      10.00
 
 -- !parquet_metadata_default_mode --
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
0       5       4       236     0       4       5       normal_int      INT32   
1       5       1       \N      1       5       SNAPPY  
PLAIN,RLE,RLE_DICTIONARY        \N      4       36      61      57      
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437741414147526C59326C74595778665932397341
 [...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
0       5       4       236     1       65      5       string_col      
BYTE_ARRAY      \N      \N      1       \N      a       e       SNAPPY  
PLAIN,RLE,RLE_DICTIONARY        \N      65      101     65      61      
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437741414147526C59326C7459577866
 [...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
0       5       4       236     2       130     5       date_col        INT32   
2023-01-01      2023-01-05      1       \N      2023-01-01      2023-01-05      
SNAPPY  PLAIN,RLE,RLE_DICTIONARY        \N      130     162     61      57      
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A414141414151414141414141414141437
 [...]
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
0       5       4       236     3       191     5       decimal_col     
FIXED_LEN_BYTE_ARRAY    10.00   50.00   1       \N      10.00   50.00   SNAPPY  
PLAIN,RLE,RLE_DICTIONARY        \N      191     227     65      61      
{0x4152524F573A736368656D61:0x2F2F2F2F2F7A414241414151414141414141414B41417741426741464141674143674141414141424241414D4141414143414149414141414241414941414141424141414141514141414441414141416641414141455141414141454141414159502F2F2F77414141516351414141414A41414141415141414141414141414143774
 [...]
+0      5       4       236     0       4       5       normal_int      INT32   
1       5       1       \N      1
+0      5       4       236     1       65      5       string_col      
BYTE_ARRAY      \N      \N      1       \N      a
+0      5       4       236     2       130     5       date_col        INT32   
2023-01-01      2023-01-05      1       \N      2023-01-01
+0      5       4       236     3       191     5       decimal_col     
FIXED_LEN_BYTE_ARRAY    10.00   50.00   1       \N      10.00
 
 -- !parquet_schema --
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
date_col        INT32   \N      OPTIONAL        0       DATE    \N      \N      
\N      DATE
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
decimal_col     FIXED_LEN_BYTE_ARRAY    5       OPTIONAL        0       DECIMAL 
2       10      \N      DECIMAL
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
normal_int      INT32   \N      OPTIONAL        0       \N      \N      \N      
\N      \N
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
string_col      BYTE_ARRAY      \N      OPTIONAL        0       UTF8    \N      
\N      \N      STRING
+date_col       INT32   \N      OPTIONAL        0       DATE    \N      \N      
\N      DATE
+decimal_col    FIXED_LEN_BYTE_ARRAY    5       OPTIONAL        0       DECIMAL 
2       10      \N      DECIMAL
+normal_int     INT32   \N      OPTIONAL        0       \N      \N      \N      
\N      \N
+string_col     BYTE_ARRAY      \N      OPTIONAL        0       UTF8    \N      
\N      \N      STRING
 
 -- !parquet_metadata_empty --
 
 -- !parquet_kv_metadata --
-s3://doris-regression-hk/regression/datalake/pipeline_data/kvmeta.parquet      
app     kvmeta_test
-s3://doris-regression-hk/regression/datalake/pipeline_data/kvmeta.parquet      
note    it's ok
-s3://doris-regression-hk/regression/datalake/pipeline_data/kvmeta.parquet      
ver     v1
+app    kvmeta_test
+note   it's ok
+ver    v1
 
 -- !parquet_file_metadata --
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
parquet-cpp-arrow version 21.0.0        5       1       2       \N      \N
+parquet-cpp-arrow version 21.0.0       5       1       2
 
 -- !parquet_file_metadata_s3_glob --
-s3://doris-regression-hk/regression/datalake/pipeline_data/bloommeta.parquet
-s3://doris-regression-hk/regression/datalake/pipeline_data/kvmeta.parquet
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet
+3
 
 -- !parquet_bloom_probe --
-s3://doris-regression-hk/regression/datalake/pipeline_data/bloommeta.parquet   
0       0
-s3://doris-regression-hk/regression/datalake/pipeline_data/bloommeta.parquet   
1       0
-s3://doris-regression-hk/regression/datalake/pipeline_data/bloommeta.parquet   
2       1
+0      0
+1      0
+2      1
 
 -- !parquet_bloom_probe_no_bf --
-s3://doris-regression-hk/regression/datalake/pipeline_data/meta.parquet        
0       -1
+0      -1
 
 -- !parquet_mapping --
 0      5       4       236     0       4       5       normal_int      INT32   
1       5       1       \N      1
diff --git a/regression-test/suites/external_table_p0/tvf/test_parquet_meta_tvf.groovy b/regression-test/suites/external_table_p0/tvf/test_parquet_meta_tvf.groovy
index 5796fc210fa..1ac07b6463b 100644
--- a/regression-test/suites/external_table_p0/tvf/test_parquet_meta_tvf.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_parquet_meta_tvf.groovy
@@ -33,37 +33,51 @@ suite("test_parquet_meta_tvf", 
"p0,external,external_docker,tvf") {
     // parquet_metadata (S3)
     // Note: Prefer asserting on stable metadata columns; avoid relying on 
host-specific/local-only paths.
     order_qt_parquet_metadata_s3 """
-        select * from parquet_meta(
+        select
+            row_group_id, row_group_num_rows, row_group_num_columns, 
row_group_bytes, column_id,
+            file_offset, num_values, path_in_schema, type, stats_min, 
stats_max, stats_null_count,
+            stats_distinct_count, stats_min_value
+        from parquet_meta(
             "uri" = "${basePath}/meta.parquet",
             "s3.access_key" = "${ak}",
             "s3.secret_key" = "${sk}",
             "endpoint" = "${endpoint}",
             "region" = "${region}",
             "mode" = "parquet_metadata"
-        );
+        )
+        order by row_group_id, column_id;
     """
 
     // default mode: parquet_metadata
     order_qt_parquet_metadata_default_mode """
-        select * from parquet_meta(
+        select
+            row_group_id, row_group_num_rows, row_group_num_columns, 
row_group_bytes, column_id,
+            file_offset, num_values, path_in_schema, type, stats_min, 
stats_max, stats_null_count,
+            stats_distinct_count, stats_min_value
+        from parquet_meta(
             "uri" = "${basePath}/meta.parquet",
             "s3.access_key" = "${ak}",
             "s3.secret_key" = "${sk}",
             "endpoint" = "${endpoint}",
             "region" = "${region}"
-        );
+        )
+        order by row_group_id, column_id;
     """
 
     // parquet_schema
     order_qt_parquet_schema """
-        select * from parquet_meta(
+        select
+            name, type, type_length, repetition_type, num_children, 
converted_type,
+            scale, precision, field_id, logical_type
+        from parquet_meta(
             "uri" = "${basePath}/meta.parquet",
             "s3.access_key" = "${ak}",
             "s3.secret_key" = "${sk}",
             "endpoint" = "${endpoint}",
             "region" = "${region}",
             "mode" = "parquet_schema"
-        );
+        )
+        order by name;
     """
 
     // empty parquet
@@ -80,19 +94,20 @@ suite("test_parquet_meta_tvf", 
"p0,external,external_docker,tvf") {
 
     // kv metadata
     order_qt_parquet_kv_metadata """
-        select * from parquet_meta(
+        select `key`, `value` from parquet_meta(
             "uri" = "${basePath}/kvmeta.parquet",
             "s3.access_key" = "${ak}",
             "s3.secret_key" = "${sk}",
             "endpoint" = "${endpoint}",
             "region" = "${region}",
             "mode" = "parquet_kv_metadata"
-        );
+        )
+        order by `key`;
     """
 
     // file metadata
     order_qt_parquet_file_metadata """
-        select * from parquet_meta(
+        select created_by, num_rows, num_row_groups, format_version from 
parquet_meta(
             "uri" = "${basePath}/meta.parquet",
             "s3.access_key" = "${ak}",
             "s3.secret_key" = "${sk}",
@@ -104,7 +119,7 @@ suite("test_parquet_meta_tvf", 
"p0,external,external_docker,tvf") {
 
     // file metadata (S3 glob)
     order_qt_parquet_file_metadata_s3_glob """
-        select file_name from parquet_meta(
+        select count(*) from parquet_meta(
             "uri" = "${basePath}/*meta.parquet",
             "s3.access_key" = "${ak}",
             "s3.secret_key" = "${sk}",
@@ -116,7 +131,7 @@ suite("test_parquet_meta_tvf", 
"p0,external,external_docker,tvf") {
 
     // bloom probe
     order_qt_parquet_bloom_probe """
-        select * from parquet_meta(
+        select row_group_id, bloom_filter_excludes from parquet_meta(
             "uri" = "${basePath}/bloommeta.parquet",
             "s3.access_key" = "${ak}",
             "s3.secret_key" = "${sk}",
@@ -125,12 +140,13 @@ suite("test_parquet_meta_tvf", 
"p0,external,external_docker,tvf") {
             "mode" = "parquet_bloom_probe",
             "column" = "col",
             "value" = 500
-        );
+        )
+        order by row_group_id;
     """
 
     // bloom probe: column without bloom filter
     order_qt_parquet_bloom_probe_no_bf """
-        select * from parquet_meta(
+        select row_group_id, bloom_filter_excludes from parquet_meta(
             "uri" = "${basePath}/meta.parquet",
             "s3.access_key" = "${ak}",
             "s3.secret_key" = "${sk}",
@@ -139,7 +155,8 @@ suite("test_parquet_meta_tvf", 
"p0,external,external_docker,tvf") {
             "mode" = "parquet_bloom_probe",
             "column" = "normal_int",
             "value" = 500
-        );
+        )
+        order by row_group_id;
     """
 
     // mapping select


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to