This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 68cfef6d67c [fix](parquet)fix when hive_parquet_use_column_names=false 
&& read partition tb cause be core. (#49966)
68cfef6d67c is described below

commit 68cfef6d67ce588c2b079c53a2204a471c9acf93
Author: daidai <changyu...@selectdb.com>
AuthorDate: Mon Apr 14 09:22:04 2025 +0800

    [fix](parquet)fix when hive_parquet_use_column_names=false && read 
partition tb cause be core. (#49966)
    
    ### What problem does this PR solve?
    related pr : #38432
    
    Problem Summary:
    When you query a Hive Parquet-format partitioned table after running `set
    hive_parquet_use_column_names = false`, the BE may crash with:
    ```
    *** SIGABRT unknown detail explain (@0x2f59de) received by PID 3103198 (TID 
3110278 OR 0x7f51c8e63640) from PID 3103198; stack trace: ***
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:421
     1# 0x00007F55DFB45520 in /lib/x86_64-linux-gnu/libc.so.6
     2# pthread_kill at ./nptl/pthread_kill.c:89
     3# raise at ../sysdeps/posix/raise.c:27
     4# abort at ./stdlib/abort.c:81
     5# __gnu_cxx::__verbose_terminate_handler() [clone .cold] at 
../../../../libstdc++-v3/libsupc++/vterminate.cc:75
     6# __cxxabiv1::__terminate(void (*)()) at 
../../../../libstdc++-v3/libsupc++/eh_terminate.cc:48
     7# 0x000055C8BD4E2041 in 
/mnt/disk1/doris-clusters/doris-master/output/be/lib/doris_be
     8# 0x000055C8BD4E2194 in 
/mnt/disk1/doris-clusters/doris-master/output/be/lib/doris_be
     9# 0x000055C8BD4E2586 in 
/mnt/disk1/doris-clusters/doris-master/output/be/lib/doris_be
    10# std::__cxx11::basic_string<char, std::char_traits<char>, 
std::allocator<char> >::_M_assign(std::__cxx11::basic_string<char, 
std::char_traits<char>, std::allocator<char> > const&) at 
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.tcc:265
    11# 
doris::vectorized::ParquetReader::get_next_block(doris::vectorized::Block*, 
unsigned long*, bool*) at 
/home/zcp/repo_center/doris_master/doris/be/src/vec/exec/format/parquet/vparquet_reader.cpp:586
    ```
    The cause is that when `get_next_block` restores the block's column names,
    it indexes `_column_names` by block column position, which can read out of
    bounds.
---
 be/src/vec/exec/format/parquet/vparquet_reader.cpp    |   3 ++-
 .../hive/test_external_catalog_hive_partition.out     | Bin 2711 -> 4455 bytes
 .../hive/test_external_catalog_hive_partition.groovy  |   9 +++++++++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
index b91a7c21cbd..3083fd61ab0 100644
--- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp
@@ -561,6 +561,7 @@ Status ParquetReader::get_next_block(Block* block, size_t* 
read_rows, bool* eof)
         return Status::OK();
     }
 
+    std::vector<std::string> original_block_column_name = block->get_names();
     if (!_hive_use_column_names) {
         for (auto i = 0; i < block->get_names().size(); i++) {
             auto& col = block->get_by_position(i);
@@ -584,7 +585,7 @@ Status ParquetReader::get_next_block(Block* block, size_t* 
read_rows, bool* eof)
 
     if (!_hive_use_column_names) {
         for (auto i = 0; i < block->columns(); i++) {
-            block->get_by_position(i).name = (*_column_names)[i];
+            block->get_by_position(i).name = original_block_column_name[i];
         }
         block->initialize_index_by_name();
     }
diff --git 
a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
 
b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
index aa1e48a439d..0402feef40e 100644
Binary files 
a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
 and 
b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out
 differ
diff --git 
a/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy
 
b/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy
index 32b80f5650d..d34467c4c56 100644
--- 
a/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy
+++ 
b/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy
@@ -65,9 +65,18 @@ suite("test_external_catalog_hive_partition", 
"p0,external,hive,external_docker,
             qt_q06 """ select * from multi_catalog.text_partitioned_columns 
where t_int is not null order by t_float """
         }
         sql """ use `multi_catalog`; """
+        sql """ set hive_parquet_use_column_names = true; """ 
+        sql """ set hive_orc_use_column_names = true"""
+        
         q01_parquet()
         q01_orc()
         q01_text()
+
+        sql """ set hive_parquet_use_column_names = false; """ 
+        sql """ set hive_orc_use_column_names = false"""
+        q01_parquet()
+        q01_orc()
+
     }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to