This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 68cfef6d67c [fix](parquet)fix when hive_parquet_use_column_names=false && read partition tb cause be core. (#49966) 68cfef6d67c is described below commit 68cfef6d67ce588c2b079c53a2204a471c9acf93 Author: daidai <changyu...@selectdb.com> AuthorDate: Mon Apr 14 09:22:04 2025 +0800 [fix](parquet)fix when hive_parquet_use_column_names=false && read partition tb cause be core. (#49966) ### What problem does this PR solve? related pr : #38432 Problem Summary: when you query a Hive Parquet-format partitioned table with `set hive_parquet_use_column_names = false`, the BE may crash with the following stack trace: ``` *** SIGABRT unknown detail explain (@0x2f59de) received by PID 3103198 (TID 3110278 OR 0x7f51c8e63640) from PID 3103198; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:421 1# 0x00007F55DFB45520 in /lib/x86_64-linux-gnu/libc.so.6 2# pthread_kill at ./nptl/pthread_kill.c:89 3# raise at ../sysdeps/posix/raise.c:27 4# abort at ./stdlib/abort.c:81 5# __gnu_cxx::__verbose_terminate_handler() [clone .cold] at ../../../../libstdc++-v3/libsupc++/vterminate.cc:75 6# __cxxabiv1::__terminate(void (*)()) at ../../../../libstdc++-v3/libsupc++/eh_terminate.cc:48 7# 0x000055C8BD4E2041 in /mnt/disk1/doris-clusters/doris-master/output/be/lib/doris_be 8# 0x000055C8BD4E2194 in /mnt/disk1/doris-clusters/doris-master/output/be/lib/doris_be 9# 0x000055C8BD4E2586 in /mnt/disk1/doris-clusters/doris-master/output/be/lib/doris_be 10# std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >::_M_assign(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&) at /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/basic_string.tcc:265 11# doris::vectorized::ParquetReader::get_next_block(doris::vectorized::Block*, unsigned long*, 
bool*) at /home/zcp/repo_center/doris_master/doris/be/src/vec/exec/format/parquet/vparquet_reader.cpp:586 ``` The reason is that when `get_next_block` restores the block's column names, it indexes `_column_names` by block column position, and that access can go out of bounds. --- be/src/vec/exec/format/parquet/vparquet_reader.cpp | 3 ++- .../hive/test_external_catalog_hive_partition.out | Bin 2711 -> 4455 bytes .../hive/test_external_catalog_hive_partition.groovy | 9 +++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index b91a7c21cbd..3083fd61ab0 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -561,6 +561,7 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof) return Status::OK(); } + std::vector<std::string> original_block_column_name = block->get_names(); if (!_hive_use_column_names) { for (auto i = 0; i < block->get_names().size(); i++) { auto& col = block->get_by_position(i); @@ -584,7 +585,7 @@ Status ParquetReader::get_next_block(Block* block, size_t* read_rows, bool* eof) if (!_hive_use_column_names) { for (auto i = 0; i < block->columns(); i++) { - block->get_by_position(i).name = (*_column_names)[i]; + block->get_by_position(i).name = original_block_column_name[i]; } block->initialize_index_by_name(); } diff --git a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out index aa1e48a439d..0402feef40e 100644 Binary files a/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out and b/regression-test/data/external_table_p0/hive/test_external_catalog_hive_partition.out differ diff --git a/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy 
b/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy index 32b80f5650d..d34467c4c56 100644 --- a/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy +++ b/regression-test/suites/external_table_p0/hive/test_external_catalog_hive_partition.groovy @@ -65,9 +65,18 @@ suite("test_external_catalog_hive_partition", "p0,external,hive,external_docker, qt_q06 """ select * from multi_catalog.text_partitioned_columns where t_int is not null order by t_float """ } sql """ use `multi_catalog`; """ + sql """ set hive_parquet_use_column_names = true; """ + sql """ set hive_orc_use_column_names = true""" + q01_parquet() q01_orc() q01_text() + + sql """ set hive_parquet_use_column_names = false; """ + sql """ set hive_orc_use_column_names = false""" + q01_parquet() + q01_orc() + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org