This is an automated email from the ASF dual-hosted git repository.

zouxinyi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1dcd96da1d5 [fix](arrow-flight-sql) Fix Doris NULL column conversion 
to arrow batch (#43929)
1dcd96da1d5 is described below

commit 1dcd96da1d5e21fb11901b3c49dd41937f4e5b0c
Author: Xinyi Zou <zouxi...@selectdb.com>
AuthorDate: Tue Nov 19 10:54:42 2024 +0800

    [fix](arrow-flight-sql) Fix Doris NULL column conversion to arrow batch 
(#43929)
    
    ### What problem does this PR solve?
    
    Problem Summary:
    
    Doris represents NULL columns with a special type,
    `DataTypeNull<DataTypeNumber::Uint8>`. When serializing into an Arrow
    batch, `Uint8` uses `arrow::BooleanBuilder`, which does not match the
    expected `arrow::NullBuilder`.
    
    Fixes the following crash:
    
    ```
    *** Query id: fd32741526804c1e-bc016473fd8f3aa3 ***
    *** is nereids: 1 ***
    *** tablet id: 0 ***
    *** Aborted at 1731327262 (unix time) try "date -d @1731327262" if you are 
using GNU date ***
    *** Current BE git commitID: 653e315ba5 ***
    *** SIGSEGV address not mapped to object (@0x100000024) received by PID 
1442863 (TID 1443456 OR 0x7f8b8cdea700) from PID 36; stack trace: ***
     0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/common/signal_handler.h:421
     1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in 
/mnt/disk2/liyifan/doris/jdk-17.0.2/lib/server/libjvm.so
     2# JVM_handle_linux_signal in 
/mnt/disk2/liyifan/doris/jdk-17.0.2/lib/server/libjvm.so
     3# 0x00007F8CA1F38B50 in /lib64/libc.so.6
     4# 0x000055FC45E5B2D3 in 
/mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be
     5# arrow::BooleanBuilder::AppendValues(unsigned char const*, long, 
unsigned char const*) in 
/mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be
     6# doris::vectorized::DataTypeNumberSerDe<unsigned 
char>::write_column_to_arrow(doris::vectorized::IColumn const&, 
doris::vectorized::PODArray<unsigned char, 4096ul, Allocator<false, false, 
false, DefaultMemoryAllocator>, 15ul, 16ul> const*, arrow::ArrayBuilder*, int, 
int, cctz::time_zone const&) const at 
/mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/data_types/serde/data_type_number_serde.cpp:86
     7# 
doris::FromBlockConverter::convert(std::shared_ptr<arrow::RecordBatch>*) at 
/mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/util/arrow/block_convertor.cpp:390
     8# doris::convert_to_arrow_batch(doris::vectorized::Block const&, 
std::shared_ptr<arrow::Schema> const&, arrow::MemoryPool*, 
std::shared_ptr<arrow::RecordBatch>*, cctz::time_zone const&) in 
/mnt/disk2/liyifan/doris/doris_2.1/doris/output_run/be/lib/doris_be
     9# 
doris::vectorized::VArrowFlightResultWriter::write(doris::vectorized::Block&) 
at 
/mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/sink/varrow_flight_result_writer.cpp:76
    10# doris::vectorized::VResultSink::send(doris::RuntimeState*, 
doris::vectorized::Block*, bool) at 
/mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/vec/sink/vresult_sink.cpp:149
    11# doris::PlanFragmentExecutor::open_vectorized_internal() at 
/mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/runtime/plan_fragment_executor.cpp:341
    12# doris::PlanFragmentExecutor::open() at 
/mnt/disk2/liyifan/doris/doris_2.1/doris/be/src/runtime/plan_fragment_executor.cpp:273
    ```
---
 .../vec/data_types/serde/data_type_number_serde.cpp | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp 
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index efa41e346bf..f4fb6bbbb1f 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -78,12 +78,21 @@ void DataTypeNumberSerDe<T>::write_column_to_arrow(const 
IColumn& column, const
     auto arrow_null_map = revert_null_map(null_map, start, end);
     auto arrow_null_map_data = arrow_null_map.empty() ? nullptr : 
arrow_null_map.data();
     if constexpr (std::is_same_v<T, UInt8>) {
-        ARROW_BUILDER_TYPE& builder = 
assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
-        checkArrowStatus(
-                builder.AppendValues(reinterpret_cast<const 
uint8_t*>(col_data.data() + start),
-                                     end - start,
-                                     reinterpret_cast<const 
uint8_t*>(arrow_null_map_data)),
-                column.get_name(), array_builder->type()->name());
+        auto* null_builder = dynamic_cast<arrow::NullBuilder*>(array_builder);
+        if (null_builder) {
+            for (size_t i = start; i < end; ++i) {
+                checkArrowStatus(null_builder->AppendNull(), column.get_name(),
+                                 null_builder->type()->name());
+            }
+        } else {
+            ARROW_BUILDER_TYPE& builder = 
assert_cast<ARROW_BUILDER_TYPE&>(*array_builder);
+            checkArrowStatus(
+                    builder.AppendValues(reinterpret_cast<const 
uint8_t*>(col_data.data() + start),
+                                         end - start,
+                                         reinterpret_cast<const 
uint8_t*>(arrow_null_map_data)),
+                    column.get_name(), array_builder->type()->name());
+        }
+
     } else if constexpr (std::is_same_v<T, Int128>) {
         auto& string_builder = 
assert_cast<arrow::StringBuilder&>(*array_builder);
         for (size_t i = start; i < end; ++i) {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to