uchenily opened a new issue, #43991: URL: https://github.com/apache/arrow/issues/43991
### Describe the bug, including details regarding any error messages, version, and platform. Hello, I get a segmentation fault when calling `arrow::acero::DeclarationToStatus()`. The gdb stack trace is as follows: ```c++ Thread 4 "bow-exe" received signal SIGSEGV, Segmentation fault. 0x00007ffff55a5fb8 in arrow::compute::FunctionOptions::operator= (this=0x7fffe4007920) at /root/work/build-arrow/cpp/src/arrow/compute/function_options.h:52 52 class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> { (gdb) bt #0 0x00007ffff55a5fb8 in arrow::compute::FunctionOptions::operator= (this=0x7fffe4007920) at /root/work/build-arrow/cpp/src/arrow/compute/function_options.h:52 #1 0x00007ffff59b7fef in arrow::compute::ScalarAggregateOptions::operator= (this=0x7fffe4007920) at /root/work/build-arrow/cpp/src/arrow/compute/api_aggregate.h:48 #2 0x00007ffff586b350 in arrow::compute::internal::(anonymous namespace)::GroupedMinMaxImpl<arrow::Int64Type, void>::Init (this=0x7fffe4007800, ctx=0x7fffe411c4e8, args=...) at /root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1390 #3 0x00007ffff58582c5 in arrow::compute::internal::(anonymous namespace)::HashAggregateInit<arrow::compute::internal::(anonymous namespace)::GroupedMinMaxImpl<arrow::Int64Type, void> > (ctx=0x7fffebdfd370, args=...) at /root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:85 #4 0x00007ffff584536b in arrow::compute::internal::(anonymous namespace)::MinMaxInit<arrow::Int64Type> (ctx=0x7fffebdfd370, args=...) 
at /root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1690 #5 0x00007ffff539bc02 in std::__invoke_impl<arrow::Result<std::unique_ptr<arrow::compute::KernelState, std::default_delete<arrow::compute::KernelState> > >, arrow::Result<std::unique_ptr<arrow::compute::KernelState, std::default_delete<arrow::compute::KernelState> > > (*&)(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&), arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&> ( __f=@0x7fffe4111b10: 0x7ffff5845330 <arrow::compute::internal::(anonymous namespace)::MinMaxInit<arrow::Int64Type>(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&)>) at /usr/include/c++/14.2.1/bits/invoke.h:61 #6 0x00007ffff539ab4d in std::__invoke_r<arrow::Result<std::unique_ptr<arrow::compute::KernelState, std::default_delete<arrow::compute::KernelState> > >, arrow::Result<std::unique_ptr<arrow::compute::KernelState, std::default_delete<arrow::compute::KernelState> > > (*&)(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&), arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&> ( __fn=@0x7fffe4111b10: 0x7ffff5845330 <arrow::compute::internal::(anonymous namespace)::MinMaxInit<arrow::Int64Type>(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&)>) at /usr/include/c++/14.2.1/bits/invoke.h:116 #7 0x00007ffff53992e9 in std::_Function_handler<arrow::Result<std::unique_ptr<arrow::compute::KernelState, std::default_delete<arrow::compute::KernelState> > >(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&), arrow::Result<std::unique_ptr<arrow::compute::KernelState, std::default_delete<arrow::compute::KernelState> > > (*)(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&)>::_M_invoke (__functor=..., __args#0=@0x7fffebdfd0d8: 0x7fffebdfd370, __args#1=...) 
at /usr/include/c++/14.2.1/bits/std_function.h:291 #8 0x00007ffff53ae465 in std::function<arrow::Result<std::unique_ptr<arrow::compute::KernelState, std::default_delete<arrow::compute::KernelState> > >(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&)>::operator() (this=0x7fffe4111b10, __args#0=0x7fffebdfd370, __args#1=...) at /usr/include/c++/14.2.1/bits/std_function.h:591 #9 0x00007ffff5832b13 in operator() (__closure=0x7fffe41132a0, ctx=0x7fffebdfd370, args=...) at /root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1704 #10 0x00007ffff5899c45 in std::__invoke_impl<arrow::Result<std::unique_ptr<arrow::compute::KernelState> >, arrow::compute::internal::(anonymous namespace)::MakeMinOrMaxKernel<(arrow::compute::internal::MinOrMax)0>(arrow::compute::HashAggregateFunction*)::<lambda(arrow::compute::KernelContext*, const arrow::compute::KernelInitArgs&)>&, arrow::compute::KernelContext*, const arrow::compute::KernelInitArgs&>(std::__invoke_other, struct {...} &) (__f=...) at /usr/include/c++/14.2.1/bits/invoke.h:61 #11 0x00007ffff588969f in std::__invoke_r<arrow::Result<std::unique_ptr<arrow::compute::KernelState> >, arrow::compute::internal::(anonymous namespace)::MakeMinOrMaxKernel<(arrow::compute::internal::MinOrMax)0>(arrow::compute::HashAggregateFunction*)::<lambda(arrow::compute::KernelContext*, const arrow::compute::KernelInitArgs&)>&, arrow::compute::KernelContext*, const arrow::compute::KernelInitArgs&>(struct {...} &) (__fn=...) 
at /usr/include/c++/14.2.1/bits/invoke.h:116 #12 0x00007ffff58671db in std::_Function_handler<arrow::Result<std::unique_ptr<arrow::compute::KernelState, std::default_delete<arrow::compute::KernelState> > >(arrow::compute::KernelContext*, const arrow::compute::KernelInitArgs&), arrow::compute::internal::(anonymous namespace)::MakeMinOrMaxKernel<(arrow::compute::internal::MinOrMax)0>(arrow::compute::HashAggregateFunction*)::<lambda(arrow::compute::KernelContext*, const arrow::compute::KernelInitArgs&)> >::_M_invoke(const std::_Any_data &, arrow::compute::KernelContext *&&, const arrow::compute::KernelInitArgs &) ( __functor=..., __args#0=@0x7fffebdfd2c8: 0x7fffebdfd370, __args#1=...) at /usr/include/c++/14.2.1/bits/std_function.h:291 #13 0x00007ffff53ae465 in std::function<arrow::Result<std::unique_ptr<arrow::compute::KernelState, std::default_delete<arrow::compute::KernelState> > >(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&)>::operator() (this=0x7fffe41132a0, __args#0=0x7fffebdfd370, __args#1=...) 
at /usr/include/c++/14.2.1/bits/std_function.h:591 #14 0x00007ffff3392aec in arrow::acero::aggregate::InitKernel (kernel=0x7fffe4113290, ctx=0x7fffe411c4e8, aggregate=..., in_types=std::vector of length 1, capacity 1 = {...}) at /root/work/build-arrow/cpp/src/arrow/acero/aggregate_internal.cc:102 #15 0x00007ffff339305f in arrow::acero::aggregate::InitKernels (kernels=std::vector of length 2, capacity 2 = {...}, ctx=0x7fffe411c4e8, aggregates=std::vector of length 2, capacity 2 = {...}, in_types=std::vector of length 2, capacity 2 = {...}) at /root/work/build-arrow/cpp/src/arrow/acero/aggregate_internal.cc:129 #16 0x00007ffff33818ee in arrow::acero::aggregate::GroupByNode::MakeAggregateNodeArgs (input_schema=std::shared_ptr<arrow::Schema> (use count 2, weak count 0) = {...}, keys=std::vector of length 1, capacity 1 = {...}, segment_keys=std::vector of length 0, capacity 0, aggs=std::vector of length 2, capacity 2 = {...}, ctx=0x7fffe411c4e8, is_cpu_parallel=true) at /root/work/build-arrow/cpp/src/arrow/acero/groupby_aggregate_node.cc:142 #17 0x00007ffff3382663 in arrow::acero::aggregate::GroupByNode::Make (plan=0x7fffe411c400, inputs=std::vector of length 1, capacity 1 = {...}, options=...) at /root/work/build-arrow/cpp/src/arrow/acero/groupby_aggregate_node.cc:193 #18 0x00007ffff33949e9 in operator() (__closure=0x7fffebdfde40, plan=0x7fffe411c400, inputs=std::vector of length 0, capacity 0, options=...) at /root/work/build-arrow/cpp/src/arrow/acero/aggregate_internal.cc:253 #19 0x00007ffff33951d9 in std::__invoke_impl<arrow::Result<arrow::acero::ExecNode*>, arrow::acero::internal::RegisterAggregateNode(arrow::acero::ExecFactoryRegistry*)::<lambda(arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*>, const arrow::acero::ExecNodeOptions&)>&, arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*> >, const arrow::acero::ExecNodeOptions&>(std::__invoke_other, struct {...} &) (__f=...) 
at /usr/include/c++/14.2.1/bits/invoke.h:61 #20 0x00007ffff3395031 in std::__invoke_r<arrow::Result<arrow::acero::ExecNode*>, arrow::acero::internal::RegisterAggregateNode(arrow::acero::ExecFactoryRegistry*)::<lambda(arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*>, const arrow::acero::ExecNodeOptions&)>&, arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*> >, const arrow::acero::ExecNodeOptions&>(struct {...} &) (__fn=...) at /usr/include/c++/14.2.1/bits/invoke.h:116 #21 0x00007ffff3394eb0 in std::_Function_handler<arrow::Result<arrow::acero::ExecNode*>(arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*> >, const arrow::acero::ExecNodeOptions&), arrow::acero::internal::RegisterAggregateNode(arrow::acero::ExecFactoryRegistry*)::<lambda(arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*> >, const arrow::acero::ExecNodeOptions&)> >::_M_invoke(const std::_Any_data &, arrow::acero::ExecPlan *&&, std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*> > &&, const arrow::acero::ExecNodeOptions &) (__functor=..., __args#0=@0x7fffebdfdd88: 0x7fffe411c400, __args#1=..., __args#2=...) at /usr/include/c++/14.2.1/bits/std_function.h:291 #22 0x00007ffff340a3b1 in std::function<arrow::Result<arrow::acero::ExecNode*>(arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*> >, arrow::acero::ExecNodeOptions const&)>::operator() (this=0x7fffebdfde40, __args#0=0x7fffe411c400, __args#1=std::vector of length 0, capacity 0, __args#2=...) 
at /usr/include/c++/14.2.1/bits/std_function.h:591 #23 0x00007ffff3407715 in arrow::acero::MakeExecNode (factory_name="aggregate", plan=0x7fffe411c400, inputs=std::vector of length 0, capacity 0, options=..., registry=0x7ffff36a75a0 <arrow::acero::default_exec_factory_registry()::instance>) at /root/work/build-arrow/cpp/src/arrow/acero/exec_plan.h:381 #24 0x00007ffff33f2b40 in arrow::acero::Declaration::AddToPlan (this=0x7fffe413dfc0, plan=0x7fffe411c400, registry=0x7ffff36a75a0 <arrow::acero::default_exec_factory_registry()::instance>) at /root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:585 #25 0x00007ffff33f29bd in arrow::acero::Declaration::AddToPlan (this=0x7fffebdfe460, plan=0x7fffe411c400, registry=0x7ffff36a75a0 <arrow::acero::default_exec_factory_registry()::instance>) at /root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:581 #26 0x00007ffff33f5918 in arrow::acero::(anonymous namespace)::DeclarationToStatusImpl (declaration=..., options=..., cpu_executor=0x7fffe40052c0) at /root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:714 #27 0x00007ffff33f9164 in operator() (__closure=0x7fffe4005628, executor=0x7fffe40052c0) at /root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:929 #28 0x00007ffff3403aab in arrow::internal::FnOnce<arrow::Future<arrow::internal::Empty>(arrow::internal::Executor*)>::FnImpl<arrow::acero::DeclarationToStatus(Declaration, bool, arrow::MemoryPool*, arrow::compute::FunctionRegistry*)::<lambda(arrow::internal::Executor*)> >::invoke(arrow::internal::Executor *&&) (this=0x7fffe4005620, a#0=@0x7fffebdfe548: 0x7fffe40052c0) at /root/work/build-arrow/cpp/src/arrow/util/functional.h:152 #29 0x00007ffff3417fb3 in arrow::internal::FnOnce<arrow::Future<arrow::internal::Empty> (arrow::internal::Executor*)>::operator()(arrow::internal::Executor*) && (this=0x7fffebdfe628, a#0=0x7fffe40052c0) at /root/work/build-arrow/cpp/src/arrow/util/functional.h:140 #30 0x00007ffff340fae9 in 
arrow::internal::RunSynchronously<arrow::Future<arrow::internal::Empty>, arrow::internal::Empty> (get_future=..., use_threads=true) at /root/work/build-arrow/cpp/src/arrow/util/thread_pool.h:587 #31 0x00007ffff33f92bd in arrow::acero::DeclarationToStatus (declaration=..., use_threads=true, memory_pool=0x7ffff7bff7c0 <arrow::global_state+320>, function_registry=0x0) at /root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:931 ... ``` By debugging, I found that the problem may be in this place: ```diff --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -1385,7 +1385,9 @@ struct GroupedMinMaxImpl final : public GroupedAggregator { typename std::conditional<is_boolean_type<Type>::value, uint8_t, CType>::type; Status Init(ExecContext* ctx, const KernelInitArgs& args) override { - options_ = *checked_cast<const ScalarAggregateOptions*>(args.options); + // options_ = *checked_cast<const ScalarAggregateOptions*>(args.options); // segmentation fault <-- the original code + // options_ = *dynamic_cast<const ScalarAggregateOptions*>(args.options); // segmentation fault + options_ = *static_cast<const ScalarAggregateOptions*>(args.options); // ok // type_ initialized by MinMaxInit mins_ = TypedBufferBuilder<CType>(ctx->memory_pool()); maxes_ = TypedBufferBuilder<CType>(ctx->memory_pool()); ``` It seems that if I replace this line with `static_cast<const ScalarAggregateOptions*>`, there will be no errors, but I want to figure out why this is happening and how to fix it properly. ```c++ Thread 4 "bow-exe" hit Breakpoint 2.10, arrow::compute::internal::(anonymous namespace)::GroupedMinMaxImpl<arrow::Int64Type, void>::Init (this=0x7fffe4007800, ctx=0x7fffe411c498, args=...) 
at /root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1387 1387 Status Init(ExecContext* ctx, const KernelInitArgs& args) override { (gdb) n 1388 options_ = *checked_cast<const ScalarAggregateOptions*>(args.options); (gdb) p args.options $1 = (const arrow::compute::FunctionOptions *) 0x7fffe4005920 (gdb) p *(const ScalarAggregateOptions*)(args.options) $3 = {<arrow::compute::FunctionOptions> = {<arrow::util::EqualityComparable<arrow::compute::FunctionOptions>> = {<No data fields>}, _vptr.FunctionOptions = 0x7ffff7b3d198 <vtable for arrow::compute::CumulativeOptions+16>, options_type_ = 0x7ffff7c02d40 <arrow::compute::internal::GetFunctionOptionsType<arrow::compute::CumulativeOptions, arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions, std::optional<std::shared_ptr<arrow::Scalar> > >, arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions, bool> >(arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions, std::optional<std::shared_ptr<arrow::Scalar> > > const&, arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions, bool> const&)::instance>}, static kTypeName = "ScalarAggregateOptions", skip_nulls = 117, min_count = 1819231092} (gdb) n Thread 4 "bow-exe" received signal SIGSEGV, Segmentation fault. 
0x00007ffff55a5fb8 in arrow::compute::FunctionOptions::operator= (this=0x7fffe4007920) at /root/work/build-arrow/cpp/src/arrow/compute/function_options.h:52 52 class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> { ``` I'm using tag `apache-arrow-17.0.0-rc2` and built Arrow with these options: ARROW_BUILD_INTEGRATION="OFF" ARROW_BUILD_TESTS="OFF" ARROW_COMPUTE="ON" ARROW_CSV="ON" ARROW_DATASET="ON" ARROW_DEPENDENCY_SOURCE="VCPKG" ARROW_DEPENDENCY_USE_SHARED="OFF" ARROW_EXTRA_ERROR_CONTEXT="ON" ARROW_FILESYSTEM="ON" ARROW_HDFS="ON" ARROW_JSON="ON" ARROW_PARQUET="ON" ARROW_WITH_ZSTD="ON" CMAKE_BUILD_TYPE="Debug" // IMO this option is the main trigger for the error, because `checked_cast` only uses `dynamic_cast` when `NDEBUG` is not defined: ```cpp template <typename OutputType, typename InputType> inline OutputType checked_cast(InputType&& value) { static_assert(std::is_class<typename std::remove_pointer< typename std::remove_reference<InputType>::type>::type>::value, "checked_cast input type must be a class"); static_assert(std::is_class<typename std::remove_pointer< typename std::remove_reference<OutputType>::type>::type>::value, "checked_cast output type must be a class"); #ifdef NDEBUG return static_cast<OutputType>(value); #else return dynamic_cast<OutputType>(value); #endif } ``` ### Component(s) C++ -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org