uchenily opened a new issue, #43991:
URL: https://github.com/apache/arrow/issues/43991

   ### Describe the bug, including details regarding any error messages, 
version, and platform.
   
   Hello, 
     I get a segmentation fault when calling 
`arrow::acero::DeclarationToStatus()`. The gdb stack trace is as follows:
   
   ```c++
   Thread 4 "bow-exe" received signal SIGSEGV, Segmentation fault.
   0x00007ffff55a5fb8 in arrow::compute::FunctionOptions::operator= 
(this=0x7fffe4007920) at 
/root/work/build-arrow/cpp/src/arrow/compute/function_options.h:52
   52      class ARROW_EXPORT FunctionOptions : public 
util::EqualityComparable<FunctionOptions> {
   (gdb) bt
   #0  0x00007ffff55a5fb8 in arrow::compute::FunctionOptions::operator= 
(this=0x7fffe4007920) at 
/root/work/build-arrow/cpp/src/arrow/compute/function_options.h:52
   #1  0x00007ffff59b7fef in arrow::compute::ScalarAggregateOptions::operator= 
(this=0x7fffe4007920) at 
/root/work/build-arrow/cpp/src/arrow/compute/api_aggregate.h:48
   #2  0x00007ffff586b350 in arrow::compute::internal::(anonymous 
namespace)::GroupedMinMaxImpl<arrow::Int64Type, void>::Init 
(this=0x7fffe4007800, ctx=0x7fffe411c4e8, args=...) at 
/root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1390
   #3  0x00007ffff58582c5 in arrow::compute::internal::(anonymous 
namespace)::HashAggregateInit<arrow::compute::internal::(anonymous 
namespace)::GroupedMinMaxImpl<arrow::Int64Type, void> > (ctx=0x7fffebdfd370, 
args=...)
       at 
/root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:85
   #4  0x00007ffff584536b in arrow::compute::internal::(anonymous 
namespace)::MinMaxInit<arrow::Int64Type> (ctx=0x7fffebdfd370, args=...) at 
/root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1690
   #5  0x00007ffff539bc02 in 
std::__invoke_impl<arrow::Result<std::unique_ptr<arrow::compute::KernelState, 
std::default_delete<arrow::compute::KernelState> > >, 
arrow::Result<std::unique_ptr<arrow::compute::KernelState, 
std::default_delete<arrow::compute::KernelState> > > 
(*&)(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&), 
arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&> (
       __f=@0x7fffe4111b10: 0x7ffff5845330 
<arrow::compute::internal::(anonymous 
namespace)::MinMaxInit<arrow::Int64Type>(arrow::compute::KernelContext*, 
arrow::compute::KernelInitArgs const&)>) at 
/usr/include/c++/14.2.1/bits/invoke.h:61
   #6  0x00007ffff539ab4d in 
std::__invoke_r<arrow::Result<std::unique_ptr<arrow::compute::KernelState, 
std::default_delete<arrow::compute::KernelState> > >, 
arrow::Result<std::unique_ptr<arrow::compute::KernelState, 
std::default_delete<arrow::compute::KernelState> > > 
(*&)(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&), 
arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&> (
       __fn=@0x7fffe4111b10: 0x7ffff5845330 
<arrow::compute::internal::(anonymous 
namespace)::MinMaxInit<arrow::Int64Type>(arrow::compute::KernelContext*, 
arrow::compute::KernelInitArgs const&)>) at 
/usr/include/c++/14.2.1/bits/invoke.h:116
   #7  0x00007ffff53992e9 in 
std::_Function_handler<arrow::Result<std::unique_ptr<arrow::compute::KernelState,
 std::default_delete<arrow::compute::KernelState> > 
>(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&), 
arrow::Result<std::unique_ptr<arrow::compute::KernelState, 
std::default_delete<arrow::compute::KernelState> > > 
(*)(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs 
const&)>::_M_invoke (__functor=..., __args#0=@0x7fffebdfd0d8: 0x7fffebdfd370, 
__args#1=...) at /usr/include/c++/14.2.1/bits/std_function.h:291
   #8  0x00007ffff53ae465 in 
std::function<arrow::Result<std::unique_ptr<arrow::compute::KernelState, 
std::default_delete<arrow::compute::KernelState> > 
>(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs 
const&)>::operator() (this=0x7fffe4111b10, __args#0=0x7fffebdfd370,
       __args#1=...) at /usr/include/c++/14.2.1/bits/std_function.h:591
   #9  0x00007ffff5832b13 in operator() (__closure=0x7fffe41132a0, 
ctx=0x7fffebdfd370, args=...) at 
/root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1704
   #10 0x00007ffff5899c45 in 
std::__invoke_impl<arrow::Result<std::unique_ptr<arrow::compute::KernelState> 
>, arrow::compute::internal::(anonymous 
namespace)::MakeMinOrMaxKernel<(arrow::compute::internal::MinOrMax)0>(arrow::compute::HashAggregateFunction*)::<lambda(arrow::compute::KernelContext*,
 const arrow::compute::KernelInitArgs&)>&, arrow::compute::KernelContext*, 
const arrow::compute::KernelInitArgs&>(std::__invoke_other, struct {...} &) 
(__f=...) at /usr/include/c++/14.2.1/bits/invoke.h:61
   #11 0x00007ffff588969f in 
std::__invoke_r<arrow::Result<std::unique_ptr<arrow::compute::KernelState> >, 
arrow::compute::internal::(anonymous 
namespace)::MakeMinOrMaxKernel<(arrow::compute::internal::MinOrMax)0>(arrow::compute::HashAggregateFunction*)::<lambda(arrow::compute::KernelContext*,
 const arrow::compute::KernelInitArgs&)>&, arrow::compute::KernelContext*, 
const arrow::compute::KernelInitArgs&>(struct {...} &) (__fn=...) at 
/usr/include/c++/14.2.1/bits/invoke.h:116
   #12 0x00007ffff58671db in 
std::_Function_handler<arrow::Result<std::unique_ptr<arrow::compute::KernelState,
 std::default_delete<arrow::compute::KernelState> > 
>(arrow::compute::KernelContext*, const arrow::compute::KernelInitArgs&), 
arrow::compute::internal::(anonymous 
namespace)::MakeMinOrMaxKernel<(arrow::compute::internal::MinOrMax)0>(arrow::compute::HashAggregateFunction*)::<lambda(arrow::compute::KernelContext*,
 const arrow::compute::KernelInitArgs&)> >::_M_invoke(const std::_Any_data &, 
arrow::compute::KernelContext *&&, const arrow::compute::KernelInitArgs &) (
       __functor=..., __args#0=@0x7fffebdfd2c8: 0x7fffebdfd370, __args#1=...) 
at /usr/include/c++/14.2.1/bits/std_function.h:291
   #13 0x00007ffff53ae465 in 
std::function<arrow::Result<std::unique_ptr<arrow::compute::KernelState, 
std::default_delete<arrow::compute::KernelState> > 
>(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs 
const&)>::operator() (this=0x7fffe41132a0, __args#0=0x7fffebdfd370,
       __args#1=...) at /usr/include/c++/14.2.1/bits/std_function.h:591
   #14 0x00007ffff3392aec in arrow::acero::aggregate::InitKernel 
(kernel=0x7fffe4113290, ctx=0x7fffe411c4e8, aggregate=..., in_types=std::vector 
of length 1, capacity 1 = {...}) at 
/root/work/build-arrow/cpp/src/arrow/acero/aggregate_internal.cc:102
   #15 0x00007ffff339305f in arrow::acero::aggregate::InitKernels 
(kernels=std::vector of length 2, capacity 2 = {...}, ctx=0x7fffe411c4e8, 
aggregates=std::vector of length 2, capacity 2 = {...}, in_types=std::vector of 
length 2, capacity 2 = {...})
       at /root/work/build-arrow/cpp/src/arrow/acero/aggregate_internal.cc:129
   #16 0x00007ffff33818ee in 
arrow::acero::aggregate::GroupByNode::MakeAggregateNodeArgs 
(input_schema=std::shared_ptr<arrow::Schema> (use count 2, weak count 0) = 
{...}, keys=std::vector of length 1, capacity 1 = {...}, 
segment_keys=std::vector of length 0, capacity 0,
       aggs=std::vector of length 2, capacity 2 = {...}, ctx=0x7fffe411c4e8, 
is_cpu_parallel=true) at 
/root/work/build-arrow/cpp/src/arrow/acero/groupby_aggregate_node.cc:142
   #17 0x00007ffff3382663 in arrow::acero::aggregate::GroupByNode::Make 
(plan=0x7fffe411c400, inputs=std::vector of length 1, capacity 1 = {...}, 
options=...) at 
/root/work/build-arrow/cpp/src/arrow/acero/groupby_aggregate_node.cc:193
   #18 0x00007ffff33949e9 in operator() (__closure=0x7fffebdfde40, 
plan=0x7fffe411c400, inputs=std::vector of length 0, capacity 0, options=...) 
at /root/work/build-arrow/cpp/src/arrow/acero/aggregate_internal.cc:253
   #19 0x00007ffff33951d9 in 
std::__invoke_impl<arrow::Result<arrow::acero::ExecNode*>, 
arrow::acero::internal::RegisterAggregateNode(arrow::acero::ExecFactoryRegistry*)::<lambda(arrow::acero::ExecPlan*,
 std::vector<arrow::acero::ExecNode*>, const arrow::acero::ExecNodeOptions&)>&, 
arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*, 
std::allocator<arrow::acero::ExecNode*> >, const 
arrow::acero::ExecNodeOptions&>(std::__invoke_other, struct {...} &) (__f=...) 
at /usr/include/c++/14.2.1/bits/invoke.h:61
   #20 0x00007ffff3395031 in 
std::__invoke_r<arrow::Result<arrow::acero::ExecNode*>, 
arrow::acero::internal::RegisterAggregateNode(arrow::acero::ExecFactoryRegistry*)::<lambda(arrow::acero::ExecPlan*,
 std::vector<arrow::acero::ExecNode*>, const arrow::acero::ExecNodeOptions&)>&, 
arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*, 
std::allocator<arrow::acero::ExecNode*> >, const 
arrow::acero::ExecNodeOptions&>(struct {...} &) (__fn=...) at 
/usr/include/c++/14.2.1/bits/invoke.h:116
   #21 0x00007ffff3394eb0 in 
std::_Function_handler<arrow::Result<arrow::acero::ExecNode*>(arrow::acero::ExecPlan*,
 std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*> 
>, const arrow::acero::ExecNodeOptions&), 
arrow::acero::internal::RegisterAggregateNode(arrow::acero::ExecFactoryRegistry*)::<lambda(arrow::acero::ExecPlan*,
 std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*> 
>, const arrow::acero::ExecNodeOptions&)> >::_M_invoke(const std::_Any_data &, 
arrow::acero::ExecPlan *&&, std::vector<arrow::acero::ExecNode*, 
std::allocator<arrow::acero::ExecNode*> > &&, const 
arrow::acero::ExecNodeOptions &) (__functor=..., __args#0=@0x7fffebdfdd88: 
0x7fffe411c400, __args#1=..., __args#2=...) at 
/usr/include/c++/14.2.1/bits/std_function.h:291
   #22 0x00007ffff340a3b1 in 
std::function<arrow::Result<arrow::acero::ExecNode*>(arrow::acero::ExecPlan*, 
std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*> >, 
arrow::acero::ExecNodeOptions const&)>::operator() (this=0x7fffebdfde40, 
__args#0=0x7fffe411c400,
       __args#1=std::vector of length 0, capacity 0, __args#2=...) at 
/usr/include/c++/14.2.1/bits/std_function.h:591
   #23 0x00007ffff3407715 in arrow::acero::MakeExecNode 
(factory_name="aggregate", plan=0x7fffe411c400, inputs=std::vector of length 0, 
capacity 0, options=..., registry=0x7ffff36a75a0 
<arrow::acero::default_exec_factory_registry()::instance>)
       at /root/work/build-arrow/cpp/src/arrow/acero/exec_plan.h:381
   #24 0x00007ffff33f2b40 in arrow::acero::Declaration::AddToPlan 
(this=0x7fffe413dfc0, plan=0x7fffe411c400, registry=0x7ffff36a75a0 
<arrow::acero::default_exec_factory_registry()::instance>) at 
/root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:585
   #25 0x00007ffff33f29bd in arrow::acero::Declaration::AddToPlan 
(this=0x7fffebdfe460, plan=0x7fffe411c400, registry=0x7ffff36a75a0 
<arrow::acero::default_exec_factory_registry()::instance>) at 
/root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:581
   #26 0x00007ffff33f5918 in arrow::acero::(anonymous 
namespace)::DeclarationToStatusImpl (declaration=..., options=..., 
cpu_executor=0x7fffe40052c0) at 
/root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:714
   #27 0x00007ffff33f9164 in operator() (__closure=0x7fffe4005628, 
executor=0x7fffe40052c0) at 
/root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:929
   #28 0x00007ffff3403aab in 
arrow::internal::FnOnce<arrow::Future<arrow::internal::Empty>(arrow::internal::Executor*)>::FnImpl<arrow::acero::DeclarationToStatus(Declaration,
 bool, arrow::MemoryPool*, 
arrow::compute::FunctionRegistry*)::<lambda(arrow::internal::Executor*)> 
>::invoke(arrow::internal::Executor *&&) (this=0x7fffe4005620, 
a#0=@0x7fffebdfe548: 0x7fffe40052c0) at 
/root/work/build-arrow/cpp/src/arrow/util/functional.h:152
   #29 0x00007ffff3417fb3 in 
arrow::internal::FnOnce<arrow::Future<arrow::internal::Empty> 
(arrow::internal::Executor*)>::operator()(arrow::internal::Executor*) && 
(this=0x7fffebdfe628, a#0=0x7fffe40052c0) at 
/root/work/build-arrow/cpp/src/arrow/util/functional.h:140
   #30 0x00007ffff340fae9 in 
arrow::internal::RunSynchronously<arrow::Future<arrow::internal::Empty>, 
arrow::internal::Empty> (get_future=..., use_threads=true) at 
/root/work/build-arrow/cpp/src/arrow/util/thread_pool.h:587
   #31 0x00007ffff33f92bd in arrow::acero::DeclarationToStatus 
(declaration=..., use_threads=true, memory_pool=0x7ffff7bff7c0 
<arrow::global_state+320>, function_registry=0x0) at 
/root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:931
   ... 
   ```
   
   While debugging, I found that the problem appears to be here:
   
   ```diff
   --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
   +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
   @@ -1385,7 +1385,9 @@ struct GroupedMinMaxImpl final : public 
GroupedAggregator {
          typename std::conditional<is_boolean_type<Type>::value, uint8_t, 
CType>::type;
   
      Status Init(ExecContext* ctx, const KernelInitArgs& args) override {
   -    options_ = *checked_cast<const ScalarAggregateOptions*>(args.options);
   +    // options_ = *checked_cast<const 
ScalarAggregateOptions*>(args.options); // segmentation fault <-- the original 
code
   +    // options_ = *dynamic_cast<const 
ScalarAggregateOptions*>(args.options); // segmentation fault
   +    options_ = *static_cast<const ScalarAggregateOptions*>(args.options); 
// ok
        // type_ initialized by MinMaxInit
        mins_ = TypedBufferBuilder<CType>(ctx->memory_pool());
        maxes_ = TypedBufferBuilder<CType>(ctx->memory_pool());
   ```
   
   Replacing this line with `static_cast<const ScalarAggregateOptions*>` 
makes the crash go away, but I would like to understand why this happens and 
how to fix it properly.
   
   
   
   ```c++
   Thread 4 "bow-exe" hit Breakpoint 2.10, arrow::compute::internal::(anonymous 
namespace)::GroupedMinMaxImpl<arrow::Int64Type, void>::Init 
(this=0x7fffe4007800, ctx=0x7fffe411c498, args=...) at 
/root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1387
   1387      Status Init(ExecContext* ctx, const KernelInitArgs& args) override 
{
   (gdb) n
   1388        options_ = *checked_cast<const 
ScalarAggregateOptions*>(args.options);
   (gdb) p args.options
   $1 = (const arrow::compute::FunctionOptions *) 0x7fffe4005920
   (gdb) p *(const ScalarAggregateOptions*)(args.options)
   $3 = {<arrow::compute::FunctionOptions> = 
{<arrow::util::EqualityComparable<arrow::compute::FunctionOptions>> = {<No data 
fields>}, _vptr.FunctionOptions = 0x7ffff7b3d198 <vtable for 
arrow::compute::CumulativeOptions+16>,
       options_type_ = 0x7ffff7c02d40 
<arrow::compute::internal::GetFunctionOptionsType<arrow::compute::CumulativeOptions,
 arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions, 
std::optional<std::shared_ptr<arrow::Scalar> > >, 
arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions, bool> 
>(arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions, 
std::optional<std::shared_ptr<arrow::Scalar> > > const&, 
arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions, bool> 
const&)::instance>},
     static kTypeName = "ScalarAggregateOptions", skip_nulls = 117, min_count = 
1819231092}
   (gdb) n
   
   Thread 4 "bow-exe" received signal SIGSEGV, Segmentation fault.
   0x00007ffff55a5fb8 in arrow::compute::FunctionOptions::operator= 
(this=0x7fffe4007920) at 
/root/work/build-arrow/cpp/src/arrow/compute/function_options.h:52
   52      class ARROW_EXPORT FunctionOptions : public 
util::EqualityComparable<FunctionOptions> {
   ```
   
   
   I'm using tag `apache-arrow-17.0.0-rc2`, and I built Arrow with these options:
   
   ARROW_BUILD_INTEGRATION="OFF"
   ARROW_BUILD_TESTS="OFF"
   ARROW_COMPUTE="ON"
   ARROW_CSV="ON"
   ARROW_DATASET="ON"
   ARROW_DEPENDENCY_SOURCE="VCPKG"
   ARROW_DEPENDENCY_USE_SHARED="OFF"
   ARROW_EXTRA_ERROR_CONTEXT="ON"
   ARROW_FILESYSTEM="ON"
   ARROW_HDFS="ON"
   ARROW_JSON="ON"
   ARROW_PARQUET="ON"
   ARROW_WITH_ZSTD="ON"
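   CMAKE_BUILD_TYPE="Debug"  // IMO this option is the main trigger: in a Debug 
build NDEBUG is not defined, so `checked_cast` (shown below) uses `dynamic_cast` 
instead of `static_cast`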
   
   ```cpp
   template <typename OutputType, typename InputType>
   inline OutputType checked_cast(InputType&& value) {
     static_assert(std::is_class<typename std::remove_pointer<
                       typename 
std::remove_reference<InputType>::type>::type>::value,
                   "checked_cast input type must be a class");
     static_assert(std::is_class<typename std::remove_pointer<
                       typename 
std::remove_reference<OutputType>::type>::type>::value,
                   "checked_cast output type must be a class");
   #ifdef NDEBUG
     return static_cast<OutputType>(value);
   #else
     return dynamic_cast<OutputType>(value);
   #endif
   }
   ```
   
   
   ### Component(s)
   
   C++


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@arrow.apache.org.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to