uchenily opened a new issue, #43991:
URL: https://github.com/apache/arrow/issues/43991
### Describe the bug, including details regarding any error messages,
version, and platform.
Hello,
I get a segmentation fault when calling `arrow::acero::DeclarationToStatus()`.
The gdb backtrace is as follows:
```c++
Thread 4 "bow-exe" received signal SIGSEGV, Segmentation fault.
0x00007ffff55a5fb8 in arrow::compute::FunctionOptions::operator=
(this=0x7fffe4007920) at
/root/work/build-arrow/cpp/src/arrow/compute/function_options.h:52
52 class ARROW_EXPORT FunctionOptions : public
util::EqualityComparable<FunctionOptions> {
(gdb) bt
#0 0x00007ffff55a5fb8 in arrow::compute::FunctionOptions::operator=
(this=0x7fffe4007920) at
/root/work/build-arrow/cpp/src/arrow/compute/function_options.h:52
#1 0x00007ffff59b7fef in arrow::compute::ScalarAggregateOptions::operator=
(this=0x7fffe4007920) at
/root/work/build-arrow/cpp/src/arrow/compute/api_aggregate.h:48
#2 0x00007ffff586b350 in arrow::compute::internal::(anonymous
namespace)::GroupedMinMaxImpl<arrow::Int64Type, void>::Init
(this=0x7fffe4007800, ctx=0x7fffe411c4e8, args=...) at
/root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1390
#3 0x00007ffff58582c5 in arrow::compute::internal::(anonymous
namespace)::HashAggregateInit<arrow::compute::internal::(anonymous
namespace)::GroupedMinMaxImpl<arrow::Int64Type, void> > (ctx=0x7fffebdfd370,
args=...)
at
/root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:85
#4 0x00007ffff584536b in arrow::compute::internal::(anonymous
namespace)::MinMaxInit<arrow::Int64Type> (ctx=0x7fffebdfd370, args=...) at
/root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1690
#5 0x00007ffff539bc02 in
std::__invoke_impl<arrow::Result<std::unique_ptr<arrow::compute::KernelState,
std::default_delete<arrow::compute::KernelState> > >,
arrow::Result<std::unique_ptr<arrow::compute::KernelState,
std::default_delete<arrow::compute::KernelState> > >
(*&)(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&),
arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&> (
__f=@0x7fffe4111b10: 0x7ffff5845330
<arrow::compute::internal::(anonymous
namespace)::MinMaxInit<arrow::Int64Type>(arrow::compute::KernelContext*,
arrow::compute::KernelInitArgs const&)>) at
/usr/include/c++/14.2.1/bits/invoke.h:61
#6 0x00007ffff539ab4d in
std::__invoke_r<arrow::Result<std::unique_ptr<arrow::compute::KernelState,
std::default_delete<arrow::compute::KernelState> > >,
arrow::Result<std::unique_ptr<arrow::compute::KernelState,
std::default_delete<arrow::compute::KernelState> > >
(*&)(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&),
arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&> (
__fn=@0x7fffe4111b10: 0x7ffff5845330
<arrow::compute::internal::(anonymous
namespace)::MinMaxInit<arrow::Int64Type>(arrow::compute::KernelContext*,
arrow::compute::KernelInitArgs const&)>) at
/usr/include/c++/14.2.1/bits/invoke.h:116
#7 0x00007ffff53992e9 in
std::_Function_handler<arrow::Result<std::unique_ptr<arrow::compute::KernelState,
std::default_delete<arrow::compute::KernelState> >
>(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs const&),
arrow::Result<std::unique_ptr<arrow::compute::KernelState,
std::default_delete<arrow::compute::KernelState> > >
(*)(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs
const&)>::_M_invoke (__functor=..., __args#0=@0x7fffebdfd0d8: 0x7fffebdfd370,
__args#1=...) at /usr/include/c++/14.2.1/bits/std_function.h:291
#8 0x00007ffff53ae465 in
std::function<arrow::Result<std::unique_ptr<arrow::compute::KernelState,
std::default_delete<arrow::compute::KernelState> >
>(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs
const&)>::operator() (this=0x7fffe4111b10, __args#0=0x7fffebdfd370,
__args#1=...) at /usr/include/c++/14.2.1/bits/std_function.h:591
#9 0x00007ffff5832b13 in operator() (__closure=0x7fffe41132a0,
ctx=0x7fffebdfd370, args=...) at
/root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1704
#10 0x00007ffff5899c45 in
std::__invoke_impl<arrow::Result<std::unique_ptr<arrow::compute::KernelState>
>, arrow::compute::internal::(anonymous
namespace)::MakeMinOrMaxKernel<(arrow::compute::internal::MinOrMax)0>(arrow::compute::HashAggregateFunction*)::<lambda(arrow::compute::KernelContext*,
const arrow::compute::KernelInitArgs&)>&, arrow::compute::KernelContext*,
const arrow::compute::KernelInitArgs&>(std::__invoke_other, struct {...} &)
(__f=...) at /usr/include/c++/14.2.1/bits/invoke.h:61
#11 0x00007ffff588969f in
std::__invoke_r<arrow::Result<std::unique_ptr<arrow::compute::KernelState> >,
arrow::compute::internal::(anonymous
namespace)::MakeMinOrMaxKernel<(arrow::compute::internal::MinOrMax)0>(arrow::compute::HashAggregateFunction*)::<lambda(arrow::compute::KernelContext*,
const arrow::compute::KernelInitArgs&)>&, arrow::compute::KernelContext*,
const arrow::compute::KernelInitArgs&>(struct {...} &) (__fn=...) at
/usr/include/c++/14.2.1/bits/invoke.h:116
#12 0x00007ffff58671db in
std::_Function_handler<arrow::Result<std::unique_ptr<arrow::compute::KernelState,
std::default_delete<arrow::compute::KernelState> >
>(arrow::compute::KernelContext*, const arrow::compute::KernelInitArgs&),
arrow::compute::internal::(anonymous
namespace)::MakeMinOrMaxKernel<(arrow::compute::internal::MinOrMax)0>(arrow::compute::HashAggregateFunction*)::<lambda(arrow::compute::KernelContext*,
const arrow::compute::KernelInitArgs&)> >::_M_invoke(const std::_Any_data &,
arrow::compute::KernelContext *&&, const arrow::compute::KernelInitArgs &) (
__functor=..., __args#0=@0x7fffebdfd2c8: 0x7fffebdfd370, __args#1=...)
at /usr/include/c++/14.2.1/bits/std_function.h:291
#13 0x00007ffff53ae465 in
std::function<arrow::Result<std::unique_ptr<arrow::compute::KernelState,
std::default_delete<arrow::compute::KernelState> >
>(arrow::compute::KernelContext*, arrow::compute::KernelInitArgs
const&)>::operator() (this=0x7fffe41132a0, __args#0=0x7fffebdfd370,
__args#1=...) at /usr/include/c++/14.2.1/bits/std_function.h:591
#14 0x00007ffff3392aec in arrow::acero::aggregate::InitKernel
(kernel=0x7fffe4113290, ctx=0x7fffe411c4e8, aggregate=..., in_types=std::vector
of length 1, capacity 1 = {...}) at
/root/work/build-arrow/cpp/src/arrow/acero/aggregate_internal.cc:102
#15 0x00007ffff339305f in arrow::acero::aggregate::InitKernels
(kernels=std::vector of length 2, capacity 2 = {...}, ctx=0x7fffe411c4e8,
aggregates=std::vector of length 2, capacity 2 = {...}, in_types=std::vector of
length 2, capacity 2 = {...})
at /root/work/build-arrow/cpp/src/arrow/acero/aggregate_internal.cc:129
#16 0x00007ffff33818ee in
arrow::acero::aggregate::GroupByNode::MakeAggregateNodeArgs
(input_schema=std::shared_ptr<arrow::Schema> (use count 2, weak count 0) =
{...}, keys=std::vector of length 1, capacity 1 = {...},
segment_keys=std::vector of length 0, capacity 0,
aggs=std::vector of length 2, capacity 2 = {...}, ctx=0x7fffe411c4e8,
is_cpu_parallel=true) at
/root/work/build-arrow/cpp/src/arrow/acero/groupby_aggregate_node.cc:142
#17 0x00007ffff3382663 in arrow::acero::aggregate::GroupByNode::Make
(plan=0x7fffe411c400, inputs=std::vector of length 1, capacity 1 = {...},
options=...) at
/root/work/build-arrow/cpp/src/arrow/acero/groupby_aggregate_node.cc:193
#18 0x00007ffff33949e9 in operator() (__closure=0x7fffebdfde40,
plan=0x7fffe411c400, inputs=std::vector of length 0, capacity 0, options=...)
at /root/work/build-arrow/cpp/src/arrow/acero/aggregate_internal.cc:253
#19 0x00007ffff33951d9 in
std::__invoke_impl<arrow::Result<arrow::acero::ExecNode*>,
arrow::acero::internal::RegisterAggregateNode(arrow::acero::ExecFactoryRegistry*)::<lambda(arrow::acero::ExecPlan*,
std::vector<arrow::acero::ExecNode*>, const arrow::acero::ExecNodeOptions&)>&,
arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*,
std::allocator<arrow::acero::ExecNode*> >, const
arrow::acero::ExecNodeOptions&>(std::__invoke_other, struct {...} &) (__f=...)
at /usr/include/c++/14.2.1/bits/invoke.h:61
#20 0x00007ffff3395031 in
std::__invoke_r<arrow::Result<arrow::acero::ExecNode*>,
arrow::acero::internal::RegisterAggregateNode(arrow::acero::ExecFactoryRegistry*)::<lambda(arrow::acero::ExecPlan*,
std::vector<arrow::acero::ExecNode*>, const arrow::acero::ExecNodeOptions&)>&,
arrow::acero::ExecPlan*, std::vector<arrow::acero::ExecNode*,
std::allocator<arrow::acero::ExecNode*> >, const
arrow::acero::ExecNodeOptions&>(struct {...} &) (__fn=...) at
/usr/include/c++/14.2.1/bits/invoke.h:116
#21 0x00007ffff3394eb0 in
std::_Function_handler<arrow::Result<arrow::acero::ExecNode*>(arrow::acero::ExecPlan*,
std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*>
>, const arrow::acero::ExecNodeOptions&),
arrow::acero::internal::RegisterAggregateNode(arrow::acero::ExecFactoryRegistry*)::<lambda(arrow::acero::ExecPlan*,
std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*>
>, const arrow::acero::ExecNodeOptions&)> >::_M_invoke(const std::_Any_data &,
arrow::acero::ExecPlan *&&, std::vector<arrow::acero::ExecNode*,
std::allocator<arrow::acero::ExecNode*> > &&, const
arrow::acero::ExecNodeOptions &) (__functor=..., __args#0=@0x7fffebdfdd88:
0x7fffe411c400, __args#1=..., __args#2=...) at
/usr/include/c++/14.2.1/bits/std_function.h:291
#22 0x00007ffff340a3b1 in
std::function<arrow::Result<arrow::acero::ExecNode*>(arrow::acero::ExecPlan*,
std::vector<arrow::acero::ExecNode*, std::allocator<arrow::acero::ExecNode*> >,
arrow::acero::ExecNodeOptions const&)>::operator() (this=0x7fffebdfde40,
__args#0=0x7fffe411c400,
__args#1=std::vector of length 0, capacity 0, __args#2=...) at
/usr/include/c++/14.2.1/bits/std_function.h:591
#23 0x00007ffff3407715 in arrow::acero::MakeExecNode
(factory_name="aggregate", plan=0x7fffe411c400, inputs=std::vector of length 0,
capacity 0, options=..., registry=0x7ffff36a75a0
<arrow::acero::default_exec_factory_registry()::instance>)
at /root/work/build-arrow/cpp/src/arrow/acero/exec_plan.h:381
#24 0x00007ffff33f2b40 in arrow::acero::Declaration::AddToPlan
(this=0x7fffe413dfc0, plan=0x7fffe411c400, registry=0x7ffff36a75a0
<arrow::acero::default_exec_factory_registry()::instance>) at
/root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:585
#25 0x00007ffff33f29bd in arrow::acero::Declaration::AddToPlan
(this=0x7fffebdfe460, plan=0x7fffe411c400, registry=0x7ffff36a75a0
<arrow::acero::default_exec_factory_registry()::instance>) at
/root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:581
#26 0x00007ffff33f5918 in arrow::acero::(anonymous
namespace)::DeclarationToStatusImpl (declaration=..., options=...,
cpu_executor=0x7fffe40052c0) at
/root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:714
#27 0x00007ffff33f9164 in operator() (__closure=0x7fffe4005628,
executor=0x7fffe40052c0) at
/root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:929
#28 0x00007ffff3403aab in
arrow::internal::FnOnce<arrow::Future<arrow::internal::Empty>(arrow::internal::Executor*)>::FnImpl<arrow::acero::DeclarationToStatus(Declaration,
bool, arrow::MemoryPool*,
arrow::compute::FunctionRegistry*)::<lambda(arrow::internal::Executor*)>
>::invoke(arrow::internal::Executor *&&) (this=0x7fffe4005620,
a#0=@0x7fffebdfe548: 0x7fffe40052c0) at
/root/work/build-arrow/cpp/src/arrow/util/functional.h:152
#29 0x00007ffff3417fb3 in
arrow::internal::FnOnce<arrow::Future<arrow::internal::Empty>
(arrow::internal::Executor*)>::operator()(arrow::internal::Executor*) &&
(this=0x7fffebdfe628, a#0=0x7fffe40052c0) at
/root/work/build-arrow/cpp/src/arrow/util/functional.h:140
#30 0x00007ffff340fae9 in
arrow::internal::RunSynchronously<arrow::Future<arrow::internal::Empty>,
arrow::internal::Empty> (get_future=..., use_threads=true) at
/root/work/build-arrow/cpp/src/arrow/util/thread_pool.h:587
#31 0x00007ffff33f92bd in arrow::acero::DeclarationToStatus
(declaration=..., use_threads=true, memory_pool=0x7ffff7bff7c0
<arrow::global_state+320>, function_registry=0x0) at
/root/work/build-arrow/cpp/src/arrow/acero/exec_plan.cc:931
...
```
While debugging, I found that the problem appears to be at this line:
```diff
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -1385,7 +1385,9 @@ struct GroupedMinMaxImpl final : public
GroupedAggregator {
typename std::conditional<is_boolean_type<Type>::value, uint8_t,
CType>::type;
Status Init(ExecContext* ctx, const KernelInitArgs& args) override {
- options_ = *checked_cast<const ScalarAggregateOptions*>(args.options);
+ // options_ = *checked_cast<const
ScalarAggregateOptions*>(args.options); // segmentation fault <-- the original
code
+ // options_ = *dynamic_cast<const
ScalarAggregateOptions*>(args.options); // segmentation fault
+ options_ = *static_cast<const ScalarAggregateOptions*>(args.options);
// ok
// type_ initialized by MinMaxInit
mins_ = TypedBufferBuilder<CType>(ctx->memory_pool());
maxes_ = TypedBufferBuilder<CType>(ctx->memory_pool());
```
If I replace the cast on this line with `static_cast<const
ScalarAggregateOptions*>`, the crash goes away, but I want to understand
why this is happening and how to fix it properly.
```c++
Thread 4 "bow-exe" hit Breakpoint 2.10, arrow::compute::internal::(anonymous
namespace)::GroupedMinMaxImpl<arrow::Int64Type, void>::Init
(this=0x7fffe4007800, ctx=0x7fffe411c498, args=...) at
/root/work/build-arrow/cpp/src/arrow/compute/kernels/hash_aggregate.cc:1387
1387 Status Init(ExecContext* ctx, const KernelInitArgs& args) override
{
(gdb) n
1388 options_ = *checked_cast<const
ScalarAggregateOptions*>(args.options);
(gdb) p args.options
$1 = (const arrow::compute::FunctionOptions *) 0x7fffe4005920
(gdb) p *(const ScalarAggregateOptions*)(args.options)
$3 = {<arrow::compute::FunctionOptions> =
{<arrow::util::EqualityComparable<arrow::compute::FunctionOptions>> = {<No data
fields>}, _vptr.FunctionOptions = 0x7ffff7b3d198 <vtable for
arrow::compute::CumulativeOptions+16>,
options_type_ = 0x7ffff7c02d40
<arrow::compute::internal::GetFunctionOptionsType<arrow::compute::CumulativeOptions,
arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions,
std::optional<std::shared_ptr<arrow::Scalar> > >,
arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions, bool>
>(arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions,
std::optional<std::shared_ptr<arrow::Scalar> > > const&,
arrow::internal::DataMemberProperty<arrow::compute::CumulativeOptions, bool>
const&)::instance>},
static kTypeName = "ScalarAggregateOptions", skip_nulls = 117, min_count =
1819231092}
(gdb) n
Thread 4 "bow-exe" received signal SIGSEGV, Segmentation fault.
0x00007ffff55a5fb8 in arrow::compute::FunctionOptions::operator=
(this=0x7fffe4007920) at
/root/work/build-arrow/cpp/src/arrow/compute/function_options.h:52
52 class ARROW_EXPORT FunctionOptions : public
util::EqualityComparable<FunctionOptions> {
```
I'm using tag `apache-arrow-17.0.0-rc2`, and I built Arrow with the following options:
ARROW_BUILD_INTEGRATION="OFF"
ARROW_BUILD_TESTS="OFF"
ARROW_COMPUTE="ON"
ARROW_CSV="ON"
ARROW_DATASET="ON"
ARROW_DEPENDENCY_SOURCE="VCPKG"
ARROW_DEPENDENCY_USE_SHARED="OFF"
ARROW_EXTRA_ERROR_CONTEXT="ON"
ARROW_FILESYSTEM="ON"
ARROW_HDFS="ON"
ARROW_JSON="ON"
ARROW_PARQUET="ON"
ARROW_WITH_ZSTD="ON"
CMAKE_BUILD_TYPE="Debug" // IMO this option is what triggers the error: without
NDEBUG, checked_cast uses dynamic_cast instead of static_cast (see below)
```cpp
template <typename OutputType, typename InputType>
inline OutputType checked_cast(InputType&& value) {
static_assert(std::is_class<typename std::remove_pointer<
typename
std::remove_reference<InputType>::type>::type>::value,
"checked_cast input type must be a class");
static_assert(std::is_class<typename std::remove_pointer<
typename
std::remove_reference<OutputType>::type>::type>::value,
"checked_cast output type must be a class");
#ifdef NDEBUG
return static_cast<OutputType>(value);
#else
return dynamic_cast<OutputType>(value);
#endif
}
```
### Component(s)
C++
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]