This is an automated email from the ASF dual-hosted git repository.
raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 56c0e2f508 GH-46915: [C++][Compute] Initialize Compute kernels on
benchmarks that require extra kernels (#46922)
56c0e2f508 is described below
commit 56c0e2f508fdc5137d6734b406634386f9284a52
Author: Raúl Cumplido <[email protected]>
AuthorDate: Sun Jul 6 12:03:26 2025 +0200
GH-46915: [C++][Compute] Initialize Compute kernels on benchmarks that
require extra kernels (#46922)
### Rationale for this change
Benchmarks that require non-core compute kernels fail because those functions
are not registered: the benchmarks never initialize the compute kernels.
### What changes are included in this PR?
- Create a custom main function that can be compiled along with the compute
benchmarks in order to initialize the compute kernels.
- Create a new CMake `add_arrow_compute_benchmark` function that will
automatically add the new `benchmark_main.cc` file to the sources
for the benchmark, prefix the benchmark with `arrow-compute` by default and link
`arrow_compute_{shared/static}`.
- Update benchmarks that require the compute kernels to use
`add_arrow_compute_benchmark` instead of `add_arrow_benchmark`.
- Adapt `add_benchmark` function to be able to receive `EXTRA_SOURCES` and
"return" the `OUTPUT_BENCHMARK_NAME`
### Are these changes tested?
This has been tested locally.
### Are there any user-facing changes?
No
* GitHub Issue: #46915
Lead-authored-by: Raúl Cumplido <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Raúl Cumplido <[email protected]>
---
cpp/cmake_modules/BuildUtils.cmake | 11 ++-
cpp/src/arrow/acero/CMakeLists.txt | 114 ++++++++++++---------------
cpp/src/arrow/compute/CMakeLists.txt | 45 ++++++++++-
cpp/src/arrow/compute/benchmark_main.cc | 33 ++++++++
cpp/src/arrow/compute/kernels/CMakeLists.txt | 32 ++++----
cpp/src/arrow/dataset/CMakeLists.txt | 42 +++++++---
6 files changed, 187 insertions(+), 90 deletions(-)
diff --git a/cpp/cmake_modules/BuildUtils.cmake
b/cpp/cmake_modules/BuildUtils.cmake
index 134f47b12e..d92d3af2e4 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -522,6 +522,7 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME)
STATIC_LINK_LIBS
DEPENDENCIES
SOURCES
+ EXTRA_SOURCES
LABELS)
cmake_parse_arguments(ARG
"${options}"
@@ -541,10 +542,16 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME)
set(BENCHMARK_NAME "${ARG_PREFIX}-${BENCHMARK_NAME}")
endif()
+ set(SOURCES "")
+
+ if(ARG_EXTRA_SOURCES)
+ list(APPEND SOURCES ${ARG_EXTRA_SOURCES})
+ endif()
+
if(ARG_SOURCES)
- set(SOURCES ${ARG_SOURCES})
+ list(APPEND SOURCES ${ARG_SOURCES})
else()
- set(SOURCES "${REL_BENCHMARK_NAME}.cc")
+ list(APPEND SOURCES "${REL_BENCHMARK_NAME}.cc")
endif()
# Make sure the executable name contains only hyphens, not underscores
diff --git a/cpp/src/arrow/acero/CMakeLists.txt
b/cpp/src/arrow/acero/CMakeLists.txt
index 37e00fd256..dc18afa979 100644
--- a/cpp/src/arrow/acero/CMakeLists.txt
+++ b/cpp/src/arrow/acero/CMakeLists.txt
@@ -129,6 +129,10 @@ if(ARROW_TESTING)
if(ARROW_WITH_OPENTELEMETRY)
target_link_libraries(arrow_acero_testing PRIVATE
${ARROW_OPENTELEMETRY_LIBS})
endif()
+ # arrow_compute_testing will register the kernels for Gtest. In order to register the kernels
+ # for Google benchmark we use a custom main function used on add_arrow_compute_benchmark.
+ set(ARROW_ACERO_BENCHMARKS_TEST_LINK_LIBS
+ ${ARROW_ACERO_TEST_LINK_LIBS} arrow_acero_testing
arrow_compute_core_testing)
list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_testing
arrow_compute_testing)
endif()
# Only for hash_aggregate_test.cc.
@@ -188,68 +192,52 @@ add_arrow_acero_test(hash_aggregate_test SOURCES
hash_aggregate_test.cc)
add_arrow_acero_test(test_util_internal_test SOURCES
test_util_internal_test.cc)
-if(ARROW_BUILD_BENCHMARKS)
- function(add_arrow_acero_benchmark REL_BENCHMARK_NAME)
- set(options)
- set(one_value_args PREFIX)
- set(multi_value_args LABELS)
- cmake_parse_arguments(ARG
- "${options}"
- "${one_value_args}"
- "${multi_value_args}"
- ${ARGN})
-
- if(ARG_PREFIX)
- set(PREFIX ${ARG_PREFIX})
- else()
- set(PREFIX "arrow-acero")
- endif()
-
- if(ARG_LABELS)
- set(LABELS ${ARG_LABELS})
- else()
- set(LABELS "arrow_acero")
- endif()
-
- add_arrow_benchmark(${REL_BENCHMARK_NAME}
- EXTRA_LINK_LIBS
- ${ARROW_ACERO_TEST_LINK_LIBS}
- PREFIX
- ${PREFIX}
- LABELS
- ${LABELS}
- ${ARG_UNPARSED_ARGUMENTS})
- endfunction()
-
- add_arrow_acero_benchmark(expression_benchmark SOURCES
expression_benchmark.cc)
-
- add_arrow_acero_benchmark(filter_benchmark SOURCES benchmark_util.cc
- filter_benchmark.cc)
-
- add_arrow_acero_benchmark(project_benchmark SOURCES benchmark_util.cc
- project_benchmark.cc)
-
- add_arrow_acero_benchmark(asof_join_benchmark SOURCES asof_join_benchmark.cc)
-
- add_arrow_acero_benchmark(tpch_benchmark SOURCES tpch_benchmark.cc)
-
- add_arrow_acero_benchmark(aggregate_benchmark SOURCES aggregate_benchmark.cc)
-
- add_arrow_acero_benchmark(hash_join_benchmark SOURCES hash_join_benchmark.cc)
-
- if(ARROW_BUILD_STATIC)
- target_link_libraries(arrow-acero-expression-benchmark PUBLIC
arrow_acero_static)
- target_link_libraries(arrow-acero-filter-benchmark PUBLIC
arrow_acero_static)
- target_link_libraries(arrow-acero-project-benchmark PUBLIC
arrow_acero_static)
- target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC
arrow_acero_static)
- target_link_libraries(arrow-acero-tpch-benchmark PUBLIC arrow_acero_static)
- target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC
arrow_acero_static)
+function(add_arrow_acero_benchmark REL_BENCHMARK_NAME)
+ set(options)
+ set(one_value_args PREFIX)
+ set(multi_value_args LABELS EXTRA_LINK_LIBS)
+ cmake_parse_arguments(ARG
+ "${options}"
+ "${one_value_args}"
+ "${multi_value_args}"
+ ${ARGN})
+
+ if(ARG_PREFIX)
+ set(PREFIX ${ARG_PREFIX})
else()
- target_link_libraries(arrow-acero-expression-benchmark PUBLIC
arrow_acero_shared)
- target_link_libraries(arrow-acero-filter-benchmark PUBLIC
arrow_acero_shared)
- target_link_libraries(arrow-acero-project-benchmark PUBLIC
arrow_acero_shared)
- target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC
arrow_acero_shared)
- target_link_libraries(arrow-acero-tpch-benchmark PUBLIC arrow_acero_shared)
- target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC
arrow_acero_shared)
+ set(PREFIX "arrow-acero")
endif()
-endif()
+
+ if(ARG_LABELS)
+ set(LABELS ${ARG_LABELS})
+ else()
+ set(LABELS "arrow_acero")
+ endif()
+
+ if(ARROW_TEST_LINKAGE STREQUAL "static")
+ set(EXTRA_LINK_LIBS arrow_acero_static
${ARROW_ACERO_BENCHMARKS_TEST_LINK_LIBS})
+ else()
+ set(EXTRA_LINK_LIBS arrow_acero_shared
${ARROW_ACERO_BENCHMARKS_TEST_LINK_LIBS})
+ endif()
+ if(ARG_EXTRA_LINK_LIBS)
+ list(APPEND EXTRA_LINK_LIBS ${ARG_EXTRA_LINK_LIBS})
+ endif()
+
+ add_arrow_compute_benchmark(${REL_BENCHMARK_NAME}
+ EXTRA_LINK_LIBS
+ ${EXTRA_LINK_LIBS}
+ PREFIX
+ ${PREFIX}
+ LABELS
+ ${LABELS}
+ ${ARG_UNPARSED_ARGUMENTS})
+endfunction()
+
+add_arrow_acero_benchmark(aggregate_benchmark)
+add_arrow_acero_benchmark(asof_join_benchmark)
+add_arrow_acero_benchmark(expression_benchmark)
+add_arrow_acero_benchmark(filter_benchmark SOURCES benchmark_util.cc
filter_benchmark.cc)
+add_arrow_acero_benchmark(hash_join_benchmark)
+add_arrow_acero_benchmark(project_benchmark SOURCES benchmark_util.cc
+ project_benchmark.cc)
+add_arrow_acero_benchmark(tpch_benchmark)
diff --git a/cpp/src/arrow/compute/CMakeLists.txt
b/cpp/src/arrow/compute/CMakeLists.txt
index 6498fd1c17..6d11aa26c0 100644
--- a/cpp/src/arrow/compute/CMakeLists.txt
+++ b/cpp/src/arrow/compute/CMakeLists.txt
@@ -114,6 +114,49 @@ function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME)
${ARG_UNPARSED_ARGUMENTS})
endfunction()
+# This function is used to add a custom main to the benchmarks in order
+# to initialize the compute kernels registry before running them.
+# This is necessary for benchmarks that use compute kernels that are not
+# part of libarrow.
+# It will also link the compute libraries to the benchmark target.
+function(add_arrow_compute_benchmark REL_TEST_NAME)
+ set(options)
+ set(one_value_args PREFIX)
+ set(multi_value_args EXTRA_SOURCES EXTRA_LINK_LIBS)
+ cmake_parse_arguments(ARG
+ "${options}"
+ "${one_value_args}"
+ "${multi_value_args}"
+ ${ARGN})
+ if(ARG_PREFIX)
+ set(PREFIX ${ARG_PREFIX})
+ else()
+ set(PREFIX "arrow-compute")
+ endif()
+ set(EXTRA_SOURCES "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/benchmark_main.cc")
+ if(ARG_EXTRA_SOURCES)
+ list(APPEND EXTRA_SOURCES ${ARG_EXTRA_SOURCES})
+ endif()
+ if(ARROW_TEST_LINKAGE STREQUAL "static")
+ set(EXTRA_LINK_LIBS arrow_compute_static)
+ else()
+ set(EXTRA_LINK_LIBS arrow_compute_shared)
+ endif()
+ if(ARG_EXTRA_LINK_LIBS)
+ list(APPEND EXTRA_LINK_LIBS ${ARG_EXTRA_LINK_LIBS})
+ endif()
+ add_benchmark(${REL_TEST_NAME}
+ PREFIX
+ ${PREFIX}
+ LABELS
+ "arrow-benchmarks"
+ EXTRA_SOURCES
+ ${EXTRA_SOURCES}
+ EXTRA_LINK_LIBS
+ ${EXTRA_LINK_LIBS}
+ ${ARG_UNPARSED_ARGUMENTS})
+endfunction()
+
add_arrow_test(internals_test
${ARROW_COMPUTE_TEST_ARGS}
SOURCES
@@ -142,7 +185,7 @@ add_arrow_compute_test(row_test
EXTRA_LINK_LIBS
arrow_compute_testing)
-add_arrow_benchmark(function_benchmark PREFIX "arrow-compute")
+add_arrow_compute_benchmark(function_benchmark)
add_subdirectory(kernels)
diff --git a/cpp/src/arrow/compute/benchmark_main.cc
b/cpp/src/arrow/compute/benchmark_main.cc
new file mode 100644
index 0000000000..2c54d69782
--- /dev/null
+++ b/cpp/src/arrow/compute/benchmark_main.cc
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/compute/initialize.h"
+#include "arrow/testing/gtest_util.h"
+
+int main(int argc, char** argv) {
+ // Initialize compute functions before any benchmarks run
+ ABORT_NOT_OK(arrow::compute::Initialize());
+
+ // Initialize and run benchmarks
+ ::benchmark::Initialize(&argc, argv);
+ if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
+ ::benchmark::RunSpecifiedBenchmarks();
+ ::benchmark::Shutdown();
+ return 0;
+}
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt
b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 929cca8f5a..15955b5ef8 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -84,17 +84,18 @@ add_arrow_compute_test(scalar_utility_test
arrow_compute_kernels_testing
arrow_compute_testing)
-add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute")
add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_if_else_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_list_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_random_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_round_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_temporal_benchmark PREFIX "arrow-compute")
+# The following benchmarks require compute kernels initialization.
+add_arrow_compute_benchmark(scalar_arithmetic_benchmark)
+add_arrow_compute_benchmark(scalar_boolean_benchmark)
+add_arrow_compute_benchmark(scalar_compare_benchmark)
+add_arrow_compute_benchmark(scalar_if_else_benchmark)
+add_arrow_compute_benchmark(scalar_list_benchmark)
+add_arrow_compute_benchmark(scalar_random_benchmark)
+add_arrow_compute_benchmark(scalar_round_benchmark)
+add_arrow_compute_benchmark(scalar_set_lookup_benchmark)
+add_arrow_compute_benchmark(scalar_string_benchmark)
+add_arrow_compute_benchmark(scalar_temporal_benchmark)
# ----------------------------------------------------------------------
# Vector kernels
@@ -135,11 +136,12 @@ add_arrow_compute_test(vector_swizzle_test
arrow_compute_testing)
add_arrow_benchmark(vector_hash_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(vector_sort_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(vector_partition_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(vector_topk_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(vector_replace_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute")
+# The following benchmarks require compute kernels initialization.
+add_arrow_compute_benchmark(vector_sort_benchmark)
+add_arrow_compute_benchmark(vector_partition_benchmark)
+add_arrow_compute_benchmark(vector_topk_benchmark)
+add_arrow_compute_benchmark(vector_replace_benchmark)
+add_arrow_compute_benchmark(vector_selection_benchmark)
# ----------------------------------------------------------------------
# Aggregate kernels
diff --git a/cpp/src/arrow/dataset/CMakeLists.txt
b/cpp/src/arrow/dataset/CMakeLists.txt
index 809bdfaae6..d87afdf5bd 100644
--- a/cpp/src/arrow/dataset/CMakeLists.txt
+++ b/cpp/src/arrow/dataset/CMakeLists.txt
@@ -206,15 +206,39 @@ if(ARROW_PARQUET)
endif()
endif()
-if(ARROW_BUILD_BENCHMARKS)
- add_arrow_benchmark(file_benchmark PREFIX "arrow-dataset")
- add_arrow_benchmark(scanner_benchmark PREFIX "arrow-dataset")
+function(add_arrow_dataset_benchmark REL_BENCHMARK_NAME)
+ set(options)
+ set(one_value_args PREFIX)
+ set(multi_value_args EXTRA_LINK_LIBS)
+ cmake_parse_arguments(ARG
+ "${options}"
+ "${one_value_args}"
+ "${multi_value_args}"
+ ${ARGN})
- if(ARROW_BUILD_STATIC)
- target_link_libraries(arrow-dataset-file-benchmark PUBLIC
arrow_dataset_static)
- target_link_libraries(arrow-dataset-scanner-benchmark PUBLIC
arrow_dataset_static)
+ if(ARG_PREFIX)
+ set(PREFIX ${ARG_PREFIX})
else()
- target_link_libraries(arrow-dataset-file-benchmark PUBLIC
arrow_dataset_shared)
- target_link_libraries(arrow-dataset-scanner-benchmark PUBLIC
arrow_dataset_shared)
+ set(PREFIX "arrow-dataset")
endif()
-endif()
+
+ if(ARROW_TEST_LINKAGE STREQUAL "static")
+ set(EXTRA_LINK_LIBS arrow_dataset_static)
+ else()
+ set(EXTRA_LINK_LIBS arrow_dataset_shared)
+ endif()
+ if(ARG_EXTRA_LINK_LIBS)
+ list(APPEND EXTRA_LINK_LIBS ${ARG_EXTRA_LINK_LIBS})
+ endif()
+
+ # Dataset benchmarks require compute kernels initialization.
+ add_arrow_compute_benchmark(${REL_BENCHMARK_NAME}
+ PREFIX
+ ${PREFIX}
+ EXTRA_LINK_LIBS
+ ${EXTRA_LINK_LIBS}
+ ${ARG_UNPARSED_ARGUMENTS})
+endfunction()
+
+add_arrow_dataset_benchmark(file_benchmark)
+add_arrow_dataset_benchmark(scanner_benchmark)