This is an automated email from the ASF dual-hosted git repository.

raulcd pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 56c0e2f508 GH-46915: [C++][Compute] Initialize Compute kernels on 
benchmarks that require extra kernels (#46922)
56c0e2f508 is described below

commit 56c0e2f508fdc5137d6734b406634386f9284a52
Author: Raúl Cumplido <[email protected]>
AuthorDate: Sun Jul 6 12:03:26 2025 +0200

    GH-46915: [C++][Compute] Initialize Compute kernels on benchmarks that 
require extra kernels (#46922)
    
    ### Rationale for this change
    
    Benchmarks that require non-core compute kernels fail because the functions 
are not registered: the compute kernels initialization step is missing.
    
    ### What changes are included in this PR?
    
    - Create a custom main function that can be compiled along with the compute 
benchmarks in order to initialize the compute kernels.
    - Create a new CMake `add_arrow_compute_benchmark` function that will 
automatically add the new `benchmark_main.cc` file to the sources 
for the benchmark, prefix the benchmark with `arrow-compute` and link 
`arrow_compute_{shared/static}`.
    - Update benchmarks that require the compute kernels to use 
`add_arrow_compute_benchmark` instead of `add_arrow_benchmark`.
    - Adapt `add_benchmark` function to be able to receive `EXTRA_SOURCES` and 
"return" the `OUTPUT_BENCHMARK_NAME`
    
    ### Are these changes tested?
    
    This has been tested locally.
    
    ### Are there any user-facing changes?
    
    No
    
    * GitHub Issue: #46915
    
    Lead-authored-by: Raúl Cumplido <[email protected]>
    Co-authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Raúl Cumplido <[email protected]>
---
 cpp/cmake_modules/BuildUtils.cmake           |  11 ++-
 cpp/src/arrow/acero/CMakeLists.txt           | 114 ++++++++++++---------------
 cpp/src/arrow/compute/CMakeLists.txt         |  45 ++++++++++-
 cpp/src/arrow/compute/benchmark_main.cc      |  33 ++++++++
 cpp/src/arrow/compute/kernels/CMakeLists.txt |  32 ++++----
 cpp/src/arrow/dataset/CMakeLists.txt         |  42 +++++++---
 6 files changed, 187 insertions(+), 90 deletions(-)

diff --git a/cpp/cmake_modules/BuildUtils.cmake 
b/cpp/cmake_modules/BuildUtils.cmake
index 134f47b12e..d92d3af2e4 100644
--- a/cpp/cmake_modules/BuildUtils.cmake
+++ b/cpp/cmake_modules/BuildUtils.cmake
@@ -522,6 +522,7 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME)
       STATIC_LINK_LIBS
       DEPENDENCIES
       SOURCES
+      EXTRA_SOURCES
       LABELS)
   cmake_parse_arguments(ARG
                         "${options}"
@@ -541,10 +542,16 @@ function(ADD_BENCHMARK REL_BENCHMARK_NAME)
     set(BENCHMARK_NAME "${ARG_PREFIX}-${BENCHMARK_NAME}")
   endif()
 
+  set(SOURCES "")
+
+  if(ARG_EXTRA_SOURCES)
+    list(APPEND SOURCES ${ARG_EXTRA_SOURCES})
+  endif()
+
   if(ARG_SOURCES)
-    set(SOURCES ${ARG_SOURCES})
+    list(APPEND SOURCES ${ARG_SOURCES})
   else()
-    set(SOURCES "${REL_BENCHMARK_NAME}.cc")
+    list(APPEND SOURCES "${REL_BENCHMARK_NAME}.cc")
   endif()
 
   # Make sure the executable name contains only hyphens, not underscores
diff --git a/cpp/src/arrow/acero/CMakeLists.txt 
b/cpp/src/arrow/acero/CMakeLists.txt
index 37e00fd256..dc18afa979 100644
--- a/cpp/src/arrow/acero/CMakeLists.txt
+++ b/cpp/src/arrow/acero/CMakeLists.txt
@@ -129,6 +129,10 @@ if(ARROW_TESTING)
   if(ARROW_WITH_OPENTELEMETRY)
     target_link_libraries(arrow_acero_testing PRIVATE 
${ARROW_OPENTELEMETRY_LIBS})
   endif()
+  # arrow_compute_testing will register the kernels for Gtest. In order to 
register the kernels
+  # for Google benchmark we use a custom main function used on 
add_arrow_compute_benchmark.
+  set(ARROW_ACERO_BENCHMARKS_TEST_LINK_LIBS
+      ${ARROW_ACERO_TEST_LINK_LIBS} arrow_acero_testing 
arrow_compute_core_testing)
   list(APPEND ARROW_ACERO_TEST_LINK_LIBS arrow_acero_testing 
arrow_compute_testing)
 endif()
 # Only for hash_aggregate_test.cc.
@@ -188,68 +192,52 @@ add_arrow_acero_test(hash_aggregate_test SOURCES 
hash_aggregate_test.cc)
 
 add_arrow_acero_test(test_util_internal_test SOURCES 
test_util_internal_test.cc)
 
-if(ARROW_BUILD_BENCHMARKS)
-  function(add_arrow_acero_benchmark REL_BENCHMARK_NAME)
-    set(options)
-    set(one_value_args PREFIX)
-    set(multi_value_args LABELS)
-    cmake_parse_arguments(ARG
-                          "${options}"
-                          "${one_value_args}"
-                          "${multi_value_args}"
-                          ${ARGN})
-
-    if(ARG_PREFIX)
-      set(PREFIX ${ARG_PREFIX})
-    else()
-      set(PREFIX "arrow-acero")
-    endif()
-
-    if(ARG_LABELS)
-      set(LABELS ${ARG_LABELS})
-    else()
-      set(LABELS "arrow_acero")
-    endif()
-
-    add_arrow_benchmark(${REL_BENCHMARK_NAME}
-                        EXTRA_LINK_LIBS
-                        ${ARROW_ACERO_TEST_LINK_LIBS}
-                        PREFIX
-                        ${PREFIX}
-                        LABELS
-                        ${LABELS}
-                        ${ARG_UNPARSED_ARGUMENTS})
-  endfunction()
-
-  add_arrow_acero_benchmark(expression_benchmark SOURCES 
expression_benchmark.cc)
-
-  add_arrow_acero_benchmark(filter_benchmark SOURCES benchmark_util.cc
-                            filter_benchmark.cc)
-
-  add_arrow_acero_benchmark(project_benchmark SOURCES benchmark_util.cc
-                            project_benchmark.cc)
-
-  add_arrow_acero_benchmark(asof_join_benchmark SOURCES asof_join_benchmark.cc)
-
-  add_arrow_acero_benchmark(tpch_benchmark SOURCES tpch_benchmark.cc)
-
-  add_arrow_acero_benchmark(aggregate_benchmark SOURCES aggregate_benchmark.cc)
-
-  add_arrow_acero_benchmark(hash_join_benchmark SOURCES hash_join_benchmark.cc)
-
-  if(ARROW_BUILD_STATIC)
-    target_link_libraries(arrow-acero-expression-benchmark PUBLIC 
arrow_acero_static)
-    target_link_libraries(arrow-acero-filter-benchmark PUBLIC 
arrow_acero_static)
-    target_link_libraries(arrow-acero-project-benchmark PUBLIC 
arrow_acero_static)
-    target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC 
arrow_acero_static)
-    target_link_libraries(arrow-acero-tpch-benchmark PUBLIC arrow_acero_static)
-    target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC 
arrow_acero_static)
+function(add_arrow_acero_benchmark REL_BENCHMARK_NAME)
+  set(options)
+  set(one_value_args PREFIX)
+  set(multi_value_args LABELS EXTRA_LINK_LIBS)
+  cmake_parse_arguments(ARG
+                        "${options}"
+                        "${one_value_args}"
+                        "${multi_value_args}"
+                        ${ARGN})
+
+  if(ARG_PREFIX)
+    set(PREFIX ${ARG_PREFIX})
   else()
-    target_link_libraries(arrow-acero-expression-benchmark PUBLIC 
arrow_acero_shared)
-    target_link_libraries(arrow-acero-filter-benchmark PUBLIC 
arrow_acero_shared)
-    target_link_libraries(arrow-acero-project-benchmark PUBLIC 
arrow_acero_shared)
-    target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC 
arrow_acero_shared)
-    target_link_libraries(arrow-acero-tpch-benchmark PUBLIC arrow_acero_shared)
-    target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC 
arrow_acero_shared)
+    set(PREFIX "arrow-acero")
   endif()
-endif()
+
+  if(ARG_LABELS)
+    set(LABELS ${ARG_LABELS})
+  else()
+    set(LABELS "arrow_acero")
+  endif()
+
+  if(ARROW_TEST_LINKAGE STREQUAL "static")
+    set(EXTRA_LINK_LIBS arrow_acero_static 
${ARROW_ACERO_BENCHMARKS_TEST_LINK_LIBS})
+  else()
+    set(EXTRA_LINK_LIBS arrow_acero_shared 
${ARROW_ACERO_BENCHMARKS_TEST_LINK_LIBS})
+  endif()
+  if(ARG_EXTRA_LINK_LIBS)
+    list(APPEND EXTRA_LINK_LIBS ${ARG_EXTRA_LINK_LIBS})
+  endif()
+
+  add_arrow_compute_benchmark(${REL_BENCHMARK_NAME}
+                              EXTRA_LINK_LIBS
+                              ${EXTRA_LINK_LIBS}
+                              PREFIX
+                              ${PREFIX}
+                              LABELS
+                              ${LABELS}
+                              ${ARG_UNPARSED_ARGUMENTS})
+endfunction()
+
+add_arrow_acero_benchmark(aggregate_benchmark)
+add_arrow_acero_benchmark(asof_join_benchmark)
+add_arrow_acero_benchmark(expression_benchmark)
+add_arrow_acero_benchmark(filter_benchmark SOURCES benchmark_util.cc 
filter_benchmark.cc)
+add_arrow_acero_benchmark(hash_join_benchmark)
+add_arrow_acero_benchmark(project_benchmark SOURCES benchmark_util.cc
+                          project_benchmark.cc)
+add_arrow_acero_benchmark(tpch_benchmark)
diff --git a/cpp/src/arrow/compute/CMakeLists.txt 
b/cpp/src/arrow/compute/CMakeLists.txt
index 6498fd1c17..6d11aa26c0 100644
--- a/cpp/src/arrow/compute/CMakeLists.txt
+++ b/cpp/src/arrow/compute/CMakeLists.txt
@@ -114,6 +114,49 @@ function(ADD_ARROW_COMPUTE_TEST REL_TEST_NAME)
                  ${ARG_UNPARSED_ARGUMENTS})
 endfunction()
 
+# This function is used to add a custom main to the benchmarks in order
+# to initialize the compute kernels registry before running them.
+# This is necessary for benchmarks that use compute kernels that are not
+# part of libarrow.
+# It will also link the compute libraries to the benchmark target.
+function(add_arrow_compute_benchmark REL_TEST_NAME)
+  set(options)
+  set(one_value_args PREFIX)
+  set(multi_value_args EXTRA_SOURCES EXTRA_LINK_LIBS)
+  cmake_parse_arguments(ARG
+                        "${options}"
+                        "${one_value_args}"
+                        "${multi_value_args}"
+                        ${ARGN})
+  if(ARG_PREFIX)
+    set(PREFIX ${ARG_PREFIX})
+  else()
+    set(PREFIX "arrow-compute")
+  endif()
+  set(EXTRA_SOURCES "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/benchmark_main.cc")
+  if(ARG_EXTRA_SOURCES)
+    list(APPEND EXTRA_SOURCES ${ARG_EXTRA_SOURCES})
+  endif()
+  if(ARROW_TEST_LINKAGE STREQUAL "static")
+    set(EXTRA_LINK_LIBS arrow_compute_static)
+  else()
+    set(EXTRA_LINK_LIBS arrow_compute_shared)
+  endif()
+  if(ARG_EXTRA_LINK_LIBS)
+    list(APPEND EXTRA_LINK_LIBS ${ARG_EXTRA_LINK_LIBS})
+  endif()
+  add_benchmark(${REL_TEST_NAME}
+                PREFIX
+                ${PREFIX}
+                LABELS
+                "arrow-benchmarks"
+                EXTRA_SOURCES
+                ${EXTRA_SOURCES}
+                EXTRA_LINK_LIBS
+                ${EXTRA_LINK_LIBS}
+                ${ARG_UNPARSED_ARGUMENTS})
+endfunction()
+
 add_arrow_test(internals_test
                ${ARROW_COMPUTE_TEST_ARGS}
                SOURCES
@@ -142,7 +185,7 @@ add_arrow_compute_test(row_test
                        EXTRA_LINK_LIBS
                        arrow_compute_testing)
 
-add_arrow_benchmark(function_benchmark PREFIX "arrow-compute")
+add_arrow_compute_benchmark(function_benchmark)
 
 add_subdirectory(kernels)
 
diff --git a/cpp/src/arrow/compute/benchmark_main.cc 
b/cpp/src/arrow/compute/benchmark_main.cc
new file mode 100644
index 0000000000..2c54d69782
--- /dev/null
+++ b/cpp/src/arrow/compute/benchmark_main.cc
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "benchmark/benchmark.h"
+
+#include "arrow/compute/initialize.h"
+#include "arrow/testing/gtest_util.h"
+
+int main(int argc, char** argv) {
+  // Initialize compute functions before any benchmarks run
+  ABORT_NOT_OK(arrow::compute::Initialize());
+
+  // Initialize and run benchmarks
+  ::benchmark::Initialize(&argc, argv);
+  if (::benchmark::ReportUnrecognizedArguments(argc, argv)) return 1;
+  ::benchmark::RunSpecifiedBenchmarks();
+  ::benchmark::Shutdown();
+  return 0;
+}
diff --git a/cpp/src/arrow/compute/kernels/CMakeLists.txt 
b/cpp/src/arrow/compute/kernels/CMakeLists.txt
index 929cca8f5a..15955b5ef8 100644
--- a/cpp/src/arrow/compute/kernels/CMakeLists.txt
+++ b/cpp/src/arrow/compute/kernels/CMakeLists.txt
@@ -84,17 +84,18 @@ add_arrow_compute_test(scalar_utility_test
                        arrow_compute_kernels_testing
                        arrow_compute_testing)
 
-add_arrow_benchmark(scalar_arithmetic_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_boolean_benchmark PREFIX "arrow-compute")
 add_arrow_benchmark(scalar_cast_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_compare_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_if_else_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_list_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_random_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_round_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_set_lookup_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_string_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(scalar_temporal_benchmark PREFIX "arrow-compute")
+# The following benchmarks require compute kernels initialization.
+add_arrow_compute_benchmark(scalar_arithmetic_benchmark)
+add_arrow_compute_benchmark(scalar_boolean_benchmark)
+add_arrow_compute_benchmark(scalar_compare_benchmark)
+add_arrow_compute_benchmark(scalar_if_else_benchmark)
+add_arrow_compute_benchmark(scalar_list_benchmark)
+add_arrow_compute_benchmark(scalar_random_benchmark)
+add_arrow_compute_benchmark(scalar_round_benchmark)
+add_arrow_compute_benchmark(scalar_set_lookup_benchmark)
+add_arrow_compute_benchmark(scalar_string_benchmark)
+add_arrow_compute_benchmark(scalar_temporal_benchmark)
 
 # ----------------------------------------------------------------------
 # Vector kernels
@@ -135,11 +136,12 @@ add_arrow_compute_test(vector_swizzle_test
                        arrow_compute_testing)
 
 add_arrow_benchmark(vector_hash_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(vector_sort_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(vector_partition_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(vector_topk_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(vector_replace_benchmark PREFIX "arrow-compute")
-add_arrow_benchmark(vector_selection_benchmark PREFIX "arrow-compute")
+# The following benchmarks require compute kernels initialization.
+add_arrow_compute_benchmark(vector_sort_benchmark)
+add_arrow_compute_benchmark(vector_partition_benchmark)
+add_arrow_compute_benchmark(vector_topk_benchmark)
+add_arrow_compute_benchmark(vector_replace_benchmark)
+add_arrow_compute_benchmark(vector_selection_benchmark)
 
 # ----------------------------------------------------------------------
 # Aggregate kernels
diff --git a/cpp/src/arrow/dataset/CMakeLists.txt 
b/cpp/src/arrow/dataset/CMakeLists.txt
index 809bdfaae6..d87afdf5bd 100644
--- a/cpp/src/arrow/dataset/CMakeLists.txt
+++ b/cpp/src/arrow/dataset/CMakeLists.txt
@@ -206,15 +206,39 @@ if(ARROW_PARQUET)
   endif()
 endif()
 
-if(ARROW_BUILD_BENCHMARKS)
-  add_arrow_benchmark(file_benchmark PREFIX "arrow-dataset")
-  add_arrow_benchmark(scanner_benchmark PREFIX "arrow-dataset")
+function(add_arrow_dataset_benchmark REL_BENCHMARK_NAME)
+  set(options)
+  set(one_value_args PREFIX)
+  set(multi_value_args EXTRA_LINK_LIBS)
+  cmake_parse_arguments(ARG
+                        "${options}"
+                        "${one_value_args}"
+                        "${multi_value_args}"
+                        ${ARGN})
 
-  if(ARROW_BUILD_STATIC)
-    target_link_libraries(arrow-dataset-file-benchmark PUBLIC 
arrow_dataset_static)
-    target_link_libraries(arrow-dataset-scanner-benchmark PUBLIC 
arrow_dataset_static)
+  if(ARG_PREFIX)
+    set(PREFIX ${ARG_PREFIX})
   else()
-    target_link_libraries(arrow-dataset-file-benchmark PUBLIC 
arrow_dataset_shared)
-    target_link_libraries(arrow-dataset-scanner-benchmark PUBLIC 
arrow_dataset_shared)
+    set(PREFIX "arrow-dataset")
   endif()
-endif()
+
+  if(ARROW_TEST_LINKAGE STREQUAL "static")
+    set(EXTRA_LINK_LIBS arrow_dataset_static)
+  else()
+    set(EXTRA_LINK_LIBS arrow_dataset_shared)
+  endif()
+  if(ARG_EXTRA_LINK_LIBS)
+    list(APPEND EXTRA_LINK_LIBS ${ARG_EXTRA_LINK_LIBS})
+  endif()
+
+  # Dataset benchmarks require compute kernels initialization.
+  add_arrow_compute_benchmark(${REL_BENCHMARK_NAME}
+                              PREFIX
+                              ${PREFIX}
+                              EXTRA_LINK_LIBS
+                              ${EXTRA_LINK_LIBS}
+                              ${ARG_UNPARSED_ARGUMENTS})
+endfunction()
+
+add_arrow_dataset_benchmark(file_benchmark)
+add_arrow_dataset_benchmark(scanner_benchmark)

Reply via email to