(arrow) branch main updated: GH-47167: [C++][Dev] Update clang-format dependency (#47168)

zanmato Wed, 14 Jan 2026 06:53:51 -0800

This is an automated email from the ASF dual-hosted git repository.

zanmato pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new a1ec5a9a1d GH-47167: [C++][Dev] Update clang-format dependency (#47168)
a1ec5a9a1d is described below

commit a1ec5a9a1decb163eef26deecf60a6903f9aa125
Author: Antoine Prouvost <[email protected]>
AuthorDate: Wed Jan 14 11:04:54 2026 +0100

    GH-47167: [C++][Dev] Update clang-format dependency (#47168)
    
    ### Rationale for this change
    Update clang-format to better match modern IDEs.
    
    ### What changes are included in this PR?
    - Update clang-format
    - Reformat code base with newer version
    
    ### Are these changes tested?
    Yes
    
    ### Are there any user-facing changes?
    No
    
    * GitHub Issue: #47167
    
    Lead-authored-by: AntoinePrv <[email protected]>
    Co-authored-by: Rossi Sun <[email protected]>
    Signed-off-by: Rossi Sun <[email protected]>
---
 .github/workflows/dev.yml                          |  4 +-
 .pre-commit-config.yaml                            | 10 ++---
 cpp/src/arrow/acero/exec_plan.cc                   |  2 +-
 cpp/src/arrow/compute/kernels/aggregate_test.cc    |  3 +-
 cpp/src/arrow/compute/kernels/hash_aggregate.cc    |  2 +-
 cpp/src/arrow/compute/kernels/temporal_internal.h  |  8 ++--
 .../kernels/vector_selection_filter_internal.cc    |  2 +-
 cpp/src/arrow/compute/kernels/vector_sort_test.cc  |  4 +-
 cpp/src/arrow/dataset/dataset_writer_test.cc       |  8 ++--
 cpp/src/arrow/dataset/file_test.cc                 | 11 ++---
 cpp/src/arrow/device.cc                            |  3 +-
 cpp/src/arrow/engine/substrait/serde_test.cc       | 12 +++---
 cpp/src/arrow/flight/serialization_internal.cc     |  2 +-
 cpp/src/arrow/io/memory_test.cc                    |  6 +--
 cpp/src/arrow/json/test_common.h                   |  2 +-
 cpp/src/arrow/scalar.cc                            |  2 +-
 cpp/src/arrow/util/async_generator.h               |  5 +--
 cpp/src/arrow/util/bpacking.cc                     | 12 +++---
 cpp/src/arrow/util/byte_stream_split_internal.cc   | 28 ++++++-------
 cpp/src/arrow/util/byte_stream_split_internal.h    |  8 ++--
 cpp/src/arrow/util/compare.h                       |  4 +-
 cpp/src/arrow/util/cpu_info.cc                     | 48 ++++++++++------------
 cpp/src/arrow/util/hash_util.h                     |  2 +-
 cpp/src/arrow/util/hashing.h                       |  9 ++--
 cpp/src/arrow/util/rle_encoding_internal.h         | 16 ++++----
 cpp/src/arrow/util/span.h                          |  8 ++--
 cpp/src/arrow/util/tracing.cc                      |  3 +-
 cpp/src/arrow/util/value_parsing_test.cc           | 12 +++---
 cpp/src/gandiva/encrypt_utils_test.cc              | 10 +++--
 cpp/src/gandiva/gdv_hash_function_stubs.cc         |  2 +-
 cpp/src/gandiva/precompiled/string_ops_test.cc     |  6 +--
 cpp/src/parquet/arrow/arrow_reader_writer_test.cc  |  6 +--
 32 files changed, 122 insertions(+), 138 deletions(-)

diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index b763cfbbbc..59171ddcaa 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -41,8 +41,8 @@ jobs:
 
   lint:
     name: Lint C++, Python, R, Docker, RAT
-    # Use Ubuntu 22.04 to ensure working pre-commit on Ubuntu 22.04.
-    runs-on: ubuntu-22.04
+    # Use Ubuntu 24.04 to ensure working pre-commit on Ubuntu 24.04.
+    runs-on: ubuntu-24.04
     if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
     timeout-minutes: 15
     steps:
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index da84abed0d..c4c4f04188 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -50,7 +50,7 @@ repos:
           )
         types: []
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v14.0.6
+    rev: v18.1.8
     hooks:
       - id: clang-format
         name: C++ Format
@@ -93,7 +93,7 @@ repos:
           ?^cpp/thirdparty/|
           )
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v14.0.6
+    rev: v18.1.8
     hooks:
       - id: clang-format
         alias: c-glib
@@ -101,7 +101,7 @@ repos:
         files: >-
           ^c_glib/
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v14.0.6
+    rev: v18.1.8
     hooks:
       - id: clang-format
         name: MATLAB (C++) Format
@@ -156,7 +156,7 @@ repos:
         files: >-
           ^python/
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v14.0.6
+    rev: v18.1.8
     hooks:
       - id: clang-format
         alias: python
@@ -213,7 +213,7 @@ repos:
         files: >-
             ^r/.*\.R$
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v14.0.6
+    rev: v18.1.8
     hooks:
       - id: clang-format
         alias: r
diff --git a/cpp/src/arrow/acero/exec_plan.cc b/cpp/src/arrow/acero/exec_plan.cc
index ff5e5d8bdd..2aa5532a0c 100644
--- a/cpp/src/arrow/acero/exec_plan.cc
+++ b/cpp/src/arrow/acero/exec_plan.cc
@@ -137,7 +137,7 @@ struct ExecPlanImpl : public ExecPlan {
             opentelemetry::nostd::shared_ptr<opentelemetry::trace::Span> span =
                 ::arrow::internal::tracing::UnwrapSpan(span_.details.get());
             std::for_each(std::begin(pairs), std::end(pairs),
-                          [span](std::pair<std::string, std::string> const& pair) {
+                          [span](const std::pair<std::string, std::string>& pair) {
                             span->SetAttribute(pair.first, pair.second);
                           });
           }
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index cdc62f946a..c5ba012d76 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -3889,8 +3889,7 @@ class TestPrimitiveQuantileKernel : public ::testing::Test {
 #define INTYPE(x) Datum(static_cast<typename TypeParam::c_type>(x))
 #define DOUBLE(x) Datum(static_cast<double>(x))
 // output type per interpolation: linear, lower, higher, nearest, midpoint
-#define O(a, b, c, d, e) \
-  { DOUBLE(a), INTYPE(b), INTYPE(c), INTYPE(d), DOUBLE(e) }
+#define O(a, b, c, d, e) {DOUBLE(a), INTYPE(b), INTYPE(c), INTYPE(d), DOUBLE(e)}
 
 template <typename ArrowType>
 class TestIntegerQuantileKernel : public TestPrimitiveQuantileKernel<ArrowType> {};
diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
index 2ab5e574e2..ed50025ef5 100644
--- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc
+++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc
@@ -282,7 +282,7 @@ concept CFloatingPointConcept = std::floating_point<T> || std::same_as<T, util::
 
 template <typename T>
 concept CDecimalConcept = std::same_as<T, Decimal32> || std::same_as<T, Decimal64> ||
-    std::same_as<T, Decimal128> || std::same_as<T, Decimal256>;
+                          std::same_as<T, Decimal128> || std::same_as<T, Decimal256>;
 
 template <typename CType>
 struct AntiExtrema {
diff --git a/cpp/src/arrow/compute/kernels/temporal_internal.h b/cpp/src/arrow/compute/kernels/temporal_internal.h
index 3674c233dc..139cc134bd 100644
--- a/cpp/src/arrow/compute/kernels/temporal_internal.h
+++ b/cpp/src/arrow/compute/kernels/temporal_internal.h
@@ -45,8 +45,8 @@ using std::chrono::duration_cast;
 using ArrowTimeZone = std::variant<const time_zone*, OffsetZone>;
 
 template <class Duration, class Func>
-auto ApplyTimeZone(const ArrowTimeZone& tz, sys_time<Duration> st, Func&& func)
-    -> decltype(func(zoned_time<Duration>{})) {
+auto ApplyTimeZone(const ArrowTimeZone& tz, sys_time<Duration> st,
+                   Func&& func) -> decltype(func(zoned_time<Duration>{})) {
   return std::visit(
       [&](auto&& zone) {
         if constexpr (std::is_pointer_v<std::decay_t<decltype(zone)> >) {
@@ -60,8 +60,8 @@ auto ApplyTimeZone(const ArrowTimeZone& tz, sys_time<Duration> st, Func&& func)
 
 template <class Duration, class Func>
 auto ApplyTimeZone(const ArrowTimeZone& tz, local_time<Duration> lt,
-                   std::optional<choose> c, Func&& func)
-    -> decltype(func(zoned_time<Duration>{})) {
+                   std::optional<choose> c,
+                   Func&& func) -> decltype(func(zoned_time<Duration>{})) {
   return std::visit(
       [&](auto&& zone) {
         if constexpr (std::is_pointer_v<std::decay_t<decltype(zone)> >) {
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc
index 1c2eacb9a7..cca8c7ae74 100644
--- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc
+++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc
@@ -368,7 +368,7 @@ class PrimitiveFilterImpl {
           }
         }
       }  // !filter_block.AllSet()
-    }    // while(in_position < values_length_)
+    }  // while(in_position < values_length_)
   }
 
   // Write the next out_position given the selected in_position for the input
diff --git a/cpp/src/arrow/compute/kernels/vector_sort_test.cc b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
index 0569f1f2ab..90f8eb7a56 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort_test.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort_test.cc
@@ -76,8 +76,8 @@ std::ostream& operator<<(std::ostream& os, NullPlacement null_placement) {
 // Tests for NthToIndices
 
 template <typename ArrayType>
-auto GetLogicalValue(const ArrayType& array, uint64_t index)
-    -> decltype(array.GetView(index)) {
+auto GetLogicalValue(const ArrayType& array,
+                     uint64_t index) -> decltype(array.GetView(index)) {
   return array.GetView(index);
 }
 
diff --git a/cpp/src/arrow/dataset/dataset_writer_test.cc b/cpp/src/arrow/dataset/dataset_writer_test.cc
index 2f34c21aec..45a36decce 100644
--- a/cpp/src/arrow/dataset/dataset_writer_test.cc
+++ b/cpp/src/arrow/dataset/dataset_writer_test.cc
@@ -613,8 +613,8 @@ TEST_F(DatasetWriterTestFixture, ErrOnExistingData) {
                      fs::File("testdir/chunk-5.arrow"), fs::File("testdir/blah.txt")}));
   filesystem_ = std::dynamic_pointer_cast<MockFileSystem>(fs);
   write_options_.filesystem = filesystem_;
-  ASSERT_RAISES(Invalid, DatasetWriter::Make(
-                             write_options_, scheduler_, [] {}, [] {}, [] {}));
+  ASSERT_RAISES(Invalid,
+                DatasetWriter::Make(write_options_, scheduler_, [] {}, [] {}, [] {}));
   AssertEmptyFiles(
       {"testdir/chunk-0.arrow", "testdir/chunk-5.arrow", "testdir/blah.txt"});
 
@@ -627,8 +627,8 @@ TEST_F(DatasetWriterTestFixture, ErrOnExistingData) {
   filesystem_ = std::dynamic_pointer_cast<MockFileSystem>(fs2);
   write_options_.filesystem = filesystem_;
   write_options_.base_dir = "testdir";
-  ASSERT_RAISES(Invalid, DatasetWriter::Make(
-                             write_options_, scheduler_, [] {}, [] {}, [] {}));
+  ASSERT_RAISES(Invalid,
+                DatasetWriter::Make(write_options_, scheduler_, [] {}, [] {}, [] {}));
   AssertEmptyFiles({"testdir/part-0.arrow"});
 }
 
diff --git a/cpp/src/arrow/dataset/file_test.cc b/cpp/src/arrow/dataset/file_test.cc
index 8904531200..2e2561203b 100644
--- a/cpp/src/arrow/dataset/file_test.cc
+++ b/cpp/src/arrow/dataset/file_test.cc
@@ -594,11 +594,12 @@ class FileSystemWriteTest : public testing::TestWithParam<std::tuple<bool, bool>
 };
 
 TEST_P(FileSystemWriteTest, Write) {
-  auto plan_factory = [](const FileSystemDatasetWriteOptions& write_options,
-                         std::function<Future<std::optional<cp::ExecBatch>>()>*
-                             sink_gen) {
-    return std::vector<acero::Declaration>{{"write", WriteNodeOptions{write_options}}};
-  };
+  auto plan_factory =
+      [](const FileSystemDatasetWriteOptions& write_options,
+         std::function<Future<std::optional<cp::ExecBatch>>()>* sink_gen) {
+        return std::vector<acero::Declaration>{
+            {"write", WriteNodeOptions{write_options}}};
+      };
   TestDatasetWriteRoundTrip(plan_factory, /*has_output=*/false);
 }
 
diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc
index 05bd679040..fd4503c5f2 100644
--- a/cpp/src/arrow/device.cc
+++ b/cpp/src/arrow/device.cc
@@ -42,8 +42,7 @@ Result<std::shared_ptr<Device::SyncEvent>> MemoryManager::WrapDeviceSyncEvent(
   return nullptr;
 }
 
-Device::~Device() {}
-
+Device::~Device(){}
 #define COPY_BUFFER_SUCCESS(maybe_buffer) \
   ((maybe_buffer).ok() && *(maybe_buffer) != nullptr)
 
diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc
index f92aee8eaa..138d03b247 100644
--- a/cpp/src/arrow/engine/substrait/serde_test.cc
+++ b/cpp/src/arrow/engine/substrait/serde_test.cc
@@ -1229,9 +1229,9 @@ TEST(Substrait, ExtensionSetFromPlan) {
        {std::shared_ptr<ExtensionIdRegistry>(), MakeExtensionIdRegistry()}) {
     ExtensionIdRegistry* ext_id_reg = sp_ext_id_reg.get();
     ExtensionSet ext_set(ext_id_reg);
-    ASSERT_OK_AND_ASSIGN(auto sink_decls,
-                         DeserializePlans(
-                             *buf, [] { return kNullConsumer; }, ext_id_reg, &ext_set));
+    ASSERT_OK_AND_ASSIGN(
+        auto sink_decls,
+        DeserializePlans(*buf, [] { return kNullConsumer; }, ext_id_reg, &ext_set));
 
     EXPECT_OK_AND_ASSIGN(auto decoded_null_type, ext_set.DecodeType(42));
     EXPECT_EQ(decoded_null_type.id.uri, kArrowExtTypesUri);
@@ -1665,9 +1665,9 @@ TEST(Substrait, JoinPlanBasic) {
        {std::shared_ptr<ExtensionIdRegistry>(), MakeExtensionIdRegistry()}) {
     ExtensionIdRegistry* ext_id_reg = sp_ext_id_reg.get();
     ExtensionSet ext_set(ext_id_reg);
-    ASSERT_OK_AND_ASSIGN(auto sink_decls,
-                         DeserializePlans(
-                             *buf, [] { return kNullConsumer; }, ext_id_reg, &ext_set));
+    ASSERT_OK_AND_ASSIGN(
+        auto sink_decls,
+        DeserializePlans(*buf, [] { return kNullConsumer; }, ext_id_reg, &ext_set));
 
     auto join_decl = sink_decls[0].inputs[0];
 
diff --git a/cpp/src/arrow/flight/serialization_internal.cc b/cpp/src/arrow/flight/serialization_internal.cc
index 34fcef1f83..604375311d 100644
--- a/cpp/src/arrow/flight/serialization_internal.cc
+++ b/cpp/src/arrow/flight/serialization_internal.cc
@@ -36,7 +36,7 @@ struct overloaded : Ts... {
   using Ts::operator()...;
 };
 template <class... Ts>  // CTAD will not be needed for >=C++20
-overloaded(Ts...)->overloaded<Ts...>;
+overloaded(Ts...) -> overloaded<Ts...>;
 
 namespace arrow {
 namespace flight {
diff --git a/cpp/src/arrow/io/memory_test.cc b/cpp/src/arrow/io/memory_test.cc
index eabee87146..1b2c7bdbf3 100644
--- a/cpp/src/arrow/io/memory_test.cc
+++ b/cpp/src/arrow/io/memory_test.cc
@@ -580,8 +580,7 @@ class TestTransformInputStream : public ::testing::Test {
     auto stream = std::make_shared<TransformInputStream>(
         std::make_shared<BufferReader>(src), this->transform());
     std::shared_ptr<Buffer> actual;
-    AccumulateReads(
-        stream, [&]() -> int64_t { return chunk_sizes(gen); }, &actual);
+    AccumulateReads(stream, [&]() -> int64_t { return chunk_sizes(gen); }, &actual);
     AssertBufferEqual(*actual, *expected);
   }
 
@@ -613,8 +612,7 @@ class TestTransformInputStream : public ::testing::Test {
 
   void AccumulateReads(const std::shared_ptr<InputStream>& stream, int64_t chunk_size,
                        std::shared_ptr<Buffer>* out) {
-    return AccumulateReads(
-        stream, [=]() { return chunk_size; }, out);
+    return AccumulateReads(stream, [=]() { return chunk_size; }, out);
   }
 
  protected:
diff --git a/cpp/src/arrow/json/test_common.h b/cpp/src/arrow/json/test_common.h
index 2f819779bd..423a0123c0 100644
--- a/cpp/src/arrow/json/test_common.h
+++ b/cpp/src/arrow/json/test_common.h
@@ -89,7 +89,7 @@ struct GenerateImpl {
   Status Visit(const NullType&) { return OK(writer.Null()); }
 
   Status Visit(const BooleanType&) {
-    return OK(writer.Bool(std::uniform_int_distribution<uint16_t>{}(e)&1));
+    return OK(writer.Bool(std::uniform_int_distribution<uint16_t>{}(e) & 1));
   }
 
   template <typename T>
diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc
index ad2c66a928..71ac25e1c2 100644
--- a/cpp/src/arrow/scalar.cc
+++ b/cpp/src/arrow/scalar.cc
@@ -565,7 +565,7 @@ struct ScalarValidateImpl {
 };
 
 template <typename T, size_t N>
-void FillScalarScratchSpace(void* scratch_space, T const (&arr)[N]) {
+void FillScalarScratchSpace(void* scratch_space, const T (&arr)[N]) {
   static_assert(sizeof(arr) <= internal::kScalarScratchSpaceSize);
   std::memcpy(scratch_space, arr, sizeof(arr));
 }
diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h
index 57c6d9b5dd..056b842bb7 100644
--- a/cpp/src/arrow/util/async_generator.h
+++ b/cpp/src/arrow/util/async_generator.h
@@ -1439,9 +1439,8 @@ class MergedGenerator {
             immediate_inner(next_item.result());
             if (immediate_inner.was_empty) {
               Future<AsyncGenerator<T>> next_source = state->PullSource();
-              if (next_source.TryAddCallback([this] {
-                    return OuterCallback{state, index};
-                  })) {
+              if (next_source.TryAddCallback(
+                      [this] { return OuterCallback{state, index}; })) {
                 // We hit an unfinished future so we can stop looping
                 return;
               }
diff --git a/cpp/src/arrow/util/bpacking.cc b/cpp/src/arrow/util/bpacking.cc
index fdb1c5a52a..277fe7b040 100644
--- a/cpp/src/arrow/util/bpacking.cc
+++ b/cpp/src/arrow/util/bpacking.cc
@@ -33,15 +33,15 @@ struct UnpackDynamicFunction {
   using Implementation = std::pair<DispatchLevel, FunctionType>;
 
   static constexpr auto implementations() {
-    return std::array {
-      // Current SIMD unpack algorithm works terribly on SSE4.2 due to lack of variable
-      // rhsift and poor xsimd fallback.
-      Implementation{DispatchLevel::NONE, &unpack_scalar<Uint>},
+    return std::array{
+        // Current SIMD unpack algorithm works terribly on SSE4.2 due to lack of variable
+        // rhsift and poor xsimd fallback.
+        Implementation{DispatchLevel::NONE, &unpack_scalar<Uint>},
 #if defined(ARROW_HAVE_RUNTIME_AVX2)
-          Implementation{DispatchLevel::AVX2, &unpack_avx2<Uint>},
+        Implementation{DispatchLevel::AVX2, &unpack_avx2<Uint>},
 #endif
 #if defined(ARROW_HAVE_RUNTIME_AVX512)
-          Implementation{DispatchLevel::AVX512, &unpack_avx512<Uint>},
+        Implementation{DispatchLevel::AVX512, &unpack_avx512<Uint>},
 #endif
     };
   }
diff --git a/cpp/src/arrow/util/byte_stream_split_internal.cc b/cpp/src/arrow/util/byte_stream_split_internal.cc
index 7e9e339cff..f9f64747e1 100644
--- a/cpp/src/arrow/util/byte_stream_split_internal.cc
+++ b/cpp/src/arrow/util/byte_stream_split_internal.cc
@@ -35,9 +35,9 @@ struct ByteStreamSplitDecodeDynamic {
   using Implementation = std::pair<DispatchLevel, FunctionType>;
 
   constexpr static auto implementations() {
-    return std::array {
-      Implementation {
-        DispatchLevel::NONE,
+    return std::array{
+        Implementation{
+            DispatchLevel::NONE,
 #if defined(ARROW_HAVE_NEON)
             // We always expect Neon to be available on Arm64
             &ByteStreamSplitDecodeSimd<xsimd::neon64, kNumStreams>,
@@ -47,13 +47,12 @@ struct ByteStreamSplitDecodeDynamic {
 #else
             &ByteStreamSplitDecodeScalar<kNumStreams>,
 #endif
-      }
-      ,
+        },
 #if defined(ARROW_HAVE_RUNTIME_AVX2)
-          Implementation{
-              DispatchLevel::AVX2,
-              &ByteStreamSplitDecodeSimd<xsimd::avx2, kNumStreams>,
-          },
+        Implementation{
+            DispatchLevel::AVX2,
+            &ByteStreamSplitDecodeSimd<xsimd::avx2, kNumStreams>,
+        },
 #endif
     };
   }
@@ -83,9 +82,9 @@ struct ByteStreamSplitEncodeDynamic {
   using Implementation = std::pair<DispatchLevel, FunctionType>;
 
   constexpr static auto implementations() {
-    return std::array {
-      Implementation {
-        DispatchLevel::NONE,
+    return std::array{
+        Implementation{
+            DispatchLevel::NONE,
 #if defined(ARROW_HAVE_NEON)
             // We always expect Neon to be available on Arm64
             &ByteStreamSplitEncodeSimd<xsimd::neon64, kNumStreams>,
@@ -95,10 +94,9 @@ struct ByteStreamSplitEncodeDynamic {
 #else
             &ByteStreamSplitEncodeScalar<kNumStreams>,
 #endif
-      }
-      ,
+        },
 #if defined(ARROW_HAVE_RUNTIME_AVX2)
-          Implementation{DispatchLevel::AVX2, &ByteStreamSplitEncodeAvx2<kNumStreams>},
+        Implementation{DispatchLevel::AVX2, &ByteStreamSplitEncodeAvx2<kNumStreams>},
 #endif
     };
   }
diff --git a/cpp/src/arrow/util/byte_stream_split_internal.h b/cpp/src/arrow/util/byte_stream_split_internal.h
index 33e9f4b28e..e237beb791 100644
--- a/cpp/src/arrow/util/byte_stream_split_internal.h
+++ b/cpp/src/arrow/util/byte_stream_split_internal.h
@@ -122,8 +122,8 @@ void ByteStreamSplitDecodeSimd(const uint8_t* data, int width, int64_t num_value
 
 // Like xsimd::zip_lo, but zip groups of kNumBytes at once.
 template <typename Arch, int kNumBytes>
-auto zip_lo_n(const xsimd::batch<int8_t, Arch>& a, const xsimd::batch<int8_t, Arch>& b)
-    -> xsimd::batch<int8_t, Arch> {
+auto zip_lo_n(const xsimd::batch<int8_t, Arch>& a,
+              const xsimd::batch<int8_t, Arch>& b) -> xsimd::batch<int8_t, Arch> {
   using arrow::internal::SizedInt;
   using simd_batch = xsimd::batch<int8_t, Arch>;
   // For signed arithmetic
@@ -146,8 +146,8 @@ auto zip_lo_n(const xsimd::batch<int8_t, Arch>& a, const xsimd::batch<int8_t, Ar
 
 // Like xsimd::zip_hi, but zip groups of kNumBytes at once.
 template <typename Arch, int kNumBytes>
-auto zip_hi_n(const xsimd::batch<int8_t, Arch>& a, const xsimd::batch<int8_t, Arch>& b)
-    -> xsimd::batch<int8_t, Arch> {
+auto zip_hi_n(const xsimd::batch<int8_t, Arch>& a,
+              const xsimd::batch<int8_t, Arch>& b) -> xsimd::batch<int8_t, Arch> {
   using simd_batch = xsimd::batch<int8_t, Arch>;
   using arrow::internal::SizedInt;
   // For signed arithmetic
diff --git a/cpp/src/arrow/util/compare.h b/cpp/src/arrow/util/compare.h
index 0594b6002f..e6deb4f6ac 100644
--- a/cpp/src/arrow/util/compare.h
+++ b/cpp/src/arrow/util/compare.h
@@ -51,8 +51,8 @@ class EqualityComparable {
     }
   };
 
-  friend bool operator==(T const& a, T const& b) { return a.Equals(b); }
-  friend bool operator!=(T const& a, T const& b) { return !(a == b); }
+  friend bool operator==(const T& a, const T& b) { return a.Equals(b); }
+  friend bool operator!=(const T& a, const T& b) { return !(a == b); }
 
  private:
   const T& cast() const { return static_cast<const T&>(*this); }
diff --git a/cpp/src/arrow/util/cpu_info.cc b/cpp/src/arrow/util/cpu_info.cc
index 8bcc814d48..fdd0728c8e 100644
--- a/cpp/src/arrow/util/cpu_info.cc
+++ b/cpp/src/arrow/util/cpu_info.cc
@@ -266,20 +266,20 @@ void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
   };
   std::vector<SysCtlCpuFeature> features = {
 #  if defined(CPUINFO_ARCH_X86)
-    {"hw.optional.sse4_2",
-     CpuInfo::SSSE3 | CpuInfo::SSE4_1 | CpuInfo::SSE4_2 | CpuInfo::POPCNT},
-    {"hw.optional.avx1_0", CpuInfo::AVX},
-    {"hw.optional.avx2_0", CpuInfo::AVX2},
-    {"hw.optional.bmi1", CpuInfo::BMI1},
-    {"hw.optional.bmi2", CpuInfo::BMI2},
-    {"hw.optional.avx512f", CpuInfo::AVX512F},
-    {"hw.optional.avx512cd", CpuInfo::AVX512CD},
-    {"hw.optional.avx512dq", CpuInfo::AVX512DQ},
-    {"hw.optional.avx512bw", CpuInfo::AVX512BW},
-    {"hw.optional.avx512vl", CpuInfo::AVX512VL},
+      {"hw.optional.sse4_2",
+       CpuInfo::SSSE3 | CpuInfo::SSE4_1 | CpuInfo::SSE4_2 | CpuInfo::POPCNT},
+      {"hw.optional.avx1_0", CpuInfo::AVX},
+      {"hw.optional.avx2_0", CpuInfo::AVX2},
+      {"hw.optional.bmi1", CpuInfo::BMI1},
+      {"hw.optional.bmi2", CpuInfo::BMI2},
+      {"hw.optional.avx512f", CpuInfo::AVX512F},
+      {"hw.optional.avx512cd", CpuInfo::AVX512CD},
+      {"hw.optional.avx512dq", CpuInfo::AVX512DQ},
+      {"hw.optional.avx512bw", CpuInfo::AVX512BW},
+      {"hw.optional.avx512vl", CpuInfo::AVX512VL},
 #  elif defined(CPUINFO_ARCH_ARM)
-    // ARM64 (note that this is exposed under Rosetta as well)
-    {"hw.optional.neon", CpuInfo::ASIMD},
+      // ARM64 (note that this is exposed under Rosetta as well)
+      {"hw.optional.neon", CpuInfo::ASIMD},
 #  endif
   };
   for (const auto& feature : features) {
@@ -351,21 +351,15 @@ int64_t LinuxParseCpuFlags(const std::string& values) {
     int64_t flag;
   } flag_mappings[] = {
 #    if defined(CPUINFO_ARCH_X86)
-    {"ssse3", CpuInfo::SSSE3},
-    {"sse4_1", CpuInfo::SSE4_1},
-    {"sse4_2", CpuInfo::SSE4_2},
-    {"popcnt", CpuInfo::POPCNT},
-    {"avx", CpuInfo::AVX},
-    {"avx2", CpuInfo::AVX2},
-    {"avx512f", CpuInfo::AVX512F},
-    {"avx512cd", CpuInfo::AVX512CD},
-    {"avx512vl", CpuInfo::AVX512VL},
-    {"avx512dq", CpuInfo::AVX512DQ},
-    {"avx512bw", CpuInfo::AVX512BW},
-    {"bmi1", CpuInfo::BMI1},
-    {"bmi2", CpuInfo::BMI2},
+      {"ssse3", CpuInfo::SSSE3},       {"sse4_1", CpuInfo::SSE4_1},
+      {"sse4_2", CpuInfo::SSE4_2},     {"popcnt", CpuInfo::POPCNT},
+      {"avx", CpuInfo::AVX},           {"avx2", CpuInfo::AVX2},
+      {"avx512f", CpuInfo::AVX512F},   {"avx512cd", CpuInfo::AVX512CD},
+      {"avx512vl", CpuInfo::AVX512VL}, {"avx512dq", CpuInfo::AVX512DQ},
+      {"avx512bw", CpuInfo::AVX512BW}, {"bmi1", CpuInfo::BMI1},
+      {"bmi2", CpuInfo::BMI2},
 #    elif defined(CPUINFO_ARCH_ARM)
-    {"asimd", CpuInfo::ASIMD},
+      {"asimd", CpuInfo::ASIMD},
 #    endif
   };
   const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
diff --git a/cpp/src/arrow/util/hash_util.h b/cpp/src/arrow/util/hash_util.h
index 7b3de22089..3052ddf5a7 100644
--- a/cpp/src/arrow/util/hash_util.h
+++ b/cpp/src/arrow/util/hash_util.h
@@ -54,7 +54,7 @@ inline void hash_combine_impl(uint32_t& h1, uint32_t k1) {
 }  // namespace detail
 
 template <class T>
-inline void hash_combine(std::size_t& seed, T const& v) {
+inline void hash_combine(std::size_t& seed, const T& v) {
   std::hash<T> hasher;
   return ::arrow::internal::detail::hash_combine_impl(seed, hasher(v));
 }
diff --git a/cpp/src/arrow/util/hashing.h b/cpp/src/arrow/util/hashing.h
index ac3beea266..53f92c8f23 100644
--- a/cpp/src/arrow/util/hashing.h
+++ b/cpp/src/arrow/util/hashing.h
@@ -461,8 +461,7 @@ class ScalarMemoTable : public MemoTable {
 
   template <typename Value>
   Status GetOrInsert(Value&& value, int32_t* out_memo_index) {
-    return GetOrInsert(
-        value, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
+    return GetOrInsert(value, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
   }
 
   int32_t GetNull() const { return null_index_; }
@@ -599,8 +598,7 @@ class SmallScalarMemoTable : public MemoTable {
   }
 
   Status GetOrInsert(const Scalar value, int32_t* out_memo_index) {
-    return GetOrInsert(
-        value, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
+    return GetOrInsert(value, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
   }
 
   int32_t GetNull() const { return value_to_index_[cardinality]; }
@@ -722,8 +720,7 @@ class BinaryMemoTable : public MemoTable {
 
   Status GetOrInsert(const void* data, builder_offset_type length,
                      int32_t* out_memo_index) {
-    return GetOrInsert(
-        data, length, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
+    return GetOrInsert(data, length, [](int32_t i) {}, [](int32_t i) {}, out_memo_index);
   }
 
   Status GetOrInsert(std::string_view value, int32_t* out_memo_index) {
diff --git a/cpp/src/arrow/util/rle_encoding_internal.h b/cpp/src/arrow/util/rle_encoding_internal.h
index 2c084e0b4c..984cd35b12 100644
--- a/cpp/src/arrow/util/rle_encoding_internal.h
+++ b/cpp/src/arrow/util/rle_encoding_internal.h
@@ -753,8 +753,8 @@ bool RleBitPackedDecoder<T>::Get(value_type* val) {
 }
 
 template <typename T>
-auto RleBitPackedDecoder<T>::GetBatch(value_type* out, rle_size_t batch_size)
-    -> rle_size_t {
+auto RleBitPackedDecoder<T>::GetBatch(value_type* out,
+                                      rle_size_t batch_size) -> rle_size_t {
   using ControlFlow = RleBitPackedParser::ControlFlow;
 
   rle_size_t values_read = 0;
@@ -867,8 +867,8 @@ template <typename Converter, typename BitRunReader, typename BitRun, typename v
 auto RunGetSpaced(Converter* converter, typename Converter::out_type* out,
                   rle_size_t batch_size, rle_size_t null_count,
                   rle_size_t value_bit_width, BitRunReader* validity_reader,
-                  BitRun* validity_run, RleRunDecoder<value_type>* decoder)
-    -> GetSpacedResult<rle_size_t> {
+                  BitRun* validity_run,
+                  RleRunDecoder<value_type>* decoder) -> GetSpacedResult<rle_size_t> {
   ARROW_DCHECK_GT(batch_size, 0);
   // The equality case is handled in the main loop in GetSpaced
   ARROW_DCHECK_LT(null_count, batch_size);
@@ -1147,8 +1147,8 @@ struct NoOpConverter {
 template <typename T>
 auto RleBitPackedDecoder<T>::GetBatchSpaced(rle_size_t batch_size, rle_size_t null_count,
                                             const uint8_t* valid_bits,
-                                            int64_t valid_bits_offset, value_type* out)
-    -> rle_size_t {
+                                            int64_t valid_bits_offset,
+                                            value_type* out) -> rle_size_t {
   if (null_count == 0) {
     return GetBatch(out, batch_size);
   }
@@ -1296,8 +1296,8 @@ template <typename T>
 template <typename V>
 auto RleBitPackedDecoder<T>::GetBatchWithDictSpaced(
     const V* dictionary, int32_t dictionary_length, V* out, rle_size_t batch_size,
-    rle_size_t null_count, const uint8_t* valid_bits, int64_t valid_bits_offset)
-    -> rle_size_t {
+    rle_size_t null_count, const uint8_t* valid_bits,
+    int64_t valid_bits_offset) -> rle_size_t {
   if (null_count == 0) {
     return GetBatchWithDict<V>(dictionary, dictionary_length, out, batch_size);
   }
diff --git a/cpp/src/arrow/util/span.h b/cpp/src/arrow/util/span.h
index 1e57ee8c8d..abe8e61beb 100644
--- a/cpp/src/arrow/util/span.h
+++ b/cpp/src/arrow/util/span.h
@@ -44,13 +44,13 @@ writing code which would break when it is replaced by std::span.)");
   using element_type = T;
   using value_type = std::remove_cv_t<T>;
   using iterator = T*;
-  using const_iterator = T const*;
+  using const_iterator = const T*;
 
   span() = default;
   span(const span&) = default;
   span& operator=(const span&) = default;
 
-  template <typename M, typename = std::enable_if_t<std::is_same_v<T, M const>>>
+  template <typename M, typename = std::enable_if_t<std::is_same_v<T, const M>>>
   // NOLINTNEXTLINE runtime/explicit
   constexpr span(span<M> mut) : span{mut.data(), mut.size()} {}
 
@@ -100,7 +100,7 @@ writing code which would break when it is replaced by std::span.)");
       return std::memcmp(data_, other.data_, size_bytes()) == 0;
     } else {
       T* ptr = data_;
-      for (T const& e : other) {
+      for (const T& e : other) {
         if (*ptr++ != e) return false;
       }
       return true;
@@ -120,7 +120,7 @@ template <typename T>
 span(T*, size_t) -> span<T>;
 
 template <typename T>
-constexpr span<std::byte const> as_bytes(span<T> s) {
+constexpr span<const std::byte> as_bytes(span<T> s) {
   return {reinterpret_cast<const std::byte*>(s.data()), s.size_bytes()};
 }
 
diff --git a/cpp/src/arrow/util/tracing.cc b/cpp/src/arrow/util/tracing.cc
index 18257eced7..d775d3712c 100644
--- a/cpp/src/arrow/util/tracing.cc
+++ b/cpp/src/arrow/util/tracing.cc
@@ -43,8 +43,7 @@ void Span::reset() {
 
 #else
 
-Span::Span() noexcept { /* details is left a nullptr */
-}
+Span::Span() noexcept { /* details is left a nullptr */ }
 
 bool Span::valid() const { return false; }
 void Span::reset() {}
diff --git a/cpp/src/arrow/util/value_parsing_test.cc b/cpp/src/arrow/util/value_parsing_test.cc
index a67f1d97f1..b9e3b18444 100644
--- a/cpp/src/arrow/util/value_parsing_test.cc
+++ b/cpp/src/arrow/util/value_parsing_test.cc
@@ -876,20 +876,18 @@ TEST(TimestampParser, StrptimeZoneOffset) {
   std::string format = "%Y-%d-%m %H:%M:%S%z";
   auto parser = TimestampParser::MakeStrptime(format);
 
-  std::vector<std::string> values = {
-    "2018-01-01 00:00:00+0000",
-    "2018-01-01 00:00:00+0100",
+  std::vector<std::string> values = {"2018-01-01 00:00:00+0000",
+                                     "2018-01-01 00:00:00+0100",
 #if defined(__GLIBC__) && defined(__GLIBC_MINOR__)
 // glibc < 2.28 doesn't support "-0117" timezone offset.
 // See also: https://github.com/apache/arrow/issues/43808
 #  if ((__GLIBC__ == 2) && (__GLIBC_MINOR__ >= 28)) || (__GLIBC__ >= 3)
-    "2018-01-01 00:00:00-0117",
+                                     "2018-01-01 00:00:00-0117",
 #  endif
 #else
-    "2018-01-01 00:00:00-0117",
+                                     "2018-01-01 00:00:00-0117",
 #endif
-    "2018-01-01 00:00:00+0130"
-  };
+                                     "2018-01-01 00:00:00+0130"};
 
   // N.B. GNU %z supports ISO8601 format while BSD %z supports only
   // +HHMM or -HHMM and POSIX doesn't appear to define %z at all
diff --git a/cpp/src/gandiva/encrypt_utils_test.cc b/cpp/src/gandiva/encrypt_utils_test.cc
index 5bc4c3957f..c5dd76be93 100644
--- a/cpp/src/gandiva/encrypt_utils_test.cc
+++ b/cpp/src/gandiva/encrypt_utils_test.cc
@@ -94,8 +94,9 @@ TEST(TestShaEncryptUtils, TestAesEncryptDecrypt) {
       { gandiva::aes_encrypt(to_encrypt, to_encrypt_len, key, key_len, cipher_4); },
       std::runtime_error);
 
-  ASSERT_THROW({ gandiva::aes_decrypt(cipher, cipher_len, key, key_len, plain_text); },
-               std::runtime_error);
+  ASSERT_THROW(
+      { gandiva::aes_decrypt(cipher, cipher_len, key, key_len, plain_text); },
+      std::runtime_error);
 
   key = "12345678";
   to_encrypt = "New\ntest\nstring";
@@ -107,6 +108,7 @@ TEST(TestShaEncryptUtils, TestAesEncryptDecrypt) {
   ASSERT_THROW(
       { gandiva::aes_encrypt(to_encrypt, to_encrypt_len, key, key_len, cipher_5); },
       std::runtime_error);
-  ASSERT_THROW({ gandiva::aes_decrypt(cipher, cipher_len, key, key_len, plain_text); },
-               std::runtime_error);
+  ASSERT_THROW(
+      { gandiva::aes_decrypt(cipher, cipher_len, key, key_len, plain_text); },
+      std::runtime_error);
 }
diff --git a/cpp/src/gandiva/gdv_hash_function_stubs.cc b/cpp/src/gandiva/gdv_hash_function_stubs.cc
index 41eef32445..0663601aca 100644
--- a/cpp/src/gandiva/gdv_hash_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_hash_function_stubs.cc
@@ -15,7 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//#pragma once
+// #pragma once
 
 #include "gandiva/engine.h"
 #include "gandiva/exported_funcs.h"
diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc
index aaa25db0a9..9d0a4d71af 100644
--- a/cpp/src/gandiva/precompiled/string_ops_test.cc
+++ b/cpp/src/gandiva/precompiled/string_ops_test.cc
@@ -103,15 +103,15 @@ TEST(TestStringOps, TestChrBigInt) {
   out = chr_int64(ctx_ptr, -66, &out_len);
   EXPECT_EQ(std::string(out, out_len), "\xBE");
 
-  //€
+  // €
   out = chr_int32(ctx_ptr, 128, &out_len);
   EXPECT_EQ(std::string(out, out_len), "\x80");
 
-  //œ
+  // œ
   out = chr_int64(ctx_ptr, 156, &out_len);
   EXPECT_EQ(std::string(out, out_len), "\x9C");
 
-  //ÿ
+  // ÿ
   out = chr_int32(ctx_ptr, 255, &out_len);
   EXPECT_EQ(std::string(out, out_len), "\xFF");
 
diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
index 0831fb6267..6f310f815c 100644
--- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
+++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc
@@ -378,19 +378,19 @@ const double test_traits<::arrow::DoubleType>::value(4.2);
 template <>
 struct test_traits<::arrow::StringType> {
   static constexpr ParquetType::type parquet_enum = ParquetType::BYTE_ARRAY;
-  static std::string const value;
+  static const std::string value;
 };
 
 template <>
 struct test_traits<::arrow::BinaryType> {
   static constexpr ParquetType::type parquet_enum = ParquetType::BYTE_ARRAY;
-  static std::string const value;
+  static const std::string value;
 };
 
 template <>
 struct test_traits<::arrow::FixedSizeBinaryType> {
   static constexpr ParquetType::type parquet_enum = ParquetType::FIXED_LEN_BYTE_ARRAY;
-  static std::string const value;
+  static const std::string value;
 };
 
 const std::string test_traits<::arrow::StringType>::value("Test");            // NOLINT

(arrow) branch main updated: GH-47167: [C++][Dev] Update clang-format dependency (#47168)

Reply via email to