This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new e4cdd003d7 GH-46938: [C++] Enhance arrow::ChunkedArray::Equals to 
support floating-point comparison when values share the same memory (#47044)
e4cdd003d7 is described below

commit e4cdd003d789afbaa33c006ea665dbf79ff708bd
Author: Arash Andishgar <[email protected]>
AuthorDate: Thu Jul 10 03:54:37 2025 +0330

    GH-46938: [C++] Enhance arrow::ChunkedArray::Equals to support 
floating-point comparison when values share the same memory (#47044)
    
    ### Rationale for this change
    
    As discussed 
[here](https://github.com/apache/arrow/issues/46938#issue-3187249840), this is 
a minor enhancement to `arrow::ChunkedArray::Equals`.
    
    ### What changes are included in this PR?
    
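    A minor improvement to `arrow::ChunkedArray::Equals` for the case where a
    chunked array is compared with itself (same address). The comparison now
    returns true immediately when `EqualOptions::nans_equal` is set; otherwise
    it takes the shortcut only when the type, including any nested field,
    contains no floating-point type. The helper `mayHaveNaN` is renamed to
    `ContainsFloatType` to reflect what it actually checks.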
    
    ### Are these changes tested?
    
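    Yes, I ran the relevant unit tests. The former
    `EqualsSameAddressWithNaNs` test is split into four focused cases
    (`NonFloatType`, `NestedTypeWithoutFloat`, `FloatType`,
    `NestedTypeWithFloat`), and the float cases additionally check the result
    with `EqualOptions::Defaults().nans_equal(true)`.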
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #46938
    
    Authored-by: Arash Andishgar <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/arrow/chunked_array.cc      | 19 ++++++++----
 cpp/src/arrow/chunked_array_test.cc | 62 ++++++++++++++++++++++---------------
 2 files changed, 50 insertions(+), 31 deletions(-)

diff --git a/cpp/src/arrow/chunked_array.cc b/cpp/src/arrow/chunked_array.cc
index 32578ffd93..0fa174c175 100644
--- a/cpp/src/arrow/chunked_array.cc
+++ b/cpp/src/arrow/chunked_array.cc
@@ -100,24 +100,31 @@ DeviceAllocationTypeSet ChunkedArray::device_types() 
const {
 }
 namespace {
 
-bool mayHaveNaN(const arrow::DataType& type) {
-  if (type.num_fields() == 0) {
-    return is_floating(type.id());
+// Check whether the type or any of its children is a float type.
+bool ContainsFloatType(const DataType& type) {
+  if (is_floating(type.id())) {
+    return true;
   } else {
+    // Check if any nested field contains a float type.
     for (const auto& field : type.fields()) {
-      if (mayHaveNaN(*field->type())) {
+      if (ContainsFloatType(*field->type())) {
         return true;
       }
     }
   }
+  // No float types are observed
   return false;
 }
 
 }  //  namespace
 
 bool ChunkedArray::Equals(const ChunkedArray& other, const EqualOptions& opts) 
const {
-  if (this == &other && !mayHaveNaN(*type_)) {
-    return true;
+  if (this == &other) {
+    if (opts.nans_equal()) {
+      return true;
+    } else if (!ContainsFloatType(*type_)) {
+      return true;
+    }
   }
   if (length_ != other.length()) {
     return false;
diff --git a/cpp/src/arrow/chunked_array_test.cc 
b/cpp/src/arrow/chunked_array_test.cc
index 689ef57c59..326eb24d08 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -153,33 +153,45 @@ TEST_F(TestChunkedArray, EqualsDifferingMetadata) {
   ASSERT_TRUE(left.Equals(right));
 }
 
-TEST_F(TestChunkedArray, EqualsSameAddressWithNaNs) {
-  auto chunk_with_nan1 = ArrayFromJSON(float64(), "[0, 1, 2, NaN]");
-  auto chunk_without_nan1 = ArrayFromJSON(float64(), "[3, 4, 5]");
-  ArrayVector chunks1 = {chunk_with_nan1, chunk_without_nan1};
-  ASSERT_OK_AND_ASSIGN(auto chunked_array_with_nan1, 
ChunkedArray::Make(chunks1));
-  ASSERT_FALSE(chunked_array_with_nan1->Equals(chunked_array_with_nan1));
-
-  auto chunk_without_nan2 = ArrayFromJSON(float64(), "[6, 7, 8, 9]");
-  ArrayVector chunks2 = {chunk_without_nan1, chunk_without_nan2};
-  ASSERT_OK_AND_ASSIGN(auto chunked_array_without_nan1, 
ChunkedArray::Make(chunks2));
-  ASSERT_TRUE(chunked_array_without_nan1->Equals(chunked_array_without_nan1));
+class TestChunkedArrayEqualsSameAddress : public TestChunkedArray {};
 
+TEST_F(TestChunkedArrayEqualsSameAddress, NonFloatType) {
   auto int32_array = ArrayFromJSON(int32(), "[0, 1, 2]");
-  auto float64_array_with_nan = ArrayFromJSON(float64(), "[0, 1, NaN]");
-  ArrayVector arrays1 = {int32_array, float64_array_with_nan};
-  std::vector<std::string> fieldnames = {"Int32Type", "Float64Type"};
-  ASSERT_OK_AND_ASSIGN(auto struct_with_nan, StructArray::Make(arrays1, 
fieldnames));
-  ArrayVector chunks3 = {struct_with_nan};
-  ASSERT_OK_AND_ASSIGN(auto chunked_array_with_nan2, 
ChunkedArray::Make(chunks3));
-  ASSERT_FALSE(chunked_array_with_nan2->Equals(chunked_array_with_nan2));
-
-  auto float64_array_without_nan = ArrayFromJSON(float64(), "[0, 1, 2]");
-  ArrayVector arrays2 = {int32_array, float64_array_without_nan};
-  ASSERT_OK_AND_ASSIGN(auto struct_without_nan, StructArray::Make(arrays2, 
fieldnames));
-  ArrayVector chunks4 = {struct_without_nan};
-  ASSERT_OK_AND_ASSIGN(auto chunked_array_without_nan2, 
ChunkedArray::Make(chunks4));
-  ASSERT_TRUE(chunked_array_without_nan2->Equals(chunked_array_without_nan2));
+  ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({int32_array}));
+  ASSERT_TRUE(chunked_array->Equals(chunked_array));
+}
+
+TEST_F(TestChunkedArrayEqualsSameAddress, NestedTypeWithoutFloat) {
+  auto int32_array = ArrayFromJSON(int32(), "[0, 1]");
+  ASSERT_OK_AND_ASSIGN(auto struct_array,
+                       StructArray::Make({int32_array}, {"Int32Type"}));
+  ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({struct_array}));
+
+  ASSERT_TRUE(chunked_array->Equals(chunked_array));
+}
+
+TEST_F(TestChunkedArrayEqualsSameAddress, FloatType) {
+  auto float64_array = ArrayFromJSON(float64(), "[0.0, 1.0, 2.0, NaN]");
+  ASSERT_OK_AND_ASSIGN(auto chunked_array, 
ChunkedArray::Make({float64_array}));
+
+  ASSERT_FALSE(chunked_array->Equals(chunked_array));
+
+  // Assert when EqualOptions::nans_equal_ is set
+  ASSERT_TRUE(
+      chunked_array->Equals(chunked_array, 
EqualOptions::Defaults().nans_equal(true)));
+}
+
+TEST_F(TestChunkedArrayEqualsSameAddress, NestedTypeWithFloat) {
+  auto float64_array = ArrayFromJSON(float64(), "[0.0, 1.0, NaN]");
+  ASSERT_OK_AND_ASSIGN(auto struct_array,
+                       StructArray::Make({float64_array}, {"Float64Type"}));
+  ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({struct_array}));
+
+  ASSERT_FALSE(chunked_array->Equals(chunked_array));
+
+  // Assert when EqualOptions::nans_equal_ is set
+  ASSERT_TRUE(
+      chunked_array->Equals(chunked_array, 
EqualOptions::Defaults().nans_equal(true)));
 }
 
 TEST_F(TestChunkedArray, ApproxEquals) {

Reply via email to