This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 610c201ff0 GH-46659: [C++] Fix export of extension arrays with binary 
view/string view storage (#46660)
610c201ff0 is described below

commit 610c201ff015acf6df201f3c48af67070b23713b
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Jun 4 17:15:08 2025 -0500

    GH-46659: [C++] Fix export of extension arrays with binary view/string view 
storage (#46660)
    
    ### Rationale for this change
    
    Extension arrays exported with binary view/string view storage did not 
export the variadic sizes buffer which resulted in crashes when reimporting.
    
    ### What changes are included in this PR?
    
    The expression that controlled whether the variadic sizes buffer was 
written was updated.
    
    ### Are these changes tested?
    
    Yes, a test was added
    
    ### Are there any user-facing changes?
    
    No
    * GitHub Issue: #46659
    
    Lead-authored-by: Dewey Dunnington <[email protected]>
    Co-authored-by: Dewey Dunnington <[email protected]>
    Co-authored-by: Benjamin Kietzman <[email protected]>
    Signed-off-by: Dewey Dunnington <[email protected]>
---
 cpp/src/arrow/c/bridge.cc              |  4 ++--
 cpp/src/arrow/c/bridge_test.cc         | 13 +++++++++++--
 cpp/src/arrow/testing/extension_type.h | 20 ++++++++++++++++++++
 cpp/src/arrow/testing/gtest_util.cc    | 28 ++++++++++++++++++++++++++++
 4 files changed, 61 insertions(+), 4 deletions(-)

diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc
index 0c158322d0..9c01300df4 100644
--- a/cpp/src/arrow/c/bridge.cc
+++ b/cpp/src/arrow/c/bridge.cc
@@ -586,8 +586,8 @@ struct ArrayExporter {
       ++buffers_begin;
     }
 
-    bool need_variadic_buffer_sizes =
-        data->type->id() == Type::BINARY_VIEW || data->type->id() == 
Type::STRING_VIEW;
+    bool need_variadic_buffer_sizes = data->type->storage_id() == 
Type::BINARY_VIEW ||
+                                      data->type->storage_id() == 
Type::STRING_VIEW;
     if (need_variadic_buffer_sizes) {
       ++n_buffers;
     }
diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc
index 75d5d1f428..4bcb821650 100644
--- a/cpp/src/arrow/c/bridge_test.cc
+++ b/cpp/src/arrow/c/bridge_test.cc
@@ -580,8 +580,10 @@ struct ArrayExportChecker {
       --expected_n_buffers;
       ++expected_buffers;
     }
-    bool has_variadic_buffer_sizes = expected_data.type->id() == 
Type::STRING_VIEW ||
-                                     expected_data.type->id() == 
Type::BINARY_VIEW;
+
+    bool has_variadic_buffer_sizes =
+        expected_data.type->storage_id() == Type::BINARY_VIEW ||
+        expected_data.type->storage_id() == Type::STRING_VIEW;
     ASSERT_EQ(c_export->n_buffers, expected_n_buffers + 
has_variadic_buffer_sizes);
     ASSERT_NE(c_export->buffers, nullptr);
 
@@ -960,6 +962,13 @@ TEST_F(TestArrayExport, BinaryViewMultipleBuffers) {
   });
 }
 
+TEST_F(TestArrayExport, BinaryViewExtensionWithMultipleBuffers) {
+  TestPrimitive([&] {
+    auto storage = MakeBinaryViewArrayWithMultipleDataBuffers();
+    return BinaryViewExtensionType::WrapArray(binary_view_extension_type(), 
storage);
+  });
+}
+
 TEST_F(TestArrayExport, Null) {
   TestPrimitive(null(), "[null, null, null]");
   TestPrimitive(null(), "[]");
diff --git a/cpp/src/arrow/testing/extension_type.h 
b/cpp/src/arrow/testing/extension_type.h
index a4526e31c2..9b4492a543 100644
--- a/cpp/src/arrow/testing/extension_type.h
+++ b/cpp/src/arrow/testing/extension_type.h
@@ -132,6 +132,23 @@ class ARROW_TESTING_EXPORT DictExtensionType : public 
ExtensionType {
   std::string Serialize() const override { return "dict-extension-serialized"; 
}
 };
 
+class ARROW_TESTING_EXPORT BinaryViewExtensionType : public ExtensionType {
+ public:
+  BinaryViewExtensionType() : ExtensionType(binary_view()) {}
+
+  std::string extension_name() const override { return "binary_view"; }
+
+  bool ExtensionEquals(const ExtensionType& other) const override;
+
+  std::shared_ptr<Array> MakeArray(std::shared_ptr<ArrayData> data) const 
override;
+
+  Result<std::shared_ptr<DataType>> Deserialize(
+      std::shared_ptr<DataType> storage_type,
+      const std::string& serialized) const override;
+
+  std::string Serialize() const override { return "binary_view_serialized"; }
+};
+
 // A minimal extension type that does not error when passed blank extension 
information
 class ARROW_TESTING_EXPORT MetadataOptionalExtensionType : public 
ExtensionType {
  public:
@@ -190,6 +207,9 @@ std::shared_ptr<DataType> list_extension_type();
 ARROW_TESTING_EXPORT
 std::shared_ptr<DataType> dict_extension_type();
 
+ARROW_TESTING_EXPORT
+std::shared_ptr<DataType> binary_view_extension_type();
+
 ARROW_TESTING_EXPORT
 std::shared_ptr<DataType> complex128();
 
diff --git a/cpp/src/arrow/testing/gtest_util.cc 
b/cpp/src/arrow/testing/gtest_util.cc
index 99d6cabde9..c49106757e 100644
--- a/cpp/src/arrow/testing/gtest_util.cc
+++ b/cpp/src/arrow/testing/gtest_util.cc
@@ -944,6 +944,30 @@ Result<std::shared_ptr<DataType>> 
DictExtensionType::Deserialize(
   return std::make_shared<DictExtensionType>();
 }
 
+bool BinaryViewExtensionType::ExtensionEquals(const ExtensionType& other) 
const {
+  return (other.extension_name() == this->extension_name());
+}
+
+std::shared_ptr<Array> BinaryViewExtensionType::MakeArray(
+    std::shared_ptr<ArrayData> data) const {
+  DCHECK_EQ(data->type->id(), Type::EXTENSION);
+  DCHECK_EQ("binary_view",
+            static_cast<const ExtensionType&>(*data->type).extension_name());
+  return std::make_shared<TinyintArray>(data);
+}
+
+Result<std::shared_ptr<DataType>> BinaryViewExtensionType::Deserialize(
+    std::shared_ptr<DataType> storage_type, const std::string& serialized) 
const {
+  if (serialized != "binary_view_serialized") {
+    return Status::Invalid("Type identifier did not match: '", serialized, 
"'");
+  }
+  if (!storage_type->Equals(*int16())) {
+    return Status::Invalid("Invalid storage type for BinaryViewExtensionType: 
",
+                           storage_type->ToString());
+  }
+  return std::make_shared<BinaryViewExtensionType>();
+}
+
 bool Complex128Type::ExtensionEquals(const ExtensionType& other) const {
   return (other.extension_name() == this->extension_name());
 }
@@ -976,6 +1000,10 @@ std::shared_ptr<DataType> list_extension_type() {
   return std::make_shared<ListExtensionType>();
 }
 
+std::shared_ptr<DataType> binary_view_extension_type() {
+  return std::make_shared<BinaryViewExtensionType>();
+}
+
 std::shared_ptr<DataType> dict_extension_type() {
   return std::make_shared<DictExtensionType>();
 }

Reply via email to