(arrow) branch main updated: GH-49059: [C++] Fix issues found by OSS-Fuzz in IPC reader (#49060)

apitrou Thu, 29 Jan 2026 09:32:07 -0800

This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 3e6182aed3 GH-49059: [C++] Fix issues found by OSS-Fuzz in IPC reader 
(#49060)
3e6182aed3 is described below

commit 3e6182aed37fc5f4b4cbd1df46d29249019c6067
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Jan 29 18:31:08 2026 +0100

    GH-49059: [C++] Fix issues found by OSS-Fuzz in IPC reader (#49060)
    
    ### Rationale for this change
    
    Fix two issues found by OSS-Fuzz in the IPC reader:
    
    * a controlled abort on invalid IPC metadata: 
https://oss-fuzz.com/testcase-detail/5301064831401984
    * a nullptr dereference on invalid IPC metadata: 
https://oss-fuzz.com/testcase-detail/5091511766417408
    
    None of these two issues is a security issue.
    
    ### Are these changes tested?
    
    Yes, by new unit tests and new fuzz regression files.
    
    ### Are there any user-facing changes?
    
    No.
    
    **This PR contains a "Critical Fix".** (If the changes fix either (a) a 
security vulnerability, (b) a bug that caused incorrect or invalid data to be 
produced, or (c) a bug that causes a crash (even when the API contract is 
upheld), please provide explanation. If not, you can remove this.)
    
    * GitHub Issue: #49059
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/ipc/reader.cc        |  7 ++++---
 cpp/src/arrow/record_batch.cc      | 16 ++++++++++------
 cpp/src/arrow/record_batch_test.cc | 16 +++++++++++++++-
 testing                            |  2 +-
 4 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index f1571f76c2..046eacb6ce 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -245,7 +245,7 @@ class ArrayLoader {
   }
 
   Status GetBuffer(int buffer_index, std::shared_ptr<Buffer>* out) {
-    auto buffers = metadata_->buffers();
+    auto* buffers = metadata_->buffers();
     CHECK_FLATBUFFERS_NOT_NULL(buffers, "RecordBatch.buffers");
     if (buffer_index >= static_cast<int>(buffers->size())) {
       return Status::IOError("buffer_index out of range.");
@@ -262,7 +262,9 @@ class ArrayLoader {
 
   Result<int64_t> GetVariadicCount(int i) {
     auto* variadic_counts = metadata_->variadicBufferCounts();
+    auto* buffers = metadata_->buffers();
     CHECK_FLATBUFFERS_NOT_NULL(variadic_counts, 
"RecordBatch.variadicBufferCounts");
+    CHECK_FLATBUFFERS_NOT_NULL(buffers, "RecordBatch.buffers");
     if (i >= static_cast<int>(variadic_counts->size())) {
       return Status::IOError("variadic_count_index out of range.");
     }
@@ -272,8 +274,7 @@ class ArrayLoader {
     }
     // Detect an excessive variadic buffer count to avoid potential memory 
blowup
     // (GH-48900).
-    const auto max_buffer_count =
-        static_cast<int64_t>(metadata_->buffers()->size()) - buffer_index_;
+    const auto max_buffer_count = static_cast<int64_t>(buffers->size()) - 
buffer_index_;
     if (count > max_buffer_count) {
       return Status::IOError("variadic buffer count exceeds available number 
of buffers");
     }
diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index 1162b4c3bb..12e0f553b7 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -266,10 +266,13 @@ Result<std::shared_ptr<RecordBatch>> 
RecordBatch::FromStructArray(
 namespace {
 
 Status ValidateColumnLength(const RecordBatch& batch, int i) {
-  const auto& array = *batch.column(i);
-  if (ARROW_PREDICT_FALSE(array.length() != batch.num_rows())) {
+  // This function is part of the validation code path and should
+  // be robust against invalid data, but `column()` would call MakeArray()
+  // that can abort on invalid data.
+  const auto& array = *batch.column_data(i);
+  if (ARROW_PREDICT_FALSE(array.length != batch.num_rows())) {
     return Status::Invalid("Number of rows in column ", i,
-                           " did not match batch: ", array.length(), " vs ",
+                           " did not match batch: ", array.length, " vs ",
                            batch.num_rows());
   }
   return Status::OK();
@@ -455,11 +458,12 @@ namespace {
 Status ValidateBatch(const RecordBatch& batch, bool full_validation) {
   for (int i = 0; i < batch.num_columns(); ++i) {
     RETURN_NOT_OK(ValidateColumnLength(batch, i));
-    const auto& array = *batch.column(i);
+    // See ValidateColumnLength about avoiding a ArrayData -> Array conversion
+    const auto& array = *batch.column_data(i);
     const auto& schema_type = batch.schema()->field(i)->type();
-    if (!array.type()->Equals(schema_type)) {
+    if (!array.type->Equals(schema_type)) {
       return Status::Invalid("Column ", i,
-                             " type not match schema: ", 
array.type()->ToString(), " vs ",
+                             " type not match schema: ", 
array.type->ToString(), " vs ",
                              schema_type->ToString());
     }
     const auto st = full_validation ? internal::ValidateArrayFull(array)
diff --git a/cpp/src/arrow/record_batch_test.cc 
b/cpp/src/arrow/record_batch_test.cc
index 4516b808a8..a037d7261e 100644
--- a/cpp/src/arrow/record_batch_test.cc
+++ b/cpp/src/arrow/record_batch_test.cc
@@ -318,7 +318,6 @@ TEST_F(TestRecordBatch, Validate) {
   auto a3 = gen.ArrayOf(int16(), 5);
 
   auto b1 = RecordBatch::Make(schema, length, {a0, a1, a2});
-
   ASSERT_OK(b1->ValidateFull());
 
   // Length mismatch
@@ -328,6 +327,21 @@ TEST_F(TestRecordBatch, Validate) {
   // Type mismatch
   auto b3 = RecordBatch::Make(schema, length, {a0, a1, a0});
   ASSERT_RAISES(Invalid, b3->ValidateFull());
+
+  // Invalid column data (nulls in map key array) that would abort on MakeArray
+  auto map_field = field("f", map(utf8(), int32()));
+  schema = ::arrow::schema({map_field});
+  auto map_key_data = ArrayFromJSON(utf8(), "[null]")->data();
+  auto map_item_data = ArrayFromJSON(int32(), "[null]")->data();
+  auto map_data = ArrayData::Make(map_field->type(), /*length=*/1, 
/*buffers=*/{nullptr},
+                                  /*child_data=*/{map_key_data, 
map_item_data});
+
+  auto b4 = RecordBatch::Make(schema, /*num_rows=*/map_data->length, 
{map_data});
+  ASSERT_RAISES(Invalid, b4->ValidateFull());
+
+  // Length mismatch with a column data that would also fail on MakeArray
+  auto b5 = RecordBatch::Make(schema, /*num_rows=*/1 + map_data->length, 
{map_data});
+  ASSERT_RAISES(Invalid, b5->Validate());
 }
 
 TEST_F(TestRecordBatch, Slice) {
diff --git a/testing b/testing
index 7b641152dc..df428ddaa2 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit 7b641152dcb0f9e197ebe24a1986151849250959
+Subproject commit df428ddaa22d94dfb525af4c0951f3dafb463795

(arrow) branch main updated: GH-49059: [C++] Fix issues found by OSS-Fuzz in IPC reader (#49060)

Reply via email to