This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 3e6182aed3 GH-49059: [C++] Fix issues found by OSS-Fuzz in IPC reader
(#49060)
3e6182aed3 is described below
commit 3e6182aed37fc5f4b4cbd1df46d29249019c6067
Author: Antoine Pitrou <[email protected]>
AuthorDate: Thu Jan 29 18:31:08 2026 +0100
GH-49059: [C++] Fix issues found by OSS-Fuzz in IPC reader (#49060)
### Rationale for this change
Fix two issues found by OSS-Fuzz in the IPC reader:
* a controlled abort on invalid IPC metadata:
https://oss-fuzz.com/testcase-detail/5301064831401984
* a nullptr dereference on invalid IPC metadata:
https://oss-fuzz.com/testcase-detail/5091511766417408
None of these two issues is a security issue.
### Are these changes tested?
Yes, by new unit tests and new fuzz regression files.
### Are there any user-facing changes?
No.
**This PR contains a "Critical Fix".** (If the changes fix either (a) a
security vulnerability, (b) a bug that caused incorrect or invalid data to be
produced, or (c) a bug that causes a crash (even when the API contract is
upheld), please provide explanation. If not, you can remove this.)
* GitHub Issue: #49059
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/ipc/reader.cc | 7 ++++---
cpp/src/arrow/record_batch.cc | 16 ++++++++++------
cpp/src/arrow/record_batch_test.cc | 16 +++++++++++++++-
testing | 2 +-
4 files changed, 30 insertions(+), 11 deletions(-)
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index f1571f76c2..046eacb6ce 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -245,7 +245,7 @@ class ArrayLoader {
}
Status GetBuffer(int buffer_index, std::shared_ptr<Buffer>* out) {
- auto buffers = metadata_->buffers();
+ auto* buffers = metadata_->buffers();
CHECK_FLATBUFFERS_NOT_NULL(buffers, "RecordBatch.buffers");
if (buffer_index >= static_cast<int>(buffers->size())) {
return Status::IOError("buffer_index out of range.");
@@ -262,7 +262,9 @@ class ArrayLoader {
Result<int64_t> GetVariadicCount(int i) {
auto* variadic_counts = metadata_->variadicBufferCounts();
+ auto* buffers = metadata_->buffers();
CHECK_FLATBUFFERS_NOT_NULL(variadic_counts,
"RecordBatch.variadicBufferCounts");
+ CHECK_FLATBUFFERS_NOT_NULL(buffers, "RecordBatch.buffers");
if (i >= static_cast<int>(variadic_counts->size())) {
return Status::IOError("variadic_count_index out of range.");
}
@@ -272,8 +274,7 @@ class ArrayLoader {
}
// Detect an excessive variadic buffer count to avoid potential memory
blowup
// (GH-48900).
- const auto max_buffer_count =
- static_cast<int64_t>(metadata_->buffers()->size()) - buffer_index_;
+ const auto max_buffer_count = static_cast<int64_t>(buffers->size()) -
buffer_index_;
if (count > max_buffer_count) {
return Status::IOError("variadic buffer count exceeds available number
of buffers");
}
diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc
index 1162b4c3bb..12e0f553b7 100644
--- a/cpp/src/arrow/record_batch.cc
+++ b/cpp/src/arrow/record_batch.cc
@@ -266,10 +266,13 @@ Result<std::shared_ptr<RecordBatch>>
RecordBatch::FromStructArray(
namespace {
Status ValidateColumnLength(const RecordBatch& batch, int i) {
- const auto& array = *batch.column(i);
- if (ARROW_PREDICT_FALSE(array.length() != batch.num_rows())) {
+ // This function is part of the validation code path and should
+ // be robust against invalid data, but `column()` would call MakeArray()
+ // that can abort on invalid data.
+ const auto& array = *batch.column_data(i);
+ if (ARROW_PREDICT_FALSE(array.length != batch.num_rows())) {
return Status::Invalid("Number of rows in column ", i,
- " did not match batch: ", array.length(), " vs ",
+ " did not match batch: ", array.length, " vs ",
batch.num_rows());
}
return Status::OK();
@@ -455,11 +458,12 @@ namespace {
Status ValidateBatch(const RecordBatch& batch, bool full_validation) {
for (int i = 0; i < batch.num_columns(); ++i) {
RETURN_NOT_OK(ValidateColumnLength(batch, i));
- const auto& array = *batch.column(i);
+ // See ValidateColumnLength about avoiding a ArrayData -> Array conversion
+ const auto& array = *batch.column_data(i);
const auto& schema_type = batch.schema()->field(i)->type();
- if (!array.type()->Equals(schema_type)) {
+ if (!array.type->Equals(schema_type)) {
return Status::Invalid("Column ", i,
- " type not match schema: ",
array.type()->ToString(), " vs ",
+ " type not match schema: ",
array.type->ToString(), " vs ",
schema_type->ToString());
}
const auto st = full_validation ? internal::ValidateArrayFull(array)
diff --git a/cpp/src/arrow/record_batch_test.cc
b/cpp/src/arrow/record_batch_test.cc
index 4516b808a8..a037d7261e 100644
--- a/cpp/src/arrow/record_batch_test.cc
+++ b/cpp/src/arrow/record_batch_test.cc
@@ -318,7 +318,6 @@ TEST_F(TestRecordBatch, Validate) {
auto a3 = gen.ArrayOf(int16(), 5);
auto b1 = RecordBatch::Make(schema, length, {a0, a1, a2});
-
ASSERT_OK(b1->ValidateFull());
// Length mismatch
@@ -328,6 +327,21 @@ TEST_F(TestRecordBatch, Validate) {
// Type mismatch
auto b3 = RecordBatch::Make(schema, length, {a0, a1, a0});
ASSERT_RAISES(Invalid, b3->ValidateFull());
+
+ // Invalid column data (nulls in map key array) that would abort on MakeArray
+ auto map_field = field("f", map(utf8(), int32()));
+ schema = ::arrow::schema({map_field});
+ auto map_key_data = ArrayFromJSON(utf8(), "[null]")->data();
+ auto map_item_data = ArrayFromJSON(int32(), "[null]")->data();
+ auto map_data = ArrayData::Make(map_field->type(), /*length=*/1,
/*buffers=*/{nullptr},
+ /*child_data=*/{map_key_data,
map_item_data});
+
+ auto b4 = RecordBatch::Make(schema, /*num_rows=*/map_data->length,
{map_data});
+ ASSERT_RAISES(Invalid, b4->ValidateFull());
+
+ // Length mismatch with a column data that would also fail on MakeArray
+ auto b5 = RecordBatch::Make(schema, /*num_rows=*/1 + map_data->length,
{map_data});
+ ASSERT_RAISES(Invalid, b5->Validate());
}
TEST_F(TestRecordBatch, Slice) {
diff --git a/testing b/testing
index 7b641152dc..df428ddaa2 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit 7b641152dcb0f9e197ebe24a1986151849250959
+Subproject commit df428ddaa22d94dfb525af4c0951f3dafb463795