This is an automated email from the ASF dual-hosted git repository.

maplefu pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new aa45c12402 GH-46988: [C++][Parquet] Fix FLBA DecodeArrow multiply 
overflow (#46991)
aa45c12402 is described below

commit aa45c12402dfd638f2e0ff7c2957c0512a3595a9
Author: mwish <[email protected]>
AuthorDate: Fri Jul 4 22:31:31 2025 +0800

    GH-46988: [C++][Parquet] Fix FLBA DecodeArrow multiply overflow (#46991)
    
    ### Rationale for this change
    
    See: https://github.com/apache/arrow/issues/46988
    
    Note that this is a long-standing problem, not a recent regression.
    
    ### What changes are included in this PR?
    
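    Add a `CheckPageLargeEnough` helper that performs the page-size check with a 64-bit multiplication, and call it from the `PlainDecoder::DecodeArrow` overloads in place of the inlined `int` multiplication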
    
    ### Are these changes tested?
    
    Yes, by a file in the arrow-testing repository (the `testing` submodule is bumped in this commit)
    
    ### Are there any user-facing changes?
    
    No, just a minor bugfix
    * GitHub Issue: #46988
    
    Authored-by: mwish <[email protected]>
    Signed-off-by: mwish <[email protected]>
---
 cpp/src/parquet/decoder.cc | 23 +++++++++++------------
 testing                    |  2 +-
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/cpp/src/parquet/decoder.cc b/cpp/src/parquet/decoder.cc
index 3bcdc55d8e..57a2316bf4 100644
--- a/cpp/src/parquet/decoder.cc
+++ b/cpp/src/parquet/decoder.cc
@@ -260,6 +260,13 @@ auto DispatchArrowBinaryHelper(typename 
EncodingTraits<DType>::Accumulator* acc,
   }
 }
 
+void CheckPageLargeEnough(int64_t remaining_bytes, int32_t value_width,  // callers pass len_ and the per-value byte size
+                          int64_t num_values) {  // callers pass num_values - null_count
+  if (remaining_bytes < value_width * num_values) {  // value_width converts to int64_t, so the product is 64-bit
+    ParquetException::EofException();  // reached when fewer bytes remain than the values need
+  }
+}
+
 // Internal decoder class hierarchy
 
 class DecoderImpl : virtual public Decoder {
@@ -385,9 +392,7 @@ int PlainDecoder<DType>::DecodeArrow(
 
   constexpr int value_size = static_cast<int>(sizeof(value_type));
   int values_decoded = num_values - null_count;
-  if (ARROW_PREDICT_FALSE(this->len_ < value_size * values_decoded)) {
-    ParquetException::EofException();
-  }
+  CheckPageLargeEnough(this->len_, value_size, values_decoded);
 
   const uint8_t* data = this->data_;
 
@@ -419,9 +424,7 @@ int PlainDecoder<DType>::DecodeArrow(
 
   constexpr int value_size = static_cast<int>(sizeof(value_type));
   int values_decoded = num_values - null_count;
-  if (ARROW_PREDICT_FALSE(this->len_ < value_size * values_decoded)) {
-    ParquetException::EofException();
-  }
+  CheckPageLargeEnough(this->len_, value_size, values_decoded);
 
   const uint8_t* data = this->data_;
 
@@ -659,9 +662,7 @@ inline int PlainDecoder<FLBAType>::DecodeArrow(
     typename EncodingTraits<FLBAType>::Accumulator* builder) {
   const int byte_width = this->type_length_;
   const int values_decoded = num_values - null_count;
-  if (ARROW_PREDICT_FALSE(len_ < byte_width * values_decoded)) {
-    ParquetException::EofException();
-  }
+  CheckPageLargeEnough(len_, byte_width, values_decoded);
 
   PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
 
@@ -691,9 +692,7 @@ inline int PlainDecoder<FLBAType>::DecodeArrow(
     typename EncodingTraits<FLBAType>::DictAccumulator* builder) {
   const int byte_width = this->type_length_;
   const int values_decoded = num_values - null_count;
-  if (ARROW_PREDICT_FALSE(len_ < byte_width * values_decoded)) {
-    ParquetException::EofException();
-  }
+  CheckPageLargeEnough(len_, byte_width, values_decoded);
 
   PARQUET_THROW_NOT_OK(builder->Reserve(num_values));
   PARQUET_THROW_NOT_OK(
diff --git a/testing b/testing
index 7b94287bd0..fbf6b703dc 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit 7b94287bd0ed7436aaf6a5fda60eda002f6d9d1c
+Subproject commit fbf6b703dc93d17d75fa3664c5aa2c7873ebaf06

Reply via email to