This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new d2a171805c GH-47012: [C++][Parquet] Reserve values correctly when reading BYTE_ARRAY and FLBA (#47013)
d2a171805c is described below

commit d2a171805c63caa27f05232695b753e07c32cb1d
Author: Antoine Pitrou <[email protected]>
AuthorDate: Wed Jul 9 09:35:19 2025 +0200

    GH-47012: [C++][Parquet] Reserve values correctly when reading BYTE_ARRAY and FLBA (#47013)
    
    ### Rationale for this change
    
    When reading a Parquet leaf column as Arrow, we [presize the Arrow builder](https://github.com/apache/arrow/blob/a0cc2d8ed35dce7ee6c3e7cbcc4867216a9ef16f/cpp/src/parquet/arrow/reader.cc#L487-L488) so as to avoid spurious reallocations during incremental Parquet decoding calls.
    
    However, the Reserve method on RecordReader will [only properly reserve values](https://github.com/apache/arrow/blob/a0cc2d8ed35dce7ee6c3e7cbcc4867216a9ef16f/cpp/src/parquet/column_reader.cc#L1693-L1696) for non-FLBA non-BYTE_ARRAY physical types.
    
    The result is that, on some of our micro-benchmarks, we spend a significant 
amount of time reallocating data on the ArrayBuilder.
    
    ### What changes are included in this PR?
    
    Properly reserve space on Array builders when reading Parquet data as 
Arrow. Note that, when reading into Binary or LargeBinary, this doesn't avoid 
reallocations for the actual data. However, for FixedSizeBinary and BinaryView, 
this is sufficient to avoid any reallocations.
    
    Benchmark numbers on my local machine (Ubuntu 24.04):
    ```
    
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    Non-regressions: (250)
    
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
                                                                                
     benchmark         baseline        contender  change %                      
                                                                                
                                                                                
                                               counters
                              
BM_ReadColumnPlain<false,Float16LogicalType>/null_probability:-1    3.295 
GiB/sec    7.834 GiB/sec   137.771                               
{'family_index': 10, 'per_family_instance_index': 0, 'run_name': 
'BM_ReadColumnPlain<false,Float16LogicalType>/null_probability:-1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 118}
                    
BM_ReadColumnByteStreamSplit<false,Float16LogicalType>/null_probability:-1    
3.453 GiB/sec    8.148 GiB/sec   135.957                     {'family_index': 
12, 'per_family_instance_index': 0, 'run_name': 
'BM_ReadColumnByteStreamSplit<false,Float16LogicalType>/null_probability:-1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 119}
                    
BM_ReadColumnByteStreamSplit<true,Float16LogicalType>/null_probability:100    
1.360 GiB/sec    1.780 GiB/sec    30.870                      {'family_index': 
13, 'per_family_instance_index': 4, 'run_name': 
'BM_ReadColumnByteStreamSplit<true,Float16LogicalType>/null_probability:100', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 49}
                              
BM_ReadColumnPlain<true,Float16LogicalType>/null_probability:100    1.360 
GiB/sec    1.780 GiB/sec    30.861                                
{'family_index': 11, 'per_family_instance_index': 4, 'run_name': 
'BM_ReadColumnPlain<true,Float16LogicalType>/null_probability:100', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 49}
                      
BM_ReadColumnByteStreamSplit<true,Float16LogicalType>/null_probability:0    
1.292 GiB/sec    1.662 GiB/sec    28.666                        
{'family_index': 13, 'per_family_instance_index': 0, 'run_name': 
'BM_ReadColumnByteStreamSplit<true,Float16LogicalType>/null_probability:0', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 47}
                                
BM_ReadColumnPlain<true,Float16LogicalType>/null_probability:0    1.304 GiB/sec 
   1.665 GiB/sec    27.691                                  {'family_index': 
11, 'per_family_instance_index': 0, 'run_name': 
'BM_ReadColumnPlain<true,Float16LogicalType>/null_probability:0', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 46}
                                  
BM_ReadBinaryViewColumn/null_probability:99/unique_values:32  959.085 MiB/sec   
 1.185 GiB/sec    26.568                                     {'family_index': 
15, 'per_family_instance_index': 4, 'run_name': 
'BM_ReadBinaryViewColumn/null_probability:99/unique_values:32', 'repetitions': 
1, 'repetition_index': 0, 'threads': 1, 'iterations': 9}
                     
BM_ReadColumnByteStreamSplit<true,Float16LogicalType>/null_probability:99    
1.012 GiB/sec    1.210 GiB/sec    19.557                       {'family_index': 
13, 'per_family_instance_index': 3, 'run_name': 
'BM_ReadColumnByteStreamSplit<true,Float16LogicalType>/null_probability:99', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 36}
                    
BM_ReadBinaryViewColumnDeltaByteArray/null_probability:99/unique_values:-1    
1.011 GiB/sec    1.187 GiB/sec    17.407                       {'family_index': 
17, 'per_family_instance_index': 3, 'run_name': 
'BM_ReadBinaryViewColumnDeltaByteArray/null_probability:99/unique_values:-1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 9}
                               
BM_ReadColumnPlain<true,Float16LogicalType>/null_probability:99    1.024 
GiB/sec    1.201 GiB/sec    17.206                                 
{'family_index': 11, 'per_family_instance_index': 3, 'run_name': 
'BM_ReadColumnPlain<true,Float16LogicalType>/null_probability:99', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 36}
                                  
BM_ReadBinaryViewColumn/null_probability:99/unique_values:-1    1.023 GiB/sec   
 1.197 GiB/sec    17.016                                     {'family_index': 
15, 'per_family_instance_index': 7, 'run_name': 
'BM_ReadBinaryViewColumn/null_probability:99/unique_values:-1', 'repetitions': 
1, 'repetition_index': 0, 'threads': 1, 'iterations': 9}
                                      
BM_ReadBinaryColumn/null_probability:99/unique_values:32  541.347 MiB/sec  
632.640 MiB/sec    16.864                                         
{'family_index': 14, 'per_family_instance_index': 4, 'run_name': 
'BM_ReadBinaryColumn/null_probability:99/unique_values:32', 'repetitions': 1, 
'repetition_index': 0, 'threads': 1, 'iterations': 9}
                                
BM_ReadColumnPlain<true,Float16LogicalType>/null_probability:1  954.762 MiB/sec 
   1.084 GiB/sec    16.272                                  {'family_index': 
11, 'per_family_instance_index': 1, 'run_name': 
'BM_ReadColumnPlain<true,Float16LogicalType>/null_probability:1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 33}
                      
BM_ReadColumnByteStreamSplit<true,Float16LogicalType>/null_probability:1  
970.997 MiB/sec    1.100 GiB/sec    15.969                        
{'family_index': 13, 'per_family_instance_index': 1, 'run_name': 
'BM_ReadColumnByteStreamSplit<true,Float16LogicalType>/null_probability:1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 34}
                                      
BM_ReadBinaryColumn/null_probability:99/unique_values:-1  592.605 MiB/sec  
666.605 MiB/sec    12.487                                        
{'family_index': 14, 'per_family_instance_index': 7, 'run_name': 
'BM_ReadBinaryColumn/null_probability:99/unique_values:-1', 'repetitions': 1, 
'repetition_index': 0, 'threads': 1, 'iterations': 10}
                        
BM_ReadBinaryColumnDeltaByteArray/null_probability:99/unique_values:-1  587.604 
MiB/sec  659.154 MiB/sec    12.177                          {'family_index': 
16, 'per_family_instance_index': 3, 'run_name': 
'BM_ReadBinaryColumnDeltaByteArray/null_probability:99/unique_values:-1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 10}
                                  
BM_ReadBinaryViewColumn/null_probability:50/unique_values:-1  867.001 MiB/sec  
962.427 MiB/sec    11.006                                     {'family_index': 
15, 'per_family_instance_index': 6, 'run_name': 
'BM_ReadBinaryViewColumn/null_probability:50/unique_values:-1', 'repetitions': 
1, 'repetition_index': 0, 'threads': 1, 'iterations': 4}
                               
BM_ReadColumnPlain<true,Float16LogicalType>/null_probability:50  473.040 
MiB/sec  522.948 MiB/sec    10.551                                 
{'family_index': 11, 'per_family_instance_index': 2, 'run_name': 
'BM_ReadColumnPlain<true,Float16LogicalType>/null_probability:50', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 17}
                                   
BM_ReadBinaryViewColumn/null_probability:0/unique_values:-1    1.633 GiB/sec    
1.800 GiB/sec    10.197                                      {'family_index': 
15, 'per_family_instance_index': 1, 'run_name': 
'BM_ReadBinaryViewColumn/null_probability:0/unique_values:-1', 'repetitions': 
1, 'repetition_index': 0, 'threads': 1, 'iterations': 5}
                                                                  
BM_ReadStructOfListColumn/50  466.944 MiB/sec  513.407 MiB/sec     9.951        
                                                            {'family_index': 
20, 'per_family_instance_index': 2, 'run_name': 'BM_ReadStructOfListColumn/50', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 27}
                    
BM_ReadBinaryViewColumnDeltaByteArray/null_probability:50/unique_values:-1  
894.649 MiB/sec  976.595 MiB/sec     9.160                       
{'family_index': 17, 'per_family_instance_index': 2, 'run_name': 
'BM_ReadBinaryViewColumnDeltaByteArray/null_probability:50/unique_values:-1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4}
                     
BM_ReadColumnByteStreamSplit<true,Float16LogicalType>/null_probability:50  
479.717 MiB/sec  523.293 MiB/sec     9.084                       
{'family_index': 13, 'per_family_instance_index': 2, 'run_name': 
'BM_ReadColumnByteStreamSplit<true,Float16LogicalType>/null_probability:50', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 17}
                                      
BM_ReadBinaryColumn/null_probability:50/unique_values:-1  613.860 MiB/sec  
667.963 MiB/sec     8.814                                         
{'family_index': 14, 'per_family_instance_index': 6, 'run_name': 
'BM_ReadBinaryColumn/null_probability:50/unique_values:-1', 'repetitions': 1, 
'repetition_index': 0, 'threads': 1, 'iterations': 3}
                     
BM_ReadBinaryViewColumnDeltaByteArray/null_probability:1/unique_values:-1    
1.479 GiB/sec    1.608 GiB/sec     8.761                        
{'family_index': 17, 'per_family_instance_index': 1, 'run_name': 
'BM_ReadBinaryViewColumnDeltaByteArray/null_probability:1/unique_values:-1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4}
                     
BM_ReadBinaryViewColumnDeltaByteArray/null_probability:0/unique_values:-1    
1.628 GiB/sec    1.762 GiB/sec     8.235                        
{'family_index': 17, 'per_family_instance_index': 0, 'run_name': 
'BM_ReadBinaryViewColumnDeltaByteArray/null_probability:0/unique_values:-1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 5}
                                                                   
BM_ReadStructOfListColumn/0  760.221 MiB/sec  822.339 MiB/sec     8.171         
                                                            {'family_index': 
20, 'per_family_instance_index': 0, 'run_name': 'BM_ReadStructOfListColumn/0', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 47}
                                   
BM_ReadBinaryViewColumn/null_probability:1/unique_values:32  843.826 MiB/sec  
912.397 MiB/sec     8.126                                      {'family_index': 
15, 'per_family_instance_index': 2, 'run_name': 
'BM_ReadBinaryViewColumn/null_probability:1/unique_values:32', 'repetitions': 
1, 'repetition_index': 0, 'threads': 1, 'iterations': 3}
                                  
BM_ReadBinaryViewColumn/null_probability:50/unique_values:32  699.538 MiB/sec  
755.468 MiB/sec     7.995                                     {'family_index': 
15, 'per_family_instance_index': 3, 'run_name': 
'BM_ReadBinaryViewColumn/null_probability:50/unique_values:32', 'repetitions': 
1, 'repetition_index': 0, 'threads': 1, 'iterations': 3}
                                                
BM_ByteStreamSplitDecode_FLBA_Generic<16>/1024    3.724 GiB/sec    4.007 
GiB/sec     7.597                                               
{'family_index': 4, 'per_family_instance_index': 0, 'run_name': 
'BM_ByteStreamSplitDecode_FLBA_Generic<16>/1024', 'repetitions': 1, 
'repetition_index': 0, 'threads': 1, 'iterations': 176027}
                                   
BM_ReadBinaryViewColumn/null_probability:1/unique_values:-1    1.474 GiB/sec    
1.586 GiB/sec     7.591                                      {'family_index': 
15, 'per_family_instance_index': 5, 'run_name': 
'BM_ReadBinaryViewColumn/null_probability:1/unique_values:-1', 'repetitions': 
1, 'repetition_index': 0, 'threads': 1, 'iterations': 4}
                                       
BM_ReadBinaryColumn/null_probability:0/unique_values:-1    1.114 GiB/sec    
1.192 GiB/sec     7.005                                          
{'family_index': 14, 'per_family_instance_index': 1, 'run_name': 
'BM_ReadBinaryColumn/null_probability:0/unique_values:-1', 'repetitions': 1, 
'repetition_index': 0, 'threads': 1, 'iterations': 3}
                                       
BM_ReadBinaryColumn/null_probability:1/unique_values:-1    1.022 GiB/sec    
1.091 GiB/sec     6.715                                          
{'family_index': 14, 'per_family_instance_index': 5, 'run_name': 
'BM_ReadBinaryColumn/null_probability:1/unique_values:-1', 'repetitions': 1, 
'repetition_index': 0, 'threads': 1, 'iterations': 4}
                         
BM_ReadBinaryColumnDeltaByteArray/null_probability:0/unique_values:-1    1.101 
GiB/sec    1.174 GiB/sec     6.557                            {'family_index': 
16, 'per_family_instance_index': 0, 'run_name': 
'BM_ReadBinaryColumnDeltaByteArray/null_probability:0/unique_values:-1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 4}
     
BM_DecodeArrowBooleanPlain/DecodeArrowWithNull/num_values:16384/null_in_ten_thousand:5000
   18.019 MiB/sec   19.100 MiB/sec     5.997    {'family_index': 33, 
'per_family_instance_index': 14, 'run_name': 
'BM_DecodeArrowBooleanPlain/DecodeArrowWithNull/num_values:16384/null_in_ten_thousand:5000',
 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 6295}
                                   
BM_ReadBinaryViewColumn/null_probability:0/unique_values:32  893.151 MiB/sec  
945.900 MiB/sec     5.906                                      {'family_index': 
15, 'per_family_instance_index': 0, 'run_name': 
'BM_ReadBinaryViewColumn/null_probability:0/unique_values:32', 'repetitions': 
1, 'repetition_index': 0, 'threads': 1, 'iterations': 3}
     
BM_DecodeArrowBooleanPlain/DecodeArrowWithNull/num_values:16384/null_in_ten_thousand:1000
   20.243 MiB/sec   21.404 MiB/sec     5.733    {'family_index': 33, 
'per_family_instance_index': 10, 'run_name': 
'BM_DecodeArrowBooleanPlain/DecodeArrowWithNull/num_values:16384/null_in_ten_thousand:1000',
 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7257}
                        
BM_ReadBinaryColumnDeltaByteArray/null_probability:50/unique_values:-1  620.583 
MiB/sec  655.859 MiB/sec     5.684                           {'family_index': 
16, 'per_family_instance_index': 2, 'run_name': 
'BM_ReadBinaryColumnDeltaByteArray/null_probability:50/unique_values:-1', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 3}
                                       
BM_ReadBinaryColumn/null_probability:0/unique_values:32  751.375 MiB/sec  
793.728 MiB/sec     5.637                                          
{'family_index': 14, 'per_family_instance_index': 0, 'run_name': 
'BM_ReadBinaryColumn/null_probability:0/unique_values:32', 'repetitions': 1, 
'repetition_index': 0, 'threads': 1, 'iterations': 3}
                                      
BM_ReadBinaryColumn/null_probability:50/unique_values:32  537.693 MiB/sec  
567.159 MiB/sec     5.480                                         
{'family_index': 14, 'per_family_instance_index': 3, 'run_name': 
'BM_ReadBinaryColumn/null_probability:50/unique_values:32', 'repetitions': 1, 
'repetition_index': 0, 'threads': 1, 'iterations': 3}
      
BM_DecodeArrowBooleanPlain/DecodeArrowWithNull/num_values:16384/null_in_ten_thousand:100
   44.112 MiB/sec   46.474 MiB/sec     5.355     {'family_index': 33, 
'per_family_instance_index': 6, 'run_name': 
'BM_DecodeArrowBooleanPlain/DecodeArrowWithNull/num_values:16384/null_in_ten_thousand:100',
 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 15273}
       
BM_DecodeArrowBooleanRle/DecodeArrowWithNull/num_values:16384/null_in_ten_thousand:1000
   20.750 MiB/sec   21.843 MiB/sec     5.265      {'family_index': 30, 
'per_family_instance_index': 10, 'run_name': 
'BM_DecodeArrowBooleanRle/DecodeArrowWithNull/num_values:16384/null_in_ten_thousand:1000',
 'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 7387}
                                                          
BM_ReadColumn<false,Int32Type>/-1/10    7.621 GiB/sec    8.019 GiB/sec     
5.223                                                            
{'family_index': 0, 'per_family_instance_index': 1, 'run_name': 
'BM_ReadColumn<false,Int32Type>/-1/10', 'repetitions': 1, 'repetition_index': 
0, 'threads': 1, 'iterations': 137}
    
    [ ... snip non-significant changes ... ]
    
    
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
    Regressions: (4)
    
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
                                               benchmark        baseline       
contender  change %                                                             
                                                                                
                                                counters
                                    BM_ReadListColumn/99   1.452 GiB/sec   
1.379 GiB/sec    -5.006                                   {'family_index': 21, 
'per_family_instance_index': 3, 'run_name': 'BM_ReadListColumn/99', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 129}
    BM_ArrowBinaryViewDict/DecodeArrowNonNull_Dense/1024 270.542 MiB/sec 
256.345 MiB/sec    -5.248 {'family_index': 27, 'per_family_instance_index': 0, 
'run_name': 'BM_ArrowBinaryViewDict/DecodeArrowNonNull_Dense/1024', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 32060}
              BM_ArrowBinaryPlain/DecodeArrow_Dict/65536 172.371 MiB/sec 
162.455 MiB/sec    -5.753             {'family_index': 18, 
'per_family_instance_index': 3, 'run_name': 
'BM_ArrowBinaryPlain/DecodeArrow_Dict/65536', 'repetitions': 1, 
'repetition_index': 0, 'threads': 1, 'iterations': 319}
        BM_ArrowBinaryPlain/DecodeArrowNonNull_Dict/1024 189.008 MiB/sec 
176.900 MiB/sec    -6.406     {'family_index': 19, 'per_family_instance_index': 
0, 'run_name': 'BM_ArrowBinaryPlain/DecodeArrowNonNull_Dict/1024', 
'repetitions': 1, 'repetition_index': 0, 'threads': 1, 'iterations': 22292}
    ```
    
    ### Are these changes tested?
    
    By existing tests.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #47012
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 ci/scripts/cpp_test.sh           |  3 +++
 cpp/build-support/run-test.sh    |  1 +
 cpp/src/parquet/column_reader.cc | 14 +++++++++++++-
 3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/ci/scripts/cpp_test.sh b/ci/scripts/cpp_test.sh
index 60d9dd0a3b..88c06849c8 100755
--- a/ci/scripts/cpp_test.sh
+++ b/ci/scripts/cpp_test.sh
@@ -126,6 +126,9 @@ fi
 
 if [ "${ARROW_FUZZING}" == "ON" ]; then
     # Fuzzing regression tests
+    # Some fuzz regression files may trigger huge memory allocations,
+    # let the allocator return null instead of aborting.
+    export ASAN_OPTIONS="$ASAN_OPTIONS allocator_may_return_null=1"
     "${binary_output_dir}/arrow-ipc-stream-fuzz" 
"${ARROW_TEST_DATA}"/arrow-ipc-stream/crash-*
     "${binary_output_dir}/arrow-ipc-stream-fuzz" 
"${ARROW_TEST_DATA}"/arrow-ipc-stream/*-testcase-*
     "${binary_output_dir}/arrow-ipc-file-fuzz" 
"${ARROW_TEST_DATA}"/arrow-ipc-file/*-testcase-*
diff --git a/cpp/build-support/run-test.sh b/cpp/build-support/run-test.sh
index f176586a0f..3e3034a3c8 100755
--- a/cpp/build-support/run-test.sh
+++ b/cpp/build-support/run-test.sh
@@ -77,6 +77,7 @@ function setup_sanitizers() {
 
   # Set up suppressions for AddressSanitizer
   ASAN_OPTIONS="$ASAN_OPTIONS 
suppressions=$ROOT/build-support/asan-suppressions.txt"
+  ASAN_OPTIONS="$ASAN_OPTIONS allocator_may_return_null=1"
   export ASAN_OPTIONS
 
   # Set up suppressions for LeakSanitizer
diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc
index 1e681d8e90..eb9df9f2f4 100644
--- a/cpp/src/parquet/column_reader.cc
+++ b/cpp/src/parquet/column_reader.cc
@@ -1684,7 +1684,7 @@ class TypedRecordReader : public 
TypedColumnReaderImpl<DType>,
     }
   }
 
-  void ReserveValues(int64_t extra_values) {
+  virtual void ReserveValues(int64_t extra_values) {
     const int64_t new_values_capacity =
         UpdateCapacity(values_capacity_, values_written_, extra_values);
     if (new_values_capacity > values_capacity_) {
@@ -1968,6 +1968,12 @@ class FLBARecordReader final : public 
TypedRecordReader<FLBAType>,
     return ::arrow::ArrayVector{std::move(chunk)};
   }
 
+  void ReserveValues(int64_t extra_values) override {
+    ARROW_DCHECK(!uses_values_);
+    TypedRecordReader::ReserveValues(extra_values);
+    PARQUET_THROW_NOT_OK(array_builder_.Reserve(extra_values));
+  }
+
   void ReadValuesDense(int64_t values_to_read) override {
     int64_t num_decoded = this->current_decoder_->DecodeArrowNonNull(
         static_cast<int>(values_to_read), &array_builder_);
@@ -2042,6 +2048,12 @@ class ByteArrayChunkedRecordReader final : public 
TypedRecordReader<ByteArrayTyp
     return result;
   }
 
+  void ReserveValues(int64_t extra_values) override {
+    ARROW_DCHECK(!uses_values_);
+    TypedRecordReader::ReserveValues(extra_values);
+    PARQUET_THROW_NOT_OK(accumulator_.builder->Reserve(extra_values));
+  }
+
   void ReadValuesDense(int64_t values_to_read) override {
     int64_t num_decoded = this->current_decoder_->DecodeArrowNonNull(
         static_cast<int>(values_to_read), &accumulator_);

Reply via email to