EnricoMi opened a new issue, #44988:
URL: https://github.com/apache/arrow/issues/44988

   ### Describe the bug, including details regarding any error messages, 
version, and platform.
   
   A `FileDecryptionProperties` is been used after it has been freed in two 
places in `ParquetFileFormat`.
   
   1. A `FileDecryptionProperties` is been deconstructed when leaving method 
`ParquetFileFormat::GetReaderAsync` (thread `T440`), which is later (thread 
`T74`) used inside the async lambda (`properties`):
   
https://github.com/apache/arrow/blob/b907c5dadb516b525c8fafbf34b0116d44044733/cpp/src/arrow/dataset/file_parquet.cc#L525-L532
   
   ```
   ==203464==ERROR: AddressSanitizer: heap-use-after-free on address 
0x5030007d5548 at pc 0x58c146411ce2 bp 0x7de5705fc610 sp 0x7de5705fc600
   WRITE of size 4 at 0x5030007d5548 thread T74
       #0 0x58c146411ce1 in __gnu_cxx::__atomic_add(int volatile*, int) 
/usr/include/c++/13/ext/atomicity.h:71
       #1 0x58c146411ce1 in __gnu_cxx::__atomic_add_dispatch(int*, int) 
/usr/include/c++/13/ext/atomicity.h:111
       #2 0x58c146411ce1 in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_add_ref_copy() 
/usr/include/c++/13/bits/shared_ptr_base.h:152
       #3 0x58c14640bce6 in 
std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count(std::__shared_count<(__gnu_cxx::_Lock_policy)2>
 const&) /usr/include/c++/13/bits/shared_ptr_base.h:1078
       #4 0x58c1464d3c64 in 
std::__shared_ptr<parquet::FileDecryptionProperties, 
(__gnu_cxx::_Lock_policy)2>::__shared_ptr(std::__shared_ptr<parquet::FileDecryptionProperties,
 (__gnu_cxx::_Lock_policy)2> const&) 
(/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/arrow-dataset-file-parquet-encryption-test+0x1cec64)
 (BuildId: 57601f70a2e6f76ad5b425c6cf08eaa169b839ea)
       #5 0x58c1464d3c8e in 
std::shared_ptr<parquet::FileDecryptionProperties>::shared_ptr(std::shared_ptr<parquet::FileDecryptionProperties>
 const&) 
(/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/arrow-dataset-file-parquet-encryption-test+0x1cec8e)
 (BuildId: 57601f70a2e6f76ad5b425c6cf08eaa169b839ea)
       #6 0x7de5e90aa676 in 
parquet::ReaderProperties::ReaderProperties(parquet::ReaderProperties const&) 
/home/enrico/Work/git/arrow/cpp/src/parquet/properties.h:59
       #7 0x7de5e80e3790 in 
std::__detail::_MakeUniq<parquet::SerializedRowGroup>::__single_object 
std::make_unique<parquet::SerializedRowGroup, 
std::shared_ptr<arrow::io::RandomAccessFile>&, 
std::shared_ptr<arrow::io::internal::ReadRangeCache>&, long&, 
parquet::FileMetaData*, int&, parquet::ReaderProperties&, 
std::shared_ptr<arrow::Buffer> >(std::shared_ptr<arrow::io::RandomAccessFile>&, 
std::shared_ptr<arrow::io::internal::ReadRangeCache>&, long&, 
parquet::FileMetaData*&&, int&, parquet::ReaderProperties&, 
std::shared_ptr<arrow::Buffer>&&) /usr/include/c++/13/bits/unique_ptr.h:1070
       #8 0x7de5e80dbf1d in parquet::SerializedFile::GetRowGroup(int) 
/home/enrico/Work/git/arrow/cpp/src/parquet/file_reader.cc:327
       #9 0x7de5e80d5bb3 in parquet::ParquetFileReader::RowGroup(int) 
/home/enrico/Work/git/arrow/cpp/src/parquet/file_reader.cc:889
       #10 0x7de5e7b805bf in parquet::arrow::FileColumnIterator::NextChunk() 
(/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/libparquet.so.1900+0x3805bf)
 (BuildId: d5a62f9c6ed58a37b6da81c34f141d1f0a952dd0)
   
   
   0x5030007d5548 is located 8 bytes inside of 24-byte region 
[0x5030007d5540,0x5030007d5558)
   freed by thread T440 here:
       #0 0x7de5e94ff5e8 in operator delete(void*, unsigned long) 
../../../../src/libsanitizer/asan/asan_new_delete.cpp:164
       #1 0x7de5e80bc176 in 
std::_Sp_counted_ptr<parquet::FileDecryptionProperties*, 
(__gnu_cxx::_Lock_policy)2>::~_Sp_counted_ptr() 
/usr/include/c++/13/bits/shared_ptr_base.h:419
       #2 0x7de5e80bc7d2 in 
std::_Sp_counted_ptr<parquet::FileDecryptionProperties*, 
(__gnu_cxx::_Lock_policy)2>::_M_destroy() 
/usr/include/c++/13/bits/shared_ptr_base.h:432
       #3 0x58c1464063db in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() 
/usr/include/c++/13/bits/shared_ptr_base.h:347
       #4 0x58c14640bc63 in 
std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count() 
/usr/include/c++/13/bits/shared_ptr_base.h:1071
       #5 0x58c1464ca4ab in 
std::__shared_ptr<parquet::FileDecryptionProperties, 
(__gnu_cxx::_Lock_policy)2>::~__shared_ptr() 
/usr/include/c++/13/bits/shared_ptr_base.h:1524
       #6 0x58c1464ca4cb in 
std::shared_ptr<parquet::FileDecryptionProperties>::~shared_ptr() 
/usr/include/c++/13/bits/shared_ptr.h:175
       #7 0x58c1464ca591 in parquet::ReaderProperties::~ReaderProperties() 
/home/enrico/Work/git/arrow/cpp/src/parquet/properties.h:59
       #8 0x7de5e9087118 in 
arrow::dataset::ParquetFileFormat::GetReaderAsync(arrow::dataset::FileSource 
const&, std::shared_ptr<arrow::dataset::ScanOptions> const&, 
std::shared_ptr<parquet::FileMetaData> const&) const 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:558
       #9 0x7de5e90893c0 in 
arrow::dataset::ParquetFileFormat::ScanBatchesAsync(std::shared_ptr<arrow::dataset::ScanOptions>
 const&, std::shared_ptr<arrow::dataset::FileFragment> const&) const 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:652
       #10 0x7de5e8de0554 in 
arrow::dataset::FileFragment::ScanBatchesAsync(std::shared_ptr<arrow::dataset::ScanOptions>
 const&) /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_base.cc:188
       #11 0x7de5e8ee5049 in FragmentToBatches 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/scanner.cc:307
   
   previously allocated by thread T440 here:
       #0 0x7de5e94fe548 in operator new(unsigned long) 
../../../../src/libsanitizer/asan/asan_new_delete.cpp:95
       #1 0x7de5e80b701a in 
std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<parquet::FileDecryptionProperties*>(parquet::FileDecryptionProperties*)
 /usr/include/c++/13/bits/shared_ptr_base.h:917
       #2 0x7de5e80b1d08 in 
std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<parquet::FileDecryptionProperties*>(parquet::FileDecryptionProperties*,
 std::integral_constant<bool, false>) 
/usr/include/c++/13/bits/shared_ptr_base.h:928
       #3 0x7de5e80ae0ec in 
std::__shared_ptr<parquet::FileDecryptionProperties, 
(__gnu_cxx::_Lock_policy)2>::__shared_ptr<parquet::FileDecryptionProperties, 
void>(parquet::FileDecryptionProperties*) 
(/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/libparquet.so.1900+0x8ae0ec)
 (BuildId: d5a62f9c6ed58a37b6da81c34f141d1f0a952dd0)
       #4 0x7de5e80abbbc in 
std::shared_ptr<parquet::FileDecryptionProperties>::shared_ptr<parquet::FileDecryptionProperties,
 void>(parquet::FileDecryptionProperties*) 
/usr/include/c++/13/bits/shared_ptr.h:214
       #5 0x7de5e835bdc3 in parquet::FileDecryptionProperties::Builder::build() 
/home/enrico/Work/git/arrow/cpp/src/parquet/encryption/encryption.h:328
       #6 0x7de5e835b813 in 
parquet::encryption::CryptoFactory::GetFileDecryptionProperties(parquet::encryption::KmsConnectionConfig
 const&, parquet::encryption::DecryptionConfiguration const&, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&, std::shared_ptr<arrow::fs::FileSystem> const&) 
/home/enrico/Work/git/arrow/cpp/src/parquet/encryption/crypto_factory.cc:180
       #7 0x7de5e907c44f in MakeReaderProperties 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:86
       #8 0x7de5e9086d93 in 
arrow::dataset::ParquetFileFormat::GetReaderAsync(arrow::dataset::FileSource 
const&, std::shared_ptr<arrow::dataset::ScanOptions> const&, 
std::shared_ptr<parquet::FileMetaData> const&) const 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:526
       #9 0x7de5e90893c0 in 
arrow::dataset::ParquetFileFormat::ScanBatchesAsync(std::shared_ptr<arrow::dataset::ScanOptions>
 const&, std::shared_ptr<arrow::dataset::FileFragment> const&) const 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:652
       #10 0x7de5e8de0554 in 
arrow::dataset::FileFragment::ScanBatchesAsync(std::shared_ptr<arrow::dataset::ScanOptions>
 const&) /home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_base.cc:188
       #11 0x7de5e8ee5049 in FragmentToBatches 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/scanner.cc:307
   
   ```
   
   2. In `MakeReaderProperties`, the `parquet_scan_options` given to the method 
is being modified, which is suspicious given the modification relies on other 
arguments to the method (like `path` and `filesystem`). Concurrent threads 
overwrite each-others instances:
   
https://github.com/apache/arrow/blob/b907c5dadb516b525c8fafbf34b0116d44044733/cpp/src/arrow/dataset/file_parquet.cc#L66-L95
   
   By modifying 
`parquet_scan_options->reader_properties->file_decryption_properties`, the 
current value that has been assigned by a different thread (`T235`) is being 
deconstructed (**for some reason the `std::shared_ptr` does not know some 
thread still knows about it**). When the other thread access that instance, a 
segmentation fault occurs:
   
   ```
   ==414573==ERROR: AddressSanitizer: heap-use-after-free on address 
0x503000e06b08 at pc 0x767802313d7c bp 0x7676929f8ca0 sp 0x7676929f8c90
   WRITE of size 4 at 0x503000e06b08 thread T235
       #0 0x767802313d7b in __gnu_cxx::__atomic_add(int volatile*, int) 
/usr/include/c++/13/ext/atomicity.h:71
       #1 0x767802313d7b in __gnu_cxx::__atomic_add_dispatch(int*, int) 
/usr/include/c++/13/ext/atomicity.h:111
       #2 0x767802313d7b in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_add_ref_copy() 
/usr/include/c++/13/bits/shared_ptr_base.h:152
       #3 0x767802310a78 in 
std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count(std::__shared_count<(__gnu_cxx::_Lock_policy)2>
 const&) /usr/include/c++/13/bits/shared_ptr_base.h:1078
       #4 0x7678025f902e in 
std::__shared_ptr<parquet::FileDecryptionProperties, 
(__gnu_cxx::_Lock_policy)2>::__shared_ptr(std::__shared_ptr<parquet::FileDecryptionProperties,
 (__gnu_cxx::_Lock_policy)2> const&) 
/usr/include/c++/13/bits/shared_ptr_base.h:1522
       #5 0x7678025f9058 in 
std::shared_ptr<parquet::FileDecryptionProperties>::shared_ptr(std::shared_ptr<parquet::FileDecryptionProperties>
 const&) /usr/include/c++/13/bits/shared_ptr.h:204
       #6 0x7678025f93a4 in 
parquet::ReaderProperties::ReaderProperties(parquet::ReaderProperties const&) 
/home/enrico/Work/git/arrow/cpp/src/parquet/properties.h:59
       #7 0x7678028db77d in 
parquet::SerializedFile::SerializedFile(std::shared_ptr<arrow::io::RandomAccessFile>,
 parquet::ReaderProperties const&) 
/home/enrico/Work/git/arrow/cpp/src/parquet/file_reader.cc:303
       #8 0x7678028d3416 in 
parquet::ParquetFileReader::Contents::OpenAsync(std::shared_ptr<arrow::io::RandomAccessFile>,
 parquet::ReaderProperties const&, std::shared_ptr<parquet::FileMetaData>) 
/home/enrico/Work/git/arrow/cpp/src/parquet/file_reader.cc:793
       #9 0x7678028d4edc in 
parquet::ParquetFileReader::OpenAsync(std::shared_ptr<arrow::io::RandomAccessFile>,
 parquet::ReaderProperties const&, std::shared_ptr<parquet::FileMetaData>) 
/home/enrico/Work/git/arrow/cpp/src/parquet/file_reader.cc:842
       #10 0x767803886510 in operator() 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:531
       #11 0x7678038a24d4 in 
operator()<arrow::dataset::ParquetFileFormat::GetReaderAsync(const 
arrow::dataset::FileSource&, const 
std::shared_ptr<arrow::dataset::ScanOptions>&, const 
std::shared_ptr<parquet::FileMetaData>&) const::<lambda(const 
std::shared_ptr<arrow::io::RandomAccessFile>&)>, const 
std::shared_ptr<arrow::io::RandomAccessFile>&> 
/home/enrico/Work/git/arrow/cpp/src/arrow/util/future.h:178
   
   previously allocated by thread T235 here:
       #0 0x767803cfe548 in operator new(unsigned long) 
../../../../src/libsanitizer/asan/asan_new_delete.cpp:95
       #1 0x7678028b701a in 
std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<parquet::FileDecryptionProperties*>(parquet::FileDecryptionProperties*)
 /usr/include/c++/13/bits/shared_ptr_base.h:917
       #2 0x7678028b1d08 in 
std::__shared_count<(__gnu_cxx::_Lock_policy)2>::__shared_count<parquet::FileDecryptionProperties*>(parquet::FileDecryptionProperties*,
 std::integral_constant<bool, false>) 
/usr/include/c++/13/bits/shared_ptr_base.h:928
       #3 0x7678028ae0ec in 
std::__shared_ptr<parquet::FileDecryptionProperties, 
(__gnu_cxx::_Lock_policy)2>::__shared_ptr<parquet::FileDecryptionProperties, 
void>(parquet::FileDecryptionProperties*) 
(/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/libparquet.so.1900+0x8ae0ec)
 (BuildId: d5a62f9c6ed58a37b6da81c34f141d1f0a952dd0)
       #4 0x7678028abbbc in 
std::shared_ptr<parquet::FileDecryptionProperties>::shared_ptr<parquet::FileDecryptionProperties,
 void>(parquet::FileDecryptionProperties*) 
/usr/include/c++/13/bits/shared_ptr.h:214
       #5 0x767802b5bdc3 in parquet::FileDecryptionProperties::Builder::build() 
/home/enrico/Work/git/arrow/cpp/src/parquet/encryption/encryption.h:328
       #6 0x767802b5b813 in 
parquet::encryption::CryptoFactory::GetFileDecryptionProperties(parquet::encryption::KmsConnectionConfig
 const&, parquet::encryption::DecryptionConfiguration const&, 
std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > 
const&, std::shared_ptr<arrow::fs::FileSystem> const&) 
/home/enrico/Work/git/arrow/cpp/src/parquet/encryption/crypto_factory.cc:180
       #7 0x76780387c42f in MakeReaderProperties 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:86
       #8 0x767803886d69 in 
arrow::dataset::ParquetFileFormat::GetReaderAsync(arrow::dataset::FileSource 
const&, std::shared_ptr<arrow::dataset::ScanOptions> const&, 
std::shared_ptr<parquet::FileMetaData> const&) const 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:526
   
   0x503000e06b08 is located 8 bytes inside of 24-byte region 
[0x503000e06b00,0x503000e06b18)
   freed by thread T481 here:
       #0 0x767803cff5e8 in operator delete(void*, unsigned long) 
../../../../src/libsanitizer/asan/asan_new_delete.cpp:164
       #1 0x7678028bc176 in 
std::_Sp_counted_ptr<parquet::FileDecryptionProperties*, 
(__gnu_cxx::_Lock_policy)2>::~_Sp_counted_ptr() 
/usr/include/c++/13/bits/shared_ptr_base.h:419
       #2 0x7678028bc7d2 in 
std::_Sp_counted_ptr<parquet::FileDecryptionProperties*, 
(__gnu_cxx::_Lock_policy)2>::_M_destroy() 
/usr/include/c++/13/bits/shared_ptr_base.h:432
       #3 0x5b649c444bd0 in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release_last_use() 
(/home/enrico/Work/git/arrow/cpp/cmake-build-debug/debug/arrow-dataset-file-parquet-encryption-test+0x10cbd0)
 (BuildId: 57601f70a2e6f76ad5b425c6cf08eaa169b839ea)
       #4 0x5b649c43eb43 in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release_last_use_cold() 
/usr/include/c++/13/bits/shared_ptr_base.h:199
       #5 0x5b649c43950d in 
std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release() 
/usr/include/c++/13/bits/shared_ptr_base.h:353
       #6 0x5b649c43ec63 in 
std::__shared_count<(__gnu_cxx::_Lock_policy)2>::~__shared_count() 
/usr/include/c++/13/bits/shared_ptr_base.h:1071
       #7 0x5b649c4fd4ab in 
std::__shared_ptr<parquet::FileDecryptionProperties, 
(__gnu_cxx::_Lock_policy)2>::~__shared_ptr() 
/usr/include/c++/13/bits/shared_ptr_base.h:1524
       #8 0x5b649c510151 in 
std::__shared_ptr<parquet::FileDecryptionProperties, 
(__gnu_cxx::_Lock_policy)2>::operator=(std::__shared_ptr<parquet::FileDecryptionProperties,
 (__gnu_cxx::_Lock_policy)2>&&) /usr/include/c++/13/bits/shared_ptr_base.h:1620
       #9 0x5b649c509a77 in 
std::shared_ptr<parquet::FileDecryptionProperties>::operator=(std::shared_ptr<parquet::FileDecryptionProperties>&&)
 /usr/include/c++/13/bits/shared_ptr.h:440
       #10 0x5b649c4fd505 in 
parquet::ReaderProperties::file_decryption_properties(std::shared_ptr<parquet::FileDecryptionProperties>)
 /home/enrico/Work/git/arrow/cpp/src/parquet/properties.h:111
       #11 0x76780387c472 in MakeReaderProperties 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:88
       #12 0x767803886d69 in 
arrow::dataset::ParquetFileFormat::GetReaderAsync(arrow::dataset::FileSource 
const&, std::shared_ptr<arrow::dataset::ScanOptions> const&, 
std::shared_ptr<parquet::FileMetaData> const&) const 
/home/enrico/Work/git/arrow/cpp/src/arrow/dataset/file_parquet.cc:526
   ```
   
   
   ### Component(s)
   
   C++


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@arrow.apache.org.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to