This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new a3c1657c4ba [cherry-pick](branch-2.1) check end of file when reading page (#42159)
a3c1657c4ba is described below

commit a3c1657c4ba3146e117e73e711e8a7cde6009244
Author: Socrates <suxiaogang...@icloud.com>
AuthorDate: Mon Oct 21 17:01:04 2024 +0800

    [cherry-pick](branch-2.1) check end of file when reading page (#42159)

    ## Proposed changes

    pick pr: https://github.com/apache/doris/pull/41816
---
 be/src/io/fs/buffered_reader.cpp                           |  8 ++++++--
 .../external_table_p0/tvf/test_hdfs_parquet_group0.groovy  | 14 ++++++++------
 .../external_table_p0/tvf/test_hdfs_parquet_group5.groovy  |  8 --------
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp
index 2d9b9e962ec..f897de67e0f 100644
--- a/be/src/io/fs/buffered_reader.cpp
+++ b/be/src/io/fs/buffered_reader.cpp
@@ -777,8 +777,12 @@ BufferedFileStreamReader::BufferedFileStreamReader(io::FileReaderSPtr file, uint
 
 Status BufferedFileStreamReader::read_bytes(const uint8_t** buf, uint64_t offset,
                                             const size_t bytes_to_read, const IOContext* io_ctx) {
-    if (offset < _file_start_offset || offset >= _file_end_offset) {
-        return Status::IOError("Out-of-bounds Access");
+    if (offset < _file_start_offset || offset >= _file_end_offset ||
+        offset + bytes_to_read > _file_end_offset) {
+        return Status::IOError(
+                "Out-of-bounds Access: offset={}, bytes_to_read={}, file_start={}, "
+                "file_end={}",
+                offset, bytes_to_read, _file_start_offset, _file_end_offset);
     }
     int64_t end_offset = offset + bytes_to_read;
     if (_buf_start_offset <= offset && _buf_end_offset >= end_offset) {
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
index 2bf95551a38..47fc8574a34 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
@@ -166,12 +166,14 @@ suite("test_hdfs_parquet_group0","external,hive,tvf,external_docker") {
                         "format" = "parquet") limit 10; """
 
 
-            // uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet"
-            // order_qt_test_20 """ select * from HDFS(
-            //             "uri" = "${uri}",
-            //             "hadoop.username" = "${hdfsUserName}",
-            //             "format" = "parquet") limit 10; """
-            // [E-3113]string column length is too large: total_length=3990808712454497748, element_number=25, you can set batch_size a number smaller than 25 to avoid this error
+            uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet"
+            test {
+                sql """ select * from HDFS(
+                            "uri" = "${uri}",
+                            "hadoop.username" = "${hdfsUserName}",
+                            "format" = "parquet") limit 10; """
+                exception "[IO_ERROR]Out-of-bounds Access"
+            }
 
 
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/lz4_raw_compressed_larger.parquet"
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
index 324b6aaf209..a8723a433f0 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
@@ -183,14 +183,6 @@ suite("test_hdfs_parquet_group5","external,hive,tvf,external_docker") {
                         "format" = "parquet") limit 10; """
 
 
-            // uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group5/nation.dict-malformed.parquet"
-            // order_qt_test_22 """ select * from HDFS(
-            //             "uri" = "${uri}",
-            //             "hadoop.username" = "${hdfsUserName}",
-            //             "format" = "parquet") limit 10; """
-            // [E-3113]string column length is too large: total_length=7909446880690438330, element_number=25, you can set batch_size a number smaller than 25 to avoid this error
-
-
             uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group5/v0.7.1.column-metadata-handling.parquet"
             order_qt_test_23 """ select * from HDFS(
                         "uri" = "${uri}",


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org