This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new a3c1657c4ba [cherry-pick](branch-2.1) check end of file when reading page (#42159)
a3c1657c4ba is described below

commit a3c1657c4ba3146e117e73e711e8a7cde6009244
Author: Socrates <suxiaogang...@icloud.com>
AuthorDate: Mon Oct 21 17:01:04 2024 +0800

    [cherry-pick](branch-2.1) check end of file when reading page (#42159)
    
    ## Proposed changes
    pick pr: https://github.com/apache/doris/pull/41816
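    
    The fix itself is small: `BufferedFileStreamReader::read_bytes` previously
    validated only the starting offset, so a page whose declared size extended
    past the end of the file could trigger a read beyond `_file_end_offset`.
    The patched check also requires `offset + bytes_to_read` to stay within the
    file, and the error message now reports all four values.
    
    A minimal sketch of the strengthened bounds check (simplified stand-ins for
    the real Doris types; `out_of_bounds` is a hypothetical helper, not code
    from this PR):
    
    ```cpp
    #include <cstdint>
    #include <cstdio>
    
    // Mirrors the patched condition in read_bytes(): the read must start inside
    // [file_start, file_end) and must also end within the file. A production
    // check would additionally guard against overflow of offset + bytes_to_read.
    bool out_of_bounds(uint64_t offset, size_t bytes_to_read,
                       uint64_t file_start, uint64_t file_end) {
        return offset < file_start || offset >= file_end ||
               offset + bytes_to_read > file_end;
    }
    
    int main() {
        // A malformed dictionary page can declare a length that runs past EOF,
        // e.g. the page header claims 4096 bytes but only 50 remain in the file.
        const uint64_t file_start = 0, file_end = 1000;
        printf("%d\n", out_of_bounds(950, 4096, file_start, file_end)); // 1: rejected
        printf("%d\n", out_of_bounds(100, 200, file_start, file_end));  // 0: allowed
    }
    ```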
---
 be/src/io/fs/buffered_reader.cpp                           |  8 ++++++--
 .../external_table_p0/tvf/test_hdfs_parquet_group0.groovy  | 14 ++++++++------
 .../external_table_p0/tvf/test_hdfs_parquet_group5.groovy  |  8 --------
 3 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp
index 2d9b9e962ec..f897de67e0f 100644
--- a/be/src/io/fs/buffered_reader.cpp
+++ b/be/src/io/fs/buffered_reader.cpp
@@ -777,8 +777,12 @@ BufferedFileStreamReader::BufferedFileStreamReader(io::FileReaderSPtr file, uint
 
 Status BufferedFileStreamReader::read_bytes(const uint8_t** buf, uint64_t offset,
                                             const size_t bytes_to_read, const IOContext* io_ctx) {
-    if (offset < _file_start_offset || offset >= _file_end_offset) {
-        return Status::IOError("Out-of-bounds Access");
+    if (offset < _file_start_offset || offset >= _file_end_offset ||
+        offset + bytes_to_read > _file_end_offset) {
+        return Status::IOError(
+                "Out-of-bounds Access: offset={}, bytes_to_read={}, 
file_start={}, "
+                "file_end={}",
+                offset, bytes_to_read, _file_start_offset, _file_end_offset);
     }
     int64_t end_offset = offset + bytes_to_read;
     if (_buf_start_offset <= offset && _buf_end_offset >= end_offset) {
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
index 2bf95551a38..47fc8574a34 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group0.groovy
@@ -166,12 +166,14 @@ suite("test_hdfs_parquet_group0","external,hive,tvf,external_docker") {
                         "format" = "parquet") limit 10; """
 
 
-            // uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet"
-            // order_qt_test_20 """ select * from HDFS(
-            //             "uri" = "${uri}",
-            //             "hadoop.username" = "${hdfsUserName}",
-            //             "format" = "parquet") limit 10; """
-            // [E-3113]string column length is too large: total_length=3990808712454497748, element_number=25, you can set batch_size a number smaller than 25 to avoid this error
+            uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group0/nation.dict-malformed.parquet"
+            test {
+                sql """ select * from HDFS(
+                        "uri" = "${uri}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "parquet") limit 10; """
+                exception "[IO_ERROR]Out-of-bounds Access"
+            }
 
 
             uri = "${defaultFS}" + 
"/user/doris/tvf_data/test_hdfs_parquet/group0/lz4_raw_compressed_larger.parquet"
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
index 324b6aaf209..a8723a433f0 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_parquet_group5.groovy
@@ -183,14 +183,6 @@ suite("test_hdfs_parquet_group5","external,hive,tvf,external_docker") {
                         "format" = "parquet") limit 10; """
 
 
-            // uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_parquet/group5/nation.dict-malformed.parquet"
-            // order_qt_test_22 """ select * from HDFS(
-            //             "uri" = "${uri}",
-            //             "hadoop.username" = "${hdfsUserName}",
-            //             "format" = "parquet") limit 10; """
-            // [E-3113]string column length is too large: total_length=7909446880690438330, element_number=25, you can set batch_size a number smaller than 25 to avoid this error
-
-
             uri = "${defaultFS}" + 
"/user/doris/tvf_data/test_hdfs_parquet/group5/v0.7.1.column-metadata-handling.parquet"
             order_qt_test_23 """ select * from HDFS(
                         "uri" = "${uri}",

