This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 7a603b5618b2cc216b3e44ebe71c27a806e1f929
Author: daidai <2017501...@qq.com>
AuthorDate: Fri Aug 18 18:23:19 2023 +0800

    [fix](json) Fix the bug where the reader does not stop when reading JSON files (#23062)
    
    * [fix](json) Fix the bug where the reader does not stop when reading JSON files
---
 be/src/vec/exec/format/json/new_json_reader.cpp    |  1 +
 .../data/external_table_p0/tvf/test_hdfs_tvf.out   | 37 ++++++++++++++++++++
 .../external_table_p0/tvf/test_hdfs_tvf.groovy     | 40 ++++++++++++++++++++++
 3 files changed, 78 insertions(+)

diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp 
b/be/src/vec/exec/format/json/new_json_reader.cpp
index eb5c602238..b02c30807d 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -1054,6 +1054,7 @@ Status 
NewJsonReader::_read_one_message(std::unique_ptr<uint8_t[]>* file_buf, si
         file_buf->reset(new uint8_t[file_size]);
         Slice result(file_buf->get(), file_size);
         RETURN_IF_ERROR(_file_reader->read_at(_current_offset, result, 
read_size, _io_ctx));
+        _current_offset += *read_size;
         break;
     }
     case TFileType::FILE_STREAM: {
diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out 
b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out
index 9fb4acdf35..9ae369b977 100644
--- a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out
+++ b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out
@@ -257,6 +257,43 @@
 8      chengdu 2345678
 9      xian    2345679
 
+-- !json_limit1 --
+1      beijing 2345671
+10     hefei   23456710
+11     \N      23456711
+12     hefei   \N
+2      shanghai        2345672
+3      guangzhou       2345673
+4      shenzhen        2345674
+5      hangzhou        2345675
+6      nanjing 2345676
+7      wuhan   2345677
+8      chengdu 2345678
+9      xian    2345679
+
+-- !json_limit2 --
+1      beijing 1454547
+10     hefei   2345676
+2      shanghai        1244264
+3      guangzhou       528369
+4      shenzhen        594201
+5      hangzhou        594201
+6      nanjing 2345672
+7      wuhan   2345673
+8      chengdu 2345674
+9      xian    2345675
+
+-- !json_limit3 --
+1      {"id": 1, "city": "beijing", "code": 2345671}
+2      {"id": 2, "city": "shanghai", "code": 2345672}
+3      {"id": 3, "city": "hangzhou", "code": 2345673}
+4      {"id": 4, "city": "shenzhen", "code": 2345674}
+5      {"id": 5, "city": "guangzhou", "code": 2345675}
+
+-- !json_limit4 --
+1      {"id": 1, "city": "beijing", "code": 2345671}
+2      {"id": 2, "city": "shanghai", "code": 2345672}
+
 -- !json_root --
 1      beijing 2345671
 2      shanghai        2345672
diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy 
b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
index 162af3c722..0535eb6505 100644
--- a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
+++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy
@@ -130,6 +130,46 @@ suite("test_hdfs_tvf") {
                         "strip_outer_array" = "false",
                         "read_json_by_line" = "true") order by id; """
 
+
+           uri = "${defaultFS}" + 
"/user/doris/preinstalled_data/json_format_test/simple_object_json.json"
+            format = "json"
+            qt_json_limit1 """ select * from HDFS(
+                        "uri" = "${uri}",
+                        "fs.defaultFS"= "${defaultFS}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "${format}",
+                        "strip_outer_array" = "false",
+                        "read_json_by_line" = "true") order by id limit 100; 
"""
+
+           uri = "${defaultFS}" + 
"/user/doris/preinstalled_data/json_format_test/one_array_json.json"
+            format = "json"
+            qt_json_limit2 """ select * from HDFS(
+                        "uri" = "${uri}",
+                        "fs.defaultFS"= "${defaultFS}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "${format}",
+                        "strip_outer_array" = "true",
+                        "read_json_by_line" = "false") order by id limit 100; 
"""
+           uri = "${defaultFS}" + 
"/user/doris/preinstalled_data/json_format_test/nest_json.json"
+            format = "json"
+            qt_json_limit3 """ select * from HDFS(
+                        "uri" = "${uri}",
+                        "fs.defaultFS"= "${defaultFS}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "${format}",
+                        "strip_outer_array" = "false",
+                        "read_json_by_line" = "true") order by no  limit 100; 
"""
+           uri = "${defaultFS}" + 
"/user/doris/preinstalled_data/json_format_test/nest_json.json"
+            format = "json"
+            qt_json_limit4 """ select * from HDFS(
+                        "uri" = "${uri}",
+                        "fs.defaultFS"= "${defaultFS}",
+                        "hadoop.username" = "${hdfsUserName}",
+                        "format" = "${format}",
+                        "strip_outer_array" = "false",
+                        "read_json_by_line" = "true") order by no limit 2; """
+
+
             // test json root
             uri = "${defaultFS}" + 
"/user/doris/preinstalled_data/json_format_test/nest_json.json"
             format = "json"


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to