This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 7a603b5618b2cc216b3e44ebe71c27a806e1f929 Author: daidai <2017501...@qq.com> AuthorDate: Fri Aug 18 18:23:19 2023 +0800 [fix](json)Fix the bug that does not stop when reading json files (#23062) * [fix](json)Fix the bug that does not stop when reading json files --- be/src/vec/exec/format/json/new_json_reader.cpp | 1 + .../data/external_table_p0/tvf/test_hdfs_tvf.out | 37 ++++++++++++++++++++ .../external_table_p0/tvf/test_hdfs_tvf.groovy | 40 ++++++++++++++++++++++ 3 files changed, 78 insertions(+) diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index eb5c602238..b02c30807d 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -1054,6 +1054,7 @@ Status NewJsonReader::_read_one_message(std::unique_ptr<uint8_t[]>* file_buf, si file_buf->reset(new uint8_t[file_size]); Slice result(file_buf->get(), file_size); RETURN_IF_ERROR(_file_reader->read_at(_current_offset, result, read_size, _io_ctx)); + _current_offset += *read_size; break; } case TFileType::FILE_STREAM: { diff --git a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out index 9fb4acdf35..9ae369b977 100644 --- a/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out +++ b/regression-test/data/external_table_p0/tvf/test_hdfs_tvf.out @@ -257,6 +257,43 @@ 8 chengdu 2345678 9 xian 2345679 +-- !json_limit1 -- +1 beijing 2345671 +10 hefei 23456710 +11 \N 23456711 +12 hefei \N +2 shanghai 2345672 +3 guangzhou 2345673 +4 shenzhen 2345674 +5 hangzhou 2345675 +6 nanjing 2345676 +7 wuhan 2345677 +8 chengdu 2345678 +9 xian 2345679 + +-- !json_limit2 -- +1 beijing 1454547 +10 hefei 2345676 +2 shanghai 1244264 +3 guangzhou 528369 +4 shenzhen 594201 +5 hangzhou 594201 +6 nanjing 2345672 +7 wuhan 2345673 +8 chengdu 2345674 +9 xian 2345675 + +-- !json_limit3 -- +1 {"id": 1, "city": "beijing", "code": 2345671} +2 {"id": 
2, "city": "shanghai", "code": 2345672} +3 {"id": 3, "city": "hangzhou", "code": 2345673} +4 {"id": 4, "city": "shenzhen", "code": 2345674} +5 {"id": 5, "city": "guangzhou", "code": 2345675} + +-- !json_limit4 -- +1 {"id": 1, "city": "beijing", "code": 2345671} +2 {"id": 2, "city": "shanghai", "code": 2345672} + -- !json_root -- 1 beijing 2345671 2 shanghai 2345672 diff --git a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy index 162af3c722..0535eb6505 100644 --- a/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy +++ b/regression-test/suites/external_table_p0/tvf/test_hdfs_tvf.groovy @@ -130,6 +130,46 @@ suite("test_hdfs_tvf") { "strip_outer_array" = "false", "read_json_by_line" = "true") order by id; """ + + uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json" + format = "json" + qt_json_limit1 """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true") order by id limit 100; """ + + uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/one_array_json.json" + format = "json" + qt_json_limit2 """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "true", + "read_json_by_line" = "false") order by id limit 100; """ + uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json" + format = "json" + qt_json_limit3 """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true") order by no limit 100; """ + uri = "${defaultFS}" + 
"/user/doris/preinstalled_data/json_format_test/nest_json.json" + format = "json" + qt_json_limit4 """ select * from HDFS( + "uri" = "${uri}", + "fs.defaultFS"= "${defaultFS}", + "hadoop.username" = "${hdfsUserName}", + "format" = "${format}", + "strip_outer_array" = "false", + "read_json_by_line" = "true") order by no limit 2; """ + + // test json root uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json" format = "json" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org