This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 1631076ae51 [Fix](JsonReader) fix json with duplicate key entry may result out of bound exception (#38188) 1631076ae51 is described below commit 1631076ae519bb767282adf5896c72ee757d3fc6 Author: lihangyu <15605149...@163.com> AuthorDate: Tue Jul 23 14:16:50 2024 +0800 [Fix](JsonReader) fix json with duplicate key entry may result out of bound exception (#38188) #38146 --- be/src/vec/exec/format/json/new_json_reader.cpp | 3 +++ .../stream_load/test_duplicate_json_keys.json | 1 + .../data/load_p0/stream_load/test_json_load.out | 2 ++ .../load_p0/stream_load/test_json_load.groovy | 25 ++++++++++++++++++++++ 4 files changed, 31 insertions(+) diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp index a41fa3881b1..0ef75a8ebfe 100644 --- a/be/src/vec/exec/format/json/new_json_reader.cpp +++ b/be/src/vec/exec/format/json/new_json_reader.cpp @@ -1415,6 +1415,9 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val // This key is not exist in slot desc, just ignore continue; } + if (_seen_columns[column_index]) { + continue; + } simdjson::ondemand::value val = field.value(); auto* column_ptr = block.get_by_position(column_index).column->assume_mutable().get(); RETURN_IF_ERROR( diff --git a/regression-test/data/load_p0/stream_load/test_duplicate_json_keys.json b/regression-test/data/load_p0/stream_load/test_duplicate_json_keys.json new file mode 100644 index 00000000000..4bf1079cf22 --- /dev/null +++ b/regression-test/data/load_p0/stream_load/test_duplicate_json_keys.json @@ -0,0 +1 @@ +{"k1" : 10, "k1" : 100} \ No newline at end of file diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out b/regression-test/data/load_p0/stream_load/test_json_load.out index 7351891633d..861cc4a1664 100644 --- a/regression-test/data/load_p0/stream_load/test_json_load.out +++ b/regression-test/data/load_p0/stream_load/test_json_load.out @@ -245,3 +245,5 @@ John 30 New York {"email":"j...@example.com","phone":"+1-123-456-7890"} android \N \N \N \N \N android \N \N \N \N \N +-- !select29 -- +10 \N diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy b/regression-test/suites/load_p0/stream_load/test_json_load.groovy index 6d0c221ca03..4432ad6cc0c 100644 --- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy +++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy @@ -734,4 +734,29 @@ suite("test_json_load", "p0") { } finally { try_sql("DROP TABLE IF EXISTS ${testTable}") } + + // add duplicate json entry case + try { + sql "DROP TABLE IF EXISTS ${testTable}" + sql """CREATE TABLE IF NOT EXISTS ${testTable} + ( + `k1` varchar(1024) NULL, + `k2` varchar(1024) NULL + ) + DUPLICATE KEY(`k1`) + COMMENT '' + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + load_json_data.call("${testTable}", "${testTable}_case29", 'false', 'true', 'json', '', '', + '', '', '', 'test_duplicate_json_keys.json', false, 1) + + sql "sync" + qt_select29 "select * from ${testTable}" + + } finally { + try_sql("DROP TABLE IF EXISTS ${testTable}") + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org