This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 1631076ae51 [Fix](JsonReader) fix json with duplicate key entry may result out of bound exception  (#38188)
1631076ae51 is described below

commit 1631076ae519bb767282adf5896c72ee757d3fc6
Author: lihangyu <15605149...@163.com>
AuthorDate: Tue Jul 23 14:16:50 2024 +0800

    [Fix](JsonReader) fix json with duplicate key entry may result out of bound exception  (#38188)
    
    #38146
---
 be/src/vec/exec/format/json/new_json_reader.cpp    |  3 +++
 .../stream_load/test_duplicate_json_keys.json      |  1 +
 .../data/load_p0/stream_load/test_json_load.out    |  2 ++
 .../load_p0/stream_load/test_json_load.groovy      | 25 ++++++++++++++++++++++
 4 files changed, 31 insertions(+)

diff --git a/be/src/vec/exec/format/json/new_json_reader.cpp b/be/src/vec/exec/format/json/new_json_reader.cpp
index a41fa3881b1..0ef75a8ebfe 100644
--- a/be/src/vec/exec/format/json/new_json_reader.cpp
+++ b/be/src/vec/exec/format/json/new_json_reader.cpp
@@ -1415,6 +1415,9 @@ Status NewJsonReader::_simdjson_set_column_value(simdjson::ondemand::object* val
             // This key is not exist in slot desc, just ignore
             continue;
         }
+        if (_seen_columns[column_index]) {
+            continue;
+        }
         simdjson::ondemand::value val = field.value();
         auto* column_ptr = block.get_by_position(column_index).column->assume_mutable().get();
         RETURN_IF_ERROR(
diff --git a/regression-test/data/load_p0/stream_load/test_duplicate_json_keys.json b/regression-test/data/load_p0/stream_load/test_duplicate_json_keys.json
new file mode 100644
index 00000000000..4bf1079cf22
--- /dev/null
+++ b/regression-test/data/load_p0/stream_load/test_duplicate_json_keys.json
@@ -0,0 +1 @@
+{"k1" : 10, "k1" : 100}
\ No newline at end of file
diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out b/regression-test/data/load_p0/stream_load/test_json_load.out
index 7351891633d..861cc4a1664 100644
--- a/regression-test/data/load_p0/stream_load/test_json_load.out
+++ b/regression-test/data/load_p0/stream_load/test_json_load.out
@@ -245,3 +245,5 @@ John        30      New York        {"email":"j...@example.com","phone":"+1-123-456-7890"}
 android        \N      \N      \N      \N      \N
 android        \N      \N      \N      \N      \N
 
+-- !select29 --
+10     \N
diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
index 6d0c221ca03..4432ad6cc0c 100644
--- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy
@@ -734,4 +734,29 @@ suite("test_json_load", "p0") {
     } finally {
         try_sql("DROP TABLE IF EXISTS ${testTable}")
     }
+
+    // add duplicate json entry case
+    try {
+        sql "DROP TABLE IF EXISTS ${testTable}"
+        sql """CREATE TABLE IF NOT EXISTS ${testTable} 
+            (
+                `k1` varchar(1024) NULL,
+                `k2` varchar(1024) NULL
+            )
+            DUPLICATE KEY(`k1`)
+            COMMENT ''
+            DISTRIBUTED BY RANDOM BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1"
+            );"""
+
+        load_json_data.call("${testTable}", "${testTable}_case29", 'false', 'true', 'json', '', '',
+                             '', '', '', 'test_duplicate_json_keys.json', false, 1)
+        
+        sql "sync"
+        qt_select29 "select * from ${testTable}"
+
+    } finally {
+        try_sql("DROP TABLE IF EXISTS ${testTable}")
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to