This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new bebc7ae4914 branch-4.1: [fix](serde) fix split_by_delimiter missing backslash escape handling #61995 (#64432)
bebc7ae4914 is described below

commit bebc7ae49140efe4111b9ade0c4dc8c54e8e7b7f
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Jun 16 13:37:03 2026 +0800

    branch-4.1: [fix](serde) fix split_by_delimiter missing backslash escape handling #61995 (#64432)
    
    Cherry-picked from #61995
    
    Co-authored-by: Chenyang Sun <[email protected]>
---
 .../complex_type_deserialize_util.h                |  4 +-
 .../data_type_serde/data_type_serde_map_test.cpp   | 57 ++++++++++++++++++++++
 regression-test/data/jsonb_p0/test_jsonb_cast.csv  |  2 +-
 regression-test/data/jsonb_p0/test_jsonb_cast.out  |  6 +--
 .../data/jsonb_p0/test_jsonb_unescaped.csv         |  2 +-
 .../jsonb_p0/test_jsonb_with_unescaped_string.out  |  4 +-
 6 files changed, 67 insertions(+), 8 deletions(-)

diff --git a/be/src/core/data_type_serde/complex_type_deserialize_util.h b/be/src/core/data_type_serde/complex_type_deserialize_util.h
index 20f49636c76..ff76d64dcd6 100644
--- a/be/src/core/data_type_serde/complex_type_deserialize_util.h
+++ b/be/src/core/data_type_serde/complex_type_deserialize_util.h
@@ -42,7 +42,9 @@ struct ComplexTypeDeserializeUtil {
         std::vector<SplitResult> elements;
         for (int pos = 0; pos < str.size; ++pos) {
             char c = str.data[pos];
-            if (c == '"' || c == '\'') {
+            if (c == '\\' && pos + 1 < static_cast<int>(str.size)) {
+                ++pos; // skip escaped character
+            } else if (c == '"' || c == '\'') {
                 if (!has_quote) {
                     quote_char = c;
                     has_quote = !has_quote;
diff --git a/be/test/core/data_type_serde/data_type_serde_map_test.cpp b/be/test/core/data_type_serde/data_type_serde_map_test.cpp
index 33d72e3b7e8..cfe6dee93e0 100644
--- a/be/test/core/data_type_serde/data_type_serde_map_test.cpp
+++ b/be/test/core/data_type_serde/data_type_serde_map_test.cpp
@@ -41,6 +41,7 @@
 #include "core/data_type/data_type_nullable.h"
 #include "core/data_type/data_type_string.h"
 #include "core/data_type/define_primitive_type.h"
+#include "core/data_type_serde/complex_type_deserialize_util.h"
 #include "core/field.h"
 #include "core/types.h"
 #include "storage/olap_common.h"
@@ -178,4 +179,60 @@ TEST_F(DataTypeMapSerDeTest, ArrowMemNotAligned) {
     EXPECT_TRUE(st.ok());
 }
 
+// Stream Load JSON stores Map as String via to_json_string, then converts back
+// via from_string → split_by_delimiter. The splitter must handle '\' escapes
+// so that '\"' inside a value doesn't flip quote state and expose inner ':'/','.
+TEST_F(DataTypeMapSerDeTest, SplitByDelimiterHandlesBackslashEscape) {
+    DataTypeSerDe::FormatOptions opts;
+    opts.map_key_delim = ':';
+    opts.collection_delim = ',';
+
+    auto make_map_type = []() {
+        auto str = std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+        return std::make_shared<DataTypeMap>(str, str);
+    };
+
+    // split_by_delimiter: '\"' must not toggle quote state
+    // Input (after stripping outer {}): "k":"[{\"a\":\"b\\nc:"
+    // Expected: 2 elements — key "k" and value "[{\"a\":\"b\\nc:"
+    {
+        std::string inner = "\"k\":\"[{\\\"a\\\":\\\"b\\\\nc:\"";
+        StringRef str(inner.data(), inner.size());
+        auto result = ComplexTypeDeserializeUtil::split_by_delimiter(
+                str, [&](char c) { return c == opts.map_key_delim || c == opts.collection_delim; });
+        EXPECT_EQ(result.size(), 2u);
+    }
+
+    // from_string: value ending with ':' (map_key_delim) must not cause split error
+    // Simulates to_json_string output: {"k":"[{\"a\":\"b\\nc:"}
+    {
+        auto map_type = make_map_type();
+        auto col = map_type->create_column();
+        std::string map_str = "{\"k\":\"[{\\\"a\\\":\\\"b\\\\nc:\"}";
+        StringRef ref(map_str.data(), map_str.size());
+        EXPECT_TRUE(map_type->get_serde()->from_string(ref, *col, opts).ok());
+        EXPECT_EQ(col->size(), 1u);
+    }
+
+    // from_string: value ending with ',' (collection_delim) — same class of bug
+    {
+        auto map_type = make_map_type();
+        auto col = map_type->create_column();
+        std::string map_str = "{\"k\":\"[{\\\"a\\\":\\\"b\\\\nc,\"}";
+        StringRef ref(map_str.data(), map_str.size());
+        EXPECT_TRUE(map_type->get_serde()->from_string(ref, *col, opts).ok());
+        EXPECT_EQ(col->size(), 1u);
+    }
+
+    // Control: value ending with ')' (not a delimiter) — always worked
+    {
+        auto map_type = make_map_type();
+        auto col = map_type->create_column();
+        std::string map_str = "{\"k\":\"[{\\\"a\\\":\\\"b\\\\nc)\"}";
+        StringRef ref(map_str.data(), map_str.size());
+        EXPECT_TRUE(map_type->get_serde()->from_string(ref, *col, opts).ok());
+        EXPECT_EQ(col->size(), 1u);
+    }
+}
+
 } // namespace doris
diff --git a/regression-test/data/jsonb_p0/test_jsonb_cast.csv b/regression-test/data/jsonb_p0/test_jsonb_cast.csv
index d4d64bebe19..3efda7706dc 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_cast.csv
+++ b/regression-test/data/jsonb_p0/test_jsonb_cast.csv
@@ -1,4 +1,4 @@
 1      \N
 2      ['{\'x\':\'{"y":1}\', \'t\':\'{"y":2}\'}', '{"x":1}']
-3      ['foo\'bar', 'foo"bar', 'foo\\'bar', 'foo\'\'bar']
+3      ['foo\'bar', 'foo"bar', 'foo\'bar', 'foo\'\'bar']
 4      ['\/some\/cool\/url', '/some/cool/url', 
'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e']
diff --git a/regression-test/data/jsonb_p0/test_jsonb_cast.out b/regression-test/data/jsonb_p0/test_jsonb_cast.out
index bb47b7249a5..0046ea38e0a 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_cast.out
+++ b/regression-test/data/jsonb_p0/test_jsonb_cast.out
@@ -2,13 +2,13 @@
 -- !select_1 --
 1      \N
 2      ["{'x':'{"y":1}', 't':'{"y":2}'}", "{"x":1}"]
-3      ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+3      ["foo'bar", "foo"bar", "foo'bar", "foo''bar"]
 4      ["/some/cool/url", "/some/cool/url", 
"a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
 
 -- !select_2 --
 1      \N
 2      ["{'x':'{"y":1}', 't':'{"y":2}'}", "{"x":1}"]
-3      ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+3      ["foo'bar", "foo"bar", "foo'bar", "foo''bar"]
 4      ["/some/cool/url", "/some/cool/url", 
"a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
 27     ["{"k1":"v1", "k2":200}"]
 28     ["{"a.b.c":{"k1.a1":"v31", "k2":300},"a":"niu"}"]
@@ -18,7 +18,7 @@
 -- !select_json --
 1      \N
 2      ["{'x':'{\\"y\\":1}', 't':'{\\"y\\":2}'}","{\\"x\\":1}"]
-3      ["foo'bar', 'foo\\"bar', 'foo\\\\'bar', 'foo''bar"]
+3      ["foo'bar","foo\\"bar","foo'bar","foo''bar"]
 4      
["/some/cool/url","/some/cool/url","a\\\\_\\\\c\\\\l\\\\i\\\\c\\\\k\\\\h\\\\o\\\\u\\\\s\\\\e"]
 27     ["{\\"k1\\":\\"v1\\", \\"k2\\":200}"]
 28     ["{\\"a.b.c\\":{\\"k1.a1\\":\\"v31\\", 
\\"k2\\":300},\\"a\\":\\"niu\\"}"]
diff --git a/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv b/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
index e4f859e7511..37c07297cbf 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
+++ b/regression-test/data/jsonb_p0/test_jsonb_unescaped.csv
@@ -1,5 +1,5 @@
 1      \N
 2      ['{\'x\' : \'{"y" : 1}\', \'t\' : \'{"y" : 2}\'}', '{"x" : 1}']
-3      ['foo\'bar', 'foo"bar', 'foo\\'bar', 'foo\'\'bar']
+3      ['foo\'bar', 'foo"bar', 'foo\'bar', 'foo\'\'bar']
 4      ['\/some\/cool\/url', '/some/cool/url', 
'a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e']
 5      ["\"双引号\"", "反斜\\线"]
\ No newline at end of file
diff --git a/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out b/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
index 99fb23ef9ee..f7df0f30c14 100644
--- a/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
+++ b/regression-test/data/jsonb_p0/test_jsonb_with_unescaped_string.out
@@ -2,14 +2,14 @@
 -- !select_csv --
 1      \N
 2      ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "{"x" : 1}"]
-3      ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+3      ["foo'bar", "foo"bar", "foo'bar", "foo''bar"]
 4      ["/some/cool/url", "/some/cool/url", 
"a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
 5      [""双引号"", "反斜\\线"]
 
 -- !select_json --
 1      \N
 2      ["{'x' : '{"y" : 1}', 't' : '{"y" : 2}'}", "'{"x" : 1}'"]
-3      ["foo'bar', 'foo"bar', 'foo\\'bar', 'foo''bar"]
+3      ["foo'bar", "foo"bar", "foo\\'bar", "foo''bar"]
 4      ["/some/cool/url", "/some/cool/url", 
"a\\_\\c\\l\\i\\c\\k\\h\\o\\u\\s\\e"]
 5      [""双引号"", "反斜\\线"]
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to