This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new a75760d18f5 branch-2.1 cherry-pick [Fix](Variant) fix serialize with 
json key contains `.` as name (#51864)
a75760d18f5 is described below

commit a75760d18f5ce9db3b4cbbeec7aca96d43e0272f
Author: lihangyu <[email protected]>
AuthorDate: Fri Jun 20 14:00:00 2025 +0800

    branch-2.1 cherry-pick [Fix](Variant) fix serialize with json key contains 
`.` as name (#51864)
    
    cherry-pick from #51857
---
 .../olap/rowset/segment_v2/hierarchical_data_reader.h   |   8 ++++++++
 be/src/vec/columns/column_object.cpp                    |   8 ++------
 be/src/vec/core/field.h                                 |   9 ++-------
 be/src/vec/data_types/data_type_object.cpp              |  10 ++++++++--
 be/src/vec/json/json_parser.h                           |   8 ++++++++
 be/src/vec/json/path_in_data.h                          |   8 --------
 be/test/vec/columns/column_object_test.cpp              |   1 +
 gensrc/proto/data.proto                                 |   2 ++
 regression-test/data/variant_p0/column_name.out         | Bin 469 -> 545 bytes
 regression-test/suites/variant_p0/column_name.groovy    |  15 +++++++++++++--
 10 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h 
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
index 3197e95cf75..b9a4ff25ce7 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h
@@ -61,6 +61,14 @@ struct SubcolumnReader {
 };
 using SubcolumnColumnReaders = vectorized::SubcolumnsTree<SubcolumnReader>;
 
+struct PathWithColumnAndType {
+    vectorized::PathInData path;
+    vectorized::ColumnPtr column;
+    vectorized::DataTypePtr type;
+};
+
+using PathsWithColumnAndType = std::vector<PathWithColumnAndType>;
+
 // Reader for hierarchical data for variant, merge with root(sparse encoded 
columns)
 class HierarchicalDataReader : public ColumnIterator {
 public:
diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index edc0177892f..2d454ed449c 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -805,11 +805,7 @@ void ColumnObject::try_insert(const Field& field) {
     }
     const auto& object = field.get<const VariantMap&>();
     size_t old_size = size();
-    for (const auto& [key_str, value] : object) {
-        PathInData key;
-        if (!key_str.empty()) {
-            key = PathInData(key_str);
-        }
+    for (const auto& [key, value] : object) {
         if (!has_subcolumn(key)) {
             bool succ = add_sub_column(key, old_size);
             if (!succ) {
@@ -894,7 +890,7 @@ void ColumnObject::get(size_t n, Field& res) const {
     auto& object = res.get<VariantMap&>();
 
     for (const auto& entry : subcolumns) {
-        auto it = object.try_emplace(entry->path.get_path()).first;
+        auto it = object.try_emplace(entry->path).first;
         entry->data.get(n, it->second);
     }
 }
diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h
index 8113dc602fb..922f9abb13e 100644
--- a/be/src/vec/core/field.h
+++ b/be/src/vec/core/field.h
@@ -43,6 +43,7 @@
 #include "util/quantile_state.h"
 #include "vec/common/uint128.h"
 #include "vec/core/types.h"
+#include "vec/json/path_in_data.h"
 
 namespace doris {
 namespace vectorized {
@@ -153,13 +154,7 @@ DEFINE_FIELD_VECTOR(Tuple);
 DEFINE_FIELD_VECTOR(Map);
 #undef DEFINE_FIELD_VECTOR
 
-using FieldMap = std::map<String, Field, std::less<String>>;
-#define DEFINE_FIELD_MAP(X)       \
-    struct X : public FieldMap {  \
-        using FieldMap::FieldMap; \
-    }
-DEFINE_FIELD_MAP(VariantMap);
-#undef DEFINE_FIELD_MAP
+using VariantMap = std::map<PathInData, Field>;
 
 class JsonbField {
 public:
diff --git a/be/src/vec/data_types/data_type_object.cpp 
b/be/src/vec/data_types/data_type_object.cpp
index c3c43c1bf69..40637ced6c4 100644
--- a/be/src/vec/data_types/data_type_object.cpp
+++ b/be/src/vec/data_types/data_type_object.cpp
@@ -69,6 +69,7 @@ int64_t 
DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column,
         }
         PColumnMeta column_meta_pb;
         column_meta_pb.set_name(entry->path.get_path());
+        entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not 
used here*/);
         type->to_pb_column_meta(&column_meta_pb);
         std::string meta_binary;
         column_meta_pb.SerializeToString(&meta_binary);
@@ -112,6 +113,7 @@ char* DataTypeObject::serialize(const IColumn& column, 
char* buf, int be_exec_ve
         ++num_of_columns;
         PColumnMeta column_meta_pb;
         column_meta_pb.set_name(entry->path.get_path());
+        entry->path.to_protobuf(column_meta_pb.mutable_column_path(), -1 /*not 
used here*/);
         type->to_pb_column_meta(&column_meta_pb);
         std::string meta_binary;
         column_meta_pb.SerializeToString(&meta_binary);
@@ -157,11 +159,15 @@ const char* DataTypeObject::deserialize(const char* buf, 
IColumn* column,
         MutableColumnPtr sub_column = type->create_column();
         buf = type->deserialize(buf, sub_column.get(), be_exec_version);
 
-        // add subcolumn to column_object
         PathInData key;
-        if (!column_meta_pb.name().empty()) {
+        if (column_meta_pb.has_column_path()) {
+            // init from path pb
+            key.from_protobuf(column_meta_pb.column_path());
+        } else if (!column_meta_pb.name().empty()) {
+            // init from name for compatibility
             key = PathInData {column_meta_pb.name()};
         }
+        // add subcolumn to column_object
         column_object->add_sub_column(key, std::move(sub_column), type);
     }
     size_t num_rows = 0;
diff --git a/be/src/vec/json/json_parser.h b/be/src/vec/json/json_parser.h
index 576c7dcba72..115d661d4d8 100644
--- a/be/src/vec/json/json_parser.h
+++ b/be/src/vec/json/json_parser.h
@@ -28,6 +28,7 @@
 #include <utility>
 #include <vector>
 
+#include "runtime/primitive_type.h"
 #include "util/jsonb_writer.h"
 #include "vec/columns/column.h"
 #include "vec/common/string_ref.h"
@@ -120,6 +121,13 @@ enum class ExtractType {
     ToString = 0,
     // ...
 };
+/// Result of parsing of a document.
+/// Contains all paths extracted from document
+/// and values which are related to them.
+struct ParseResult {
+    std::vector<PathInData> paths;
+    std::vector<Field> values;
+};
 template <typename ParserImpl, bool parse_nested = false>
 class JSONDataParser {
 public:
diff --git a/be/src/vec/json/path_in_data.h b/be/src/vec/json/path_in_data.h
index 1367970f10d..2b511db4416 100644
--- a/be/src/vec/json/path_in_data.h
+++ b/be/src/vec/json/path_in_data.h
@@ -30,7 +30,6 @@
 
 #include "gen_cpp/segment_v2.pb.h"
 #include "vec/common/uint128.h"
-#include "vec/core/field.h"
 #include "vec/core/types.h"
 
 namespace doris::vectorized {
@@ -123,13 +122,6 @@ private:
     size_t current_anonymous_array_level = 0;
 };
 using PathsInData = std::vector<PathInData>;
-/// Result of parsing of a document.
-/// Contains all paths extracted from document
-/// and values which are related to them.
-struct ParseResult {
-    std::vector<PathInData> paths;
-    std::vector<Field> values;
-};
 
 struct PathInDataRef {
     const PathInData* ref;
diff --git a/be/test/vec/columns/column_object_test.cpp 
b/be/test/vec/columns/column_object_test.cpp
index 06d987e414e..a7498e82e87 100644
--- a/be/test/vec/columns/column_object_test.cpp
+++ b/be/test/vec/columns/column_object_test.cpp
@@ -22,6 +22,7 @@
 #include <gtest/gtest.h>
 
 #include "vec/columns/common_column_test.h"
+#include "vec/json/path_in_data.h"
 
 namespace doris::vectorized {
 
diff --git a/gensrc/proto/data.proto b/gensrc/proto/data.proto
index 755a3a042db..7cb126e973b 100644
--- a/gensrc/proto/data.proto
+++ b/gensrc/proto/data.proto
@@ -62,6 +62,8 @@ message PColumnMeta {
     repeated PColumnMeta children = 5;
     optional bool result_is_nullable = 6;
     optional string function_name = 7;
+    optional int32 be_exec_version = 8;
+    optional segment_v2.ColumnPathInfo column_path = 9;
 }
 
 message PBlock {
diff --git a/regression-test/data/variant_p0/column_name.out 
b/regression-test/data/variant_p0/column_name.out
index 6ac882d2922..0f54df05d91 100644
Binary files a/regression-test/data/variant_p0/column_name.out and 
b/regression-test/data/variant_p0/column_name.out differ
diff --git a/regression-test/suites/variant_p0/column_name.groovy 
b/regression-test/suites/variant_p0/column_name.groovy
index 26520aafa50..39b2b9766c2 100644
--- a/regression-test/suites/variant_p0/column_name.groovy
+++ b/regression-test/suites/variant_p0/column_name.groovy
@@ -25,7 +25,7 @@ suite("regression_test_variant_column_name", "variant_type"){
         )
         DUPLICATE KEY(`k`)
         DISTRIBUTED BY HASH(k) BUCKETS 1 
-        properties("replication_num" = "1", "disable_auto_compaction" = 
"true");
+        properties("replication_num" = "1", "disable_auto_compaction" = 
"false");
     """ 
 
     // sql "set experimental_enable_nereids_planner = false"
@@ -63,7 +63,18 @@ suite("regression_test_variant_column_name", "variant_type"){
     sql """insert into var_column_name values (7, '{"": 1234566}')"""
     sql """insert into var_column_name values (7, '{"": 8888888}')"""
 
-    qt_sql "select Tags[''] from var_column_name order by cast(Tags[''] as 
string)"
+    qt_sql "select cast(Tags[''] as text) from var_column_name order by 
cast(Tags[''] as string)"
+
+    // name with `.`
+    sql "truncate table var_column_name"
+    sql """insert into var_column_name values (7, '{"a.b": "UPPER CASE", 
"a.c": "lower case", "a" : {"b" : 123}, "a" : {"c" : 456}}')"""
+    for (int i = 0; i < 7; i++) {
+        sql """insert into var_column_name select * from var_column_name"""
+    }
+    qt_sql_cnt_1 "select count(Tags['a.b']) from var_column_name"
+    qt_sql_cnt_2 "select count(Tags['a.c']) from var_column_name"
+    qt_sql_cnt_3 "select count(Tags['a']['b']) from var_column_name"
+    qt_sql_cnt_4 "select count(Tags['a']['c']) from var_column_name"
 
     try {
         sql """insert into var_column_name values (7, '{"": "UPPER CASE", "": 
"lower case"}')"""


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to