This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 70e555e62c6 branch-3.0: [Opt](Serde) optimize serialization to string 
on variant type (#43320)
70e555e62c6 is described below

commit 70e555e62c6f083547b578c4ef92c1aef7078dc7
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Thu Nov 7 12:33:12 2024 +0800

    branch-3.0: [Opt](Serde) optimize serialization to string on variant type 
(#43320)
    
    Cherry-picked from #43237
---
 be/src/vec/columns/column_object.cpp | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index af0074e797e..f38b810848a 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1309,14 +1309,14 @@ rapidjson::Value* 
find_leaf_node_by_path(rapidjson::Value& json, const PathInDat
 // 2. nested array with only nulls, e.g. [null, null]. TODO: think of a better way 
to distinguish an array null value from a real null value.
 // 3. empty root jsonb value(not null)
 // 4. type is nothing
-bool skip_empty_json(const ColumnNullable* nullable, const DataTypePtr& type, 
int row,
-                     const PathInData& path) {
+bool skip_empty_json(const ColumnNullable* nullable, const DataTypePtr& type,
+                     TypeIndex base_type_id, int row, const PathInData& path) {
     // skip nulls
     if (nullable && nullable->is_null_at(row)) {
         return true;
     }
     // check if it is empty nested json array, then skip
-    if (type->equals(*ColumnObject::NESTED_TYPE)) {
+    if (base_type_id == TypeIndex::VARIANT && 
type->equals(*ColumnObject::NESTED_TYPE)) {
         Field field = (*nullable)[row];
         if (field.get_type() == Field::Types::Array) {
             const auto& array = field.get<Array>();
@@ -1336,7 +1336,7 @@ bool skip_empty_json(const ColumnNullable* nullable, 
const DataTypePtr& type, in
         return true;
     }
     // skip nothing type
-    if 
(WhichDataType(remove_nullable(get_base_type_of_array(type))).is_nothing()) {
+    if (base_type_id == TypeIndex::Nothing) {
         return true;
     }
     return false;
@@ -1344,17 +1344,19 @@ bool skip_empty_json(const ColumnNullable* nullable, 
const DataTypePtr& type, in
 
 Status find_and_set_leave_value(const IColumn* column, const PathInData& path,
                                 const DataTypeSerDeSPtr& type_serde, const 
DataTypePtr& type,
-                                rapidjson::Value& root,
+                                TypeIndex base_type_index, rapidjson::Value& 
root,
                                 rapidjson::Document::AllocatorType& allocator, 
Arena& mem_pool,
                                 int row) {
+#ifndef NDEBUG
     // sanitize type and column
     if (column->get_name() != type->create_column()->get_name()) {
         return Status::InternalError(
                 "failed to set value for path {}, expected type {}, but got {} 
at row {}",
                 path.get_path(), type->get_name(), column->get_name(), row);
     }
+#endif
     const auto* nullable = check_and_get_column<ColumnNullable>(column);
-    if (skip_empty_json(nullable, type, row, path)) {
+    if (skip_empty_json(nullable, type, base_type_index, row, path)) {
         return Status::OK();
     }
     // TODO could cache the result of leaf nodes with its path info
@@ -1474,11 +1476,12 @@ Status 
ColumnObject::serialize_one_row_to_json_format(int row, rapidjson::String
     VLOG_DEBUG << "dump structure " << 
JsonFunctions::print_json_value(*doc_structure);
 #endif
     for (const auto& subcolumn : subcolumns) {
-        
RETURN_IF_ERROR(find_and_set_leave_value(subcolumn->data.get_finalized_column_ptr(),
-                                                 subcolumn->path,
-                                                 
subcolumn->data.get_least_common_type_serde(),
-                                                 
subcolumn->data.get_least_common_type(), root,
-                                                 
doc_structure->GetAllocator(), mem_pool, row));
+        RETURN_IF_ERROR(find_and_set_leave_value(
+                subcolumn->data.get_finalized_column_ptr(), subcolumn->path,
+                subcolumn->data.get_least_common_type_serde(),
+                subcolumn->data.get_least_common_type(),
+                subcolumn->data.least_common_type.get_base_type_id(), root,
+                doc_structure->GetAllocator(), mem_pool, row));
         if (subcolumn->path.empty() && !root.IsObject()) {
             // root was modified, only handle root node
             break;
@@ -1547,10 +1550,11 @@ Status ColumnObject::merge_sparse_to_root_column() {
                 ++null_count;
                 continue;
             }
-            bool succ = find_and_set_leave_value(column, subcolumn->path,
-                                                 
subcolumn->data.get_least_common_type_serde(),
-                                                 
subcolumn->data.get_least_common_type(), root,
-                                                 
doc_structure->GetAllocator(), mem_pool, i);
+            bool succ = find_and_set_leave_value(
+                    column, subcolumn->path, 
subcolumn->data.get_least_common_type_serde(),
+                    subcolumn->data.get_least_common_type(),
+                    subcolumn->data.least_common_type.get_base_type_id(), root,
+                    doc_structure->GetAllocator(), mem_pool, i);
             if (succ && subcolumn->path.empty() && !root.IsObject()) {
                 // root was modified, only handle root node
                 break;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to