This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new 4472b88aa06 fix serialize with root (#45737)
4472b88aa06 is described below

commit 4472b88aa0675459d0f81f4838a80d3a14e00f06
Author: lihangyu <lihan...@selectdb.com>
AuthorDate: Fri Dec 20 18:11:39 2024 +0800

    fix serialize with root (#45737)
---
 be/src/vec/columns/column_object.cpp | 50 ++++++++++++++++++++++++------------
 be/src/vec/columns/column_object.h   |  2 ++
 2 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index 17c02172da8..059565e3837 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1747,18 +1747,15 @@ void get_json_by_column_tree(rapidjson::Value& root, 
rapidjson::Document::Alloca
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, std::string* 
output) const {
-    // if (!is_finalized()) {
-    //     const_cast<ColumnObject*>(this)->finalize();
-    // }
-    if (is_scalar_variant() && is_finalized()) {
-        auto type = get_root_type();
-        *output = type->to_string(*get_root(), row);
-        return Status::OK();
-    }
-    // TODO preallocate memory
     auto tmp_col = ColumnString::create();
     VectorBufferWriter write_buffer(*tmp_col.get());
-    RETURN_IF_ERROR(serialize_one_row_to_json_format(row, write_buffer, 
nullptr));
+    if (is_scalar_variant()) {
+        subcolumns.get_root()->data.serialize_text_json(row, write_buffer);
+        return Status::OK();
+    } else {
+        // TODO preallocate memory
+        RETURN_IF_ERROR(serialize_one_row_to_json_format(row, write_buffer, 
nullptr));
+    }
     write_buffer.commit();
     auto str_ref = tmp_col->get_data_at(0);
     *output = std::string(str_ref.data, str_ref.size);
@@ -1766,12 +1763,8 @@ Status ColumnObject::serialize_one_row_to_string(int64_t 
row, std::string* outpu
 }
 
 Status ColumnObject::serialize_one_row_to_string(int64_t row, BufferWritable& 
output) const {
-    // if (!is_finalized()) {
-    //     const_cast<ColumnObject*>(this)->finalize();
-    // }
-    if (is_scalar_variant() && is_finalized()) {
-        auto type = get_root_type();
-        type->to_string(*get_root(), row, output);
+    if (is_scalar_variant()) {
+        subcolumns.get_root()->data.serialize_text_json(row, output);
         return Status::OK();
     }
     RETURN_IF_ERROR(serialize_one_row_to_json_format(row, output, nullptr));
@@ -1836,8 +1829,27 @@ struct Prefix {
     bool root_is_first_flag = true;
 };
 
+bool ColumnObject::is_visible_root_value(size_t nrow) const {
+    if (is_null_root()) {
+        return false;
+    }
+    if (subcolumns.get_root()->data.is_null_at(nrow)) {
+        return false;
+    }
+    nrow = nrow - subcolumns.get_root()->data.num_of_defaults_in_prefix;
+    // the root is expected to be a nullable ColumnString (DataTypeJsonb)
+    const auto& nullable = assert_cast<const 
ColumnNullable&>(*subcolumns.get_root()->data.data[0]);
+    const auto& value_column = assert_cast<const 
ColumnString&>(nullable.get_nested_column());
+    return !value_column.get_data_at(nrow).empty();
+}
+
 Status ColumnObject::serialize_one_row_to_json_format(int64_t row_num, 
BufferWritable& output,
                                                       bool* is_null) const {
+    // root is neither null nor empty, so we should only process the root value
+    if (is_visible_root_value(row_num)) {
+        subcolumns.get_root()->data.serialize_text_json(row_num, output);
+        return Status::OK();
+    }
     const auto& column_map = assert_cast<const 
ColumnMap&>(*serialized_sparse_column);
     const auto& sparse_data_offsets = column_map.get_offsets();
     const auto [sparse_data_paths, sparse_data_values] = 
get_sparse_data_paths_and_values();
@@ -1850,9 +1862,13 @@ Status 
ColumnObject::serialize_one_row_to_json_format(int64_t row_num, BufferWri
     // For example:
     // b.c, a.b, a.a, b.e, g, h.u.t -> a.a, a.b, b.c, b.e, g, h.u.t -> {"a" : 
{"a" : ..., "b" : ...}, "b" : {"c" : ..., "e" : ...}, "g" : ..., "h" : {"u" : 
{"t" : ...}}}.
     std::vector<String> sorted_paths;
-    std::map<std::string, Subcolumn> subcolumn_path_map;
+    std::unordered_map<std::string, Subcolumn> subcolumn_path_map;
     sorted_paths.reserve(get_subcolumns().size() + (sparse_data_end - 
sparse_data_offset));
     for (const auto& subcolumn : get_subcolumns()) {
+        // Skip root value, we have already processed it
+        if (subcolumn->data.is_root) {
+            continue;
+        }
         /// We consider null value and absence of the path in a row as 
equivalent cases, because we cannot actually distinguish them.
         /// So, we don't output null values at all.
         if (!subcolumn->data.is_null_at(row_num)) {
diff --git a/be/src/vec/columns/column_object.h 
b/be/src/vec/columns/column_object.h
index c7859ab4b93..175603fca62 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -631,6 +631,8 @@ private:
             size_t start, size_t length);
 
     bool try_add_new_subcolumn(const PathInData& path);
+
+    bool is_visible_root_value(size_t nrow) const;
 };
 
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to