This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch variant-sparse
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/variant-sparse by this push:
     new ba260c3cbaf fix 5 (#45604)
ba260c3cbaf is described below

commit ba260c3cbaff4ad3a143f3931dea9298701b2ad4
Author: Sun Chenyang <suncheny...@selectdb.com>
AuthorDate: Thu Dec 19 09:42:39 2024 +0800

    fix 5 (#45604)
---
 be/src/vec/columns/column_object.cpp | 66 +++++++++++++++++-------------------
 be/src/vec/columns/column_object.h   |  2 +-
 2 files changed, 33 insertions(+), 35 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index eb397e85a32..2b64f7f392f 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -1141,13 +1141,13 @@ std::pair<Field, FieldInfo> ColumnObject::deserialize_from_sparse_column(const C
     const auto& data_ref = value->get_data_at(row);
     const char* data = data_ref.data;
     DCHECK(data_ref.size > 1);
-    const TypeIndex type = assert_cast<const TypeIndex>(*reinterpret_cast<const uint8_t*>(data++));
+    const TypeIndex type = static_cast<const TypeIndex>(*reinterpret_cast<const uint8_t*>(data++));
     Field res;
     FieldInfo info_res = {
             .scalar_type_id = type,
             .have_nulls = false,
             .need_convert = false,
-            .num_dimensions = 1,
+            .num_dimensions = 0,
     };
     const char* end = parse_binary_from_sparse_column(type, data, res, info_res);
     DCHECK_EQ(end - data_ref.data, data_ref.size);
@@ -2034,12 +2034,17 @@ void ColumnObject::unnest(Subcolumns::NodePtr& entry, Subcolumns& subcolumns) co
 
 Status ColumnObject::finalize(FinalizeMode mode) {
     Subcolumns new_subcolumns;
+
     // finalize root first
-    if (mode == FinalizeMode::WRITE_MODE || !is_null_root()) {
+    if (!is_null_root()) {
         new_subcolumns.create_root(subcolumns.get_root()->data);
         new_subcolumns.get_mutable_root()->data.finalize(mode);
+    } else if (mode == FinalizeMode::WRITE_MODE) {
+        new_subcolumns.create_root(Subcolumn(num_rows, is_nullable, true));
     }
 
+    const bool need_pick_subcolumn_to_sparse_column =
+            mode == FinalizeMode::WRITE_MODE && subcolumns.size() > MAX_SUBCOLUMNS;
     // finalize all subcolumns
     for (auto&& entry : subcolumns) {
         const auto& least_common_type = entry->data.get_least_common_type();
@@ -2055,47 +2060,40 @@ Status ColumnObject::finalize(FinalizeMode mode) {
             continue;
         }
 
-        entry->data.finalize(mode);
-        entry->data.wrapp_array_nullable();
-
         if (entry->data.is_root) {
             continue;
         }
-        if (mode != FinalizeMode::WRITE_MODE) {
+        entry->data.finalize(mode);
+        entry->data.wrapp_array_nullable();
+
+        if (!need_pick_subcolumn_to_sparse_column) {
             new_subcolumns.add(entry->path, entry->data);
         }
     }
 
     // caculate stats & merge and encode sparse column
-    if (mode == FinalizeMode::WRITE_MODE) {
+    if (need_pick_subcolumn_to_sparse_column) {
         // pick sparse columns
         std::set<std::string_view> selected_path;
-        if (subcolumns.size() > MAX_SUBCOLUMNS) {
-            // pick subcolumns sort by size of none null values
-            std::unordered_map<std::string_view, size_t> none_null_value_sizes;
-            // 1. get the none null value sizes
-            for (auto&& entry : subcolumns) {
-                if (entry->data.is_root) {
-                    continue;
-                }
-                size_t size = entry->data.get_non_null_value_size();
-                none_null_value_sizes[entry->path.get_path()] = size;
-            }
-            // 2. sort by the size
-            std::vector<std::pair<std::string_view, size_t>> sorted_by_size(
-                    none_null_value_sizes.begin(), none_null_value_sizes.end());
-            std::sort(sorted_by_size.begin(), sorted_by_size.end(),
-                      [](const auto& a, const auto& b) { return a.second > b.second; });
-
-            // 3. pick MAX_SUBCOLUMNS selected subcolumns
-            for (size_t i = 0; i < std::min(MAX_SUBCOLUMNS, sorted_by_size.size()); ++i) {
-                selected_path.insert(sorted_by_size[i].first);
-            }
-        } else {
-            // all subcolumns should be selected, thus remaining subcolumns should be empty
-            for (auto&& entry : subcolumns) {
-                selected_path.insert(entry->path.get_path());
+        // pick subcolumns sort by size of none null values
+        std::unordered_map<std::string_view, size_t> none_null_value_sizes;
+        // 1. get the none null value sizes
+        for (auto&& entry : subcolumns) {
+            if (entry->data.is_root) {
+                continue;
             }
+            size_t size = entry->data.get_non_null_value_size();
+            none_null_value_sizes[entry->path.get_path()] = size;
+        }
+        // 2. sort by the size
+        std::vector<std::pair<std::string_view, size_t>> sorted_by_size(
+                none_null_value_sizes.begin(), none_null_value_sizes.end());
+        std::sort(sorted_by_size.begin(), sorted_by_size.end(),
+                  [](const auto& a, const auto& b) { return a.second > b.second; });
+
+        // 3. pick MAX_SUBCOLUMNS selected subcolumns
+        for (size_t i = 0; i < std::min(MAX_SUBCOLUMNS, sorted_by_size.size()); ++i) {
+            selected_path.insert(sorted_by_size[i].first);
         }
         std::map<std::string_view, Subcolumn> remaing_subcolumns;
         // add selected subcolumns to new_subcolumns, otherwise add to remaining_subcolumns
@@ -2167,7 +2165,7 @@ ColumnPtr ColumnObject::filter(const Filter& filter, ssize_t count) const {
 }
 
 ColumnPtr ColumnObject::replicate(const IColumn::Offsets& offsets) const {
-    // column_match_offsets_size(num_rows, offsets.size());
+    column_match_offsets_size(num_rows, offsets.size());
     return apply_for_columns([&](const ColumnPtr column) { return column->replicate(offsets); });
 }
 
diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h
index 647516f97cd..f8ba93ef824 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -278,7 +278,7 @@ public:
     explicit ColumnObject(bool is_nullable_, DataTypePtr type, MutableColumnPtr&& column);
 
     // create without root, num_rows = size
-    explicit ColumnObject(size_t size);
+    explicit ColumnObject(size_t size = 0);
 
     ColumnObject(Subcolumns&& subcolumns_, bool is_nullable_);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to