This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d17699875bd [Fix](Variant) implement resize and set num_rows right 
when `subcolumns` is empty (#38364)
d17699875bd is described below

commit d17699875bd286dd6cd2f654cc2a1ac4162edeb3
Author: lihangyu <15605149...@163.com>
AuthorDate: Fri Jul 26 12:19:25 2024 +0800

    [Fix](Variant) implement resize and set num_rows right when `subcolumns` is 
empty (#38364)
    
    Set the row count correctly when `subcolumns` is empty, and implement the
    resize method with the correct semantics.
    This fixes a potential crash with the stack below:
    ```
    6# doris::vectorized::ColumnObject::get(unsigned long, 
doris::vectorized::Field&) const at 
/root/doris/be/src/vec/columns/column_object.cpp:841
    7# doris::vectorized::ColumnObject::operator[](unsigned long) const at 
/root/doris/be/src/vec/columns/column_object.cpp:838
    8# doris::vectorized::ColumnObject::insert_from(doris::vectorized::IColumn 
const&, unsigned long) in 
/home/work/unlimit_teamcity/TeamCity/Agents/20240725170551agent_172.16.0.48_1/work/60183217f6ee2a9c/output/be/lib/doris_be
    9# 
doris::vectorized::ColumnObject::insert_indices_from(doris::vectorized::IColumn 
const&, unsigned int const*, unsigned int const*) at 
/root/doris/be/src/vec/columns/column_object.cpp:1581
    ```
---
 be/src/vec/columns/column_object.cpp               | 61 +++++++++++++++++++++-
 be/src/vec/columns/column_object.h                 |  2 +
 .../nereids_rules_p0/mv/variant/variant_mv.groovy  |  2 +-
 3 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp 
b/be/src/vec/columns/column_object.cpp
index b56c4857334..65c0a5dcd89 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -509,9 +509,24 @@ MutableColumnPtr ColumnObject::apply_for_subcolumns(Func&& 
func) const {
         res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable(),
                             subcolumn->data.get_least_common_type());
     }
+    check_consistency();
     return res;
 }
 
+// Resize this column to exactly `n` rows.
+// Growing appends (n - num_rows) rows through insert_many_defaults; shrinking
+// pops the surplus trailing rows from every subcolumn. When `n` already equals
+// num_rows nothing is touched.
+void ColumnObject::resize(size_t n) {
+    if (n > num_rows) {
+        insert_many_defaults(n - num_rows);
+    } else if (n < num_rows) {
+        // Number of trailing rows each subcolumn has to drop.
+        const size_t surplus = num_rows - n;
+        for (auto& entry : subcolumns) {
+            entry->data.pop_back(surplus);
+        }
+    }
+    num_rows = n;
+}
+
 bool ColumnObject::Subcolumn::check_if_sparse_column(size_t num_rows) {
     if (num_rows < config::variant_threshold_rows_to_estimate_sparse_column) {
         return false;
@@ -697,8 +712,16 @@ MutableColumnPtr ColumnObject::clone_resized(size_t 
new_size) const {
     if (new_size == 0) {
         return ColumnObject::create(is_nullable);
     }
-    return apply_for_subcolumns(
+    // If subcolumns are empty, then res will be empty but new_size > 0
+    if (subcolumns.empty()) {
+        // Add an emtpy column with new_size rows
+        auto res = ColumnObject::create(true, false);
+        res->set_num_rows(new_size);
+        return res;
+    }
+    auto res = apply_for_subcolumns(
             [&](const auto& subcolumn) { return 
subcolumn.clone_resized(new_size); });
+    return res;
 }
 
 size_t ColumnObject::byte_size() const {
@@ -838,7 +861,10 @@ Field ColumnObject::operator[](size_t n) const {
 }
 
 void ColumnObject::get(size_t n, Field& res) const {
-    assert(n < size());
+    if (UNLIKELY(n >= size())) {
+        throw doris::Exception(ErrorCode::OUT_OF_BOUND,
+                               "Index ({}) for getting field is out of range", 
n);
+    }
     res = VariantMap();
     auto& object = res.get<VariantMap&>();
 
@@ -886,11 +912,32 @@ void ColumnObject::insert_range_from(const IColumn& src, 
size_t start, size_t le
 }
 
 ColumnPtr ColumnObject::replicate(const Offsets& offsets) const {
+    // Replicate rows according to `offsets`.
+    // With no subcolumns there is nothing for apply_for_subcolumns to copy, so the
+    // result would carry no rows; build a column whose row count is offsets.back().
+    if (subcolumns.empty()) {
+        // Add an empty column with offsets.back() rows (0 rows when offsets is empty,
+        // so back() is never called on an empty array).
+        auto res = ColumnObject::create(true, false);
+        res->set_num_rows(offsets.empty() ? 0 : offsets.back());
+        // Return the sized column. Previously `res` was dropped here and execution
+        // fell through to apply_for_subcolumns, losing the row count.
+        return res;
+    }
     return apply_for_subcolumns(
             [&](const auto& subcolumn) { return subcolumn.replicate(offsets); 
 });
 }
 
 ColumnPtr ColumnObject::permute(const Permutation& perm, size_t limit) const {
+    if (subcolumns.empty()) { // no subcolumns: only the resulting row count is computed
+        if (limit == 0) { // limit == 0 is treated as "all rows"
+            limit = num_rows;
+        } else {
+            limit = std::min(num_rows, limit); // never more rows than this column has
+        }
+
+        if (perm.size() < limit) { // the permutation must supply at least `limit` entries
+            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
+                                   "Size of permutation is less than 
required.");
+        }
+        // Return an empty column (no subcolumns) whose row count is set to `limit`.
+        auto res = ColumnObject::create(true, false);
+        res->set_num_rows(limit);
+        return res;
+    }
     return apply_for_subcolumns(
             [&](const auto& subcolumn) { return subcolumn.permute(perm, 
 limit); });
 }
@@ -1420,6 +1467,12 @@ ColumnPtr ColumnObject::filter(const Filter& filter, 
ssize_t count) const {
         return finalized_object.apply_for_subcolumns(
                 [&](const auto& subcolumn) { return subcolumn.filter(filter, 
count); });
     }
+    if (subcolumns.empty()) {
+        // Add an emtpy column with filtered rows
+        auto res = ColumnObject::create(true, false);
+        res->set_num_rows(count_bytes_in_filter(filter));
+        return res;
+    }
     auto new_column = ColumnObject::create(true, false);
     for (auto& entry : subcolumns) {
         auto subcolumn = entry->data.get_finalized_column().filter(filter, 
count);
@@ -1433,6 +1486,10 @@ Status ColumnObject::filter_by_selector(const uint16_t* 
sel, size_t sel_size, IC
     if (!is_finalized()) {
         finalize();
     }
+    if (subcolumns.empty()) {
+        assert_cast<ColumnObject*>(col_ptr)->insert_many_defaults(sel_size);
+        return Status::OK();
+    }
     auto* res = assert_cast<ColumnObject*>(col_ptr);
     for (const auto& subcolumn : subcolumns) {
         auto new_subcolumn = 
subcolumn->data.get_least_common_type()->create_column();
diff --git a/be/src/vec/columns/column_object.h 
b/be/src/vec/columns/column_object.h
index 441589bdfbb..e9b6eb7dfd8 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -363,6 +363,8 @@ public:
 
     void clear() override;
 
+    void resize(size_t n) override;
+
     void clear_subcolumns_data();
 
     std::string get_name() const override {
diff --git 
a/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy 
b/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy
index b1c4d371dbb..23f5e889e78 100644
--- a/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy
+++ b/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy
@@ -573,7 +573,7 @@ suite("variant_mv") {
     where g2.actor['id'] > 34259289;
     """
     def query3_6 = """
-    SELECT
+    SELECT  
/*+SET_VAR(batch_size=4064,broker_load_batch_size=16352,disable_streaming_preaggregations=false,enable_distinct_streaming_aggregation=true,parallel_fragment_exec_instance_num=3,parallel_pipeline_task_num=0,profile_level=1,enable_pipeline_engine=true,enable_parallel_scan=true,parallel_scan_max_scanners_count=32,parallel_scan_min_rows_per_scanner=64,enable_fold_constant_by_be=true,enable_rewrite_element_at_to_slot=true,runtime_filter_type=1,enable_parallel_result_sink=false,ena
 [...]
     g1.id,
     g2.type,
     floor(cast(g1.actor['id'] as int) + 100.5),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to