This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d17699875bd [Fix](Variant) implement resize and set num_rows right
when `subcolumns` is empty (#38364)
d17699875bd is described below
commit d17699875bd286dd6cd2f654cc2a1ac4162edeb3
Author: lihangyu <[email protected]>
AuthorDate: Fri Jul 26 12:19:25 2024 +0800
[Fix](Variant) implement resize and set num_rows right when `subcolumns` is
empty (#38364)
Set the row count correctly when `subcolumns` is empty, and implement the
resize method with the correct semantics.
This fixes a potential crash with the stack trace below:
```
6# doris::vectorized::ColumnObject::get(unsigned long,
doris::vectorized::Field&) const at
/root/doris/be/src/vec/columns/column_object.cpp:841
7# doris::vectorized::ColumnObject::operator[](unsigned long) const at
/root/doris/be/src/vec/columns/column_object.cpp:838
8# doris::vectorized::ColumnObject::insert_from(doris::vectorized::IColumn
const&, unsigned long) in
/home/work/unlimit_teamcity/TeamCity/Agents/20240725170551agent_172.16.0.48_1/work/60183217f6ee2a9c/output/be/lib/doris_be
9#
doris::vectorized::ColumnObject::insert_indices_from(doris::vectorized::IColumn
const&, unsigned int const*, unsigned int const*) at
/root/doris/be/src/vec/columns/column_object.cpp:1581
```
---
be/src/vec/columns/column_object.cpp | 61 +++++++++++++++++++++-
be/src/vec/columns/column_object.h | 2 +
.../nereids_rules_p0/mv/variant/variant_mv.groovy | 2 +-
3 files changed, 62 insertions(+), 3 deletions(-)
diff --git a/be/src/vec/columns/column_object.cpp
b/be/src/vec/columns/column_object.cpp
index b56c4857334..65c0a5dcd89 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -509,9 +509,24 @@ MutableColumnPtr ColumnObject::apply_for_subcolumns(Func&&
func) const {
res->add_sub_column(subcolumn->path, new_subcolumn->assume_mutable(),
subcolumn->data.get_least_common_type());
}
+ check_consistency();
return res;
}
+void ColumnObject::resize(size_t n) {
+ if (n == num_rows) {
+ return;
+ }
+ if (n > num_rows) {
+ insert_many_defaults(n - num_rows);
+ } else {
+ for (auto& subcolumn : subcolumns) {
+ subcolumn->data.pop_back(num_rows - n);
+ }
+ }
+ num_rows = n;
+}
+
bool ColumnObject::Subcolumn::check_if_sparse_column(size_t num_rows) {
if (num_rows < config::variant_threshold_rows_to_estimate_sparse_column) {
return false;
@@ -697,8 +712,16 @@ MutableColumnPtr ColumnObject::clone_resized(size_t
new_size) const {
if (new_size == 0) {
return ColumnObject::create(is_nullable);
}
- return apply_for_subcolumns(
+ // If subcolumns are empty, then res will be empty but new_size > 0
+ if (subcolumns.empty()) {
+ // Add an emtpy column with new_size rows
+ auto res = ColumnObject::create(true, false);
+ res->set_num_rows(new_size);
+ return res;
+ }
+ auto res = apply_for_subcolumns(
[&](const auto& subcolumn) { return
subcolumn.clone_resized(new_size); });
+ return res;
}
size_t ColumnObject::byte_size() const {
@@ -838,7 +861,10 @@ Field ColumnObject::operator[](size_t n) const {
}
void ColumnObject::get(size_t n, Field& res) const {
- assert(n < size());
+ if (UNLIKELY(n >= size())) {
+ throw doris::Exception(ErrorCode::OUT_OF_BOUND,
+ "Index ({}) for getting field is out of range",
n);
+ }
res = VariantMap();
auto& object = res.get<VariantMap&>();
@@ -886,11 +912,32 @@ void ColumnObject::insert_range_from(const IColumn& src,
size_t start, size_t le
}
ColumnPtr ColumnObject::replicate(const Offsets& offsets) const {
+ if (subcolumns.empty()) {
+ // Add an emtpy column with offsets.back rows
+ auto res = ColumnObject::create(true, false);
+ res->set_num_rows(offsets.back());
+ }
return apply_for_subcolumns(
[&](const auto& subcolumn) { return subcolumn.replicate(offsets);
});
}
ColumnPtr ColumnObject::permute(const Permutation& perm, size_t limit) const {
+ if (subcolumns.empty()) {
+ if (limit == 0) {
+ limit = num_rows;
+ } else {
+ limit = std::min(num_rows, limit);
+ }
+
+ if (perm.size() < limit) {
+ throw doris::Exception(ErrorCode::INTERNAL_ERROR,
+ "Size of permutation is less than
required.");
+ }
+ // Add an emtpy column with limit rows
+ auto res = ColumnObject::create(true, false);
+ res->set_num_rows(limit);
+ return res;
+ }
return apply_for_subcolumns(
[&](const auto& subcolumn) { return subcolumn.permute(perm,
limit); });
}
@@ -1420,6 +1467,12 @@ ColumnPtr ColumnObject::filter(const Filter& filter,
ssize_t count) const {
return finalized_object.apply_for_subcolumns(
[&](const auto& subcolumn) { return subcolumn.filter(filter,
count); });
}
+ if (subcolumns.empty()) {
+ // Add an emtpy column with filtered rows
+ auto res = ColumnObject::create(true, false);
+ res->set_num_rows(count_bytes_in_filter(filter));
+ return res;
+ }
auto new_column = ColumnObject::create(true, false);
for (auto& entry : subcolumns) {
auto subcolumn = entry->data.get_finalized_column().filter(filter,
count);
@@ -1433,6 +1486,10 @@ Status ColumnObject::filter_by_selector(const uint16_t*
sel, size_t sel_size, IC
if (!is_finalized()) {
finalize();
}
+ if (subcolumns.empty()) {
+ assert_cast<ColumnObject*>(col_ptr)->insert_many_defaults(sel_size);
+ return Status::OK();
+ }
auto* res = assert_cast<ColumnObject*>(col_ptr);
for (const auto& subcolumn : subcolumns) {
auto new_subcolumn =
subcolumn->data.get_least_common_type()->create_column();
diff --git a/be/src/vec/columns/column_object.h
b/be/src/vec/columns/column_object.h
index 441589bdfbb..e9b6eb7dfd8 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -363,6 +363,8 @@ public:
void clear() override;
+ void resize(size_t n) override;
+
void clear_subcolumns_data();
std::string get_name() const override {
diff --git
a/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy
b/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy
index b1c4d371dbb..23f5e889e78 100644
--- a/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy
+++ b/regression-test/suites/nereids_rules_p0/mv/variant/variant_mv.groovy
@@ -573,7 +573,7 @@ suite("variant_mv") {
where g2.actor['id'] > 34259289;
"""
def query3_6 = """
- SELECT
+ SELECT
/*+SET_VAR(batch_size=4064,broker_load_batch_size=16352,disable_streaming_preaggregations=false,enable_distinct_streaming_aggregation=true,parallel_fragment_exec_instance_num=3,parallel_pipeline_task_num=0,profile_level=1,enable_pipeline_engine=true,enable_parallel_scan=true,parallel_scan_max_scanners_count=32,parallel_scan_min_rows_per_scanner=64,enable_fold_constant_by_be=true,enable_rewrite_element_at_to_slot=true,runtime_filter_type=1,enable_parallel_result_sink=false,ena
[...]
g1.id,
g2.type,
floor(cast(g1.actor['id'] as int) + 100.5),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]