This is an automated email from the ASF dual-hosted git repository. xuyang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 1f7829e099 [Fix](array-type) bugfix for array column with delete condition (#13361) 1f7829e099 is described below commit 1f7829e09971cc60f329eff4fcae2fa438dc0765 Author: camby <104178...@qq.com> AuthorDate: Fri Oct 21 09:29:02 2022 +0800 [Fix](array-type) bugfix for array column with delete condition (#13361) Fix for SQL with array column: delete from tbl where c_array is null; more info please refer to #13360 Co-authored-by: cambyzju <zhuxiaol...@baidu.com> --- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +-- be/src/olap/schema.cpp | 11 +++++--- be/src/olap/schema.h | 3 +- be/src/vec/columns/column_array.cpp | 32 ++++++++++++++++++++++ be/src/vec/columns/column_array.h | 2 ++ .../data/delete_p0/test_array_column_delete.out | 5 ++++ .../delete_p0/test_array_column_delete.groovy | 29 ++++++++++++++++++++ 7 files changed, 78 insertions(+), 8 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 61d369fb69..1f8e24a2b2 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -1081,8 +1081,8 @@ Status SegmentIterator::next_batch(vectorized::Block* block) { auto cid = _schema.column_id(i); auto column_desc = _schema.column(cid); if (_is_pred_column[cid]) { - _current_return_columns[cid] = Schema::get_predicate_column_nullable_ptr( - column_desc->type(), column_desc->is_nullable()); + _current_return_columns[cid] = + Schema::get_predicate_column_nullable_ptr(*column_desc); _current_return_columns[cid]->reserve(_opts.block_row_max); } else if (i >= block->columns()) { // if i >= block->columns means the column and not the pred_column means `column i` is diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp index d6352b0cd1..01f0343317 100644 --- a/be/src/olap/schema.cpp +++ b/be/src/olap/schema.cpp @@ -114,10 +114,13 @@ vectorized::DataTypePtr Schema::get_data_type_ptr(const Field& field) { return vectorized::DataTypeFactory::instance().create_data_type(field); } -vectorized::IColumn::MutablePtr Schema::get_predicate_column_nullable_ptr(FieldType type, - bool is_null) { - vectorized::IColumn::MutablePtr ptr = Schema::get_predicate_column_ptr(type); - if (is_null) { +vectorized::IColumn::MutablePtr Schema::get_predicate_column_nullable_ptr(const Field& field) { + if (UNLIKELY(field.type() == OLAP_FIELD_TYPE_ARRAY)) { + return get_data_type_ptr(field)->create_column(); + } + + vectorized::IColumn::MutablePtr ptr = Schema::get_predicate_column_ptr(field.type()); + if (field.is_nullable()) { return doris::vectorized::ColumnNullable::create(std::move(ptr), doris::vectorized::ColumnUInt8::create()); } diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h index f3f09ffe3c..7c578f4f27 100644 --- a/be/src/olap/schema.h +++ b/be/src/olap/schema.h @@ -112,8 +112,7 @@ public: static vectorized::IColumn::MutablePtr get_predicate_column_ptr(FieldType type); - static vectorized::IColumn::MutablePtr get_predicate_column_nullable_ptr(FieldType type, - bool is_null = false); + static vectorized::IColumn::MutablePtr get_predicate_column_nullable_ptr(const Field& field); const std::vector<Field*>& columns() const { return _cols; } diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index 413c490b96..4afa62e36d 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -503,6 +503,38 @@ void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_beg } } +Status ColumnArray::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) { + auto to = reinterpret_cast<vectorized::ColumnArray*>(col_ptr); + auto& to_offsets = to->get_offsets(); + + size_t element_size = 0; + size_t max_offset = 0; + for (size_t i = 0; i < sel_size; ++i) { + element_size += size_at(sel[i]); + max_offset = std::max(max_offset, offset_at(sel[i])); + } + if (max_offset > std::numeric_limits<uint16_t>::max()) { + return Status::IOError("array elements too large than uint16_t::max"); + } + + to_offsets.reserve(to_offsets.size() + sel_size); + auto nested_sel = std::make_unique<uint16_t[]>(element_size); + size_t nested_sel_size = 0; + for (size_t i = 0; i < sel_size; ++i) { + auto row_off = offset_at(sel[i]); + auto row_size = size_at(sel[i]); + to_offsets.push_back(to_offsets.back() + row_size); + for (auto j = 0; j < row_size; ++j) { + nested_sel[nested_sel_size++] = row_off + j; + } + } + + if (nested_sel_size > 0) { + return data->filter_by_selector(nested_sel.get(), nested_sel_size, &to->get_data()); + } + return Status::OK(); +} + ColumnPtr ColumnArray::replicate(const IColumn::Offsets& replicate_offsets) const { if (replicate_offsets.empty()) return clone_empty(); diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index 043cf5f629..824cca8b23 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -176,6 +176,8 @@ public: offsets->clear(); } + Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override; + private: WrappedPtr data; WrappedPtr offsets; diff --git a/regression-test/data/delete_p0/test_array_column_delete.out b/regression-test/data/delete_p0/test_array_column_delete.out new file mode 100644 index 0000000000..8324b608c9 --- /dev/null +++ b/regression-test/data/delete_p0/test_array_column_delete.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +2 [12, 3] +3 [] + diff --git a/regression-test/suites/delete_p0/test_array_column_delete.groovy b/regression-test/suites/delete_p0/test_array_column_delete.groovy new file mode 100644 index 0000000000..21455285a4 --- /dev/null +++ b/regression-test/suites/delete_p0/test_array_column_delete.groovy @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_array_column_delete") { + def tableName = "test_array_column_delete" + + sql """ SET enable_vectorized_engine = TRUE; """ + sql "ADMIN SET FRONTEND CONFIG ('enable_array_type' = 'true')" + + sql """ DROP TABLE IF EXISTS ${tableName}; """ + sql """ CREATE TABLE ${tableName} (id INT NULL, c_array ARRAY<INT> NULL) ENGINE=OLAP DUPLICATE KEY(id) DISTRIBUTED BY HASH(id) BUCKETS 4 PROPERTIES ( "replication_allocation" = "tag.location.default: 1","in_memory" = "false","storage_format" = "V2") """ + sql """ insert into ${tableName} values(1, NULL),(2,[12,3]),(3,[]),(4,NULL),(5,NULL) """ + sql """ DELETE FROM ${tableName} WHERE c_array is NULL """ + qt_sql """ SELECT * FROM ${tableName} order by id """ +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org