This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch tpch501 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 7ae73ceeb72341acf9fddee1b194c23746b0d3b5 Author: morningman <[email protected]> AuthorDate: Sun Dec 31 23:26:56 2023 +0800 Revert "[tmp] optimize for ColumnNullable's serialize_vec/deserialize_vec (#29232)" This reverts commit 57325c5946480865ab02ff8f8c5e0ed51eefd9b6. --- be/src/exprs/bloom_filter_func.h | 47 +++----- be/src/pipeline/exec/data_queue.cpp | 4 - be/src/pipeline/exec/exchange_sink_buffer.cpp | 2 +- .../aggregate_functions/aggregate_function_null.h | 2 +- be/src/vec/columns/column_decimal.cpp | 4 +- be/src/vec/columns/column_nullable.cpp | 29 ++--- be/src/vec/columns/column_object.cpp | 2 +- be/src/vec/columns/column_string.cpp | 4 +- be/src/vec/common/schema_util.cpp | 4 +- be/src/vec/common/typeid_cast.h | 9 +- be/src/vec/core/block.cpp | 30 ++--- be/src/vec/core/block.h | 3 + be/src/vec/core/column_with_type_and_name.cpp | 25 ---- be/src/vec/core/column_with_type_and_name.h | 13 +-- be/src/vec/data_types/data_type.h | 4 + be/src/vec/data_types/data_type_nullable.cpp | 4 + be/src/vec/data_types/data_type_nullable.h | 1 + be/src/vec/data_types/get_least_supertype.cpp | 4 +- be/src/vec/exprs/vcast_expr.cpp | 7 +- be/src/vec/functions/function.cpp | 78 +++++++------ be/src/vec/functions/function.h | 1 + be/src/vec/functions/function_bitmap.cpp | 5 +- be/src/vec/functions/function_cast.h | 129 ++++++++++++++------- be/src/vec/functions/functions_geo.cpp | 28 ++--- be/src/vec/functions/functions_geo.h | 1 + be/src/vec/functions/functions_logical.cpp | 5 +- be/src/vec/functions/least_greast.cpp | 3 +- be/src/vec/functions/nullif.cpp | 17 +-- be/src/vec/runtime/vdata_stream_recvr.h | 2 +- 29 files changed, 246 insertions(+), 221 deletions(-) diff --git a/be/src/exprs/bloom_filter_func.h b/be/src/exprs/bloom_filter_func.h index 71dc3f6e663..3c60ccc89c7 100644 --- a/be/src/exprs/bloom_filter_func.h +++ b/be/src/exprs/bloom_filter_func.h @@ -312,22 +312,13 @@ struct CommonFindOp { void find_batch(const BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, uint8_t* results) const { - const T* __restrict data = nullptr; - const uint8_t* __restrict nullmap = nullptr; if (column->is_nullable()) { const auto* nullable = assert_cast<const vectorized::ColumnNullable*>(column.get()); - if (nullable->has_null()) { - nullmap = - assert_cast<const vectorized::ColumnUInt8&>(nullable->get_null_map_column()) - .get_data() - .data(); - } - data = (T*)nullable->get_nested_column().get_raw_data().data; - } else { - data = (T*)column->get_raw_data().data; - } + const auto& nullmap = + assert_cast<const vectorized::ColumnUInt8&>(nullable->get_null_map_column()) + .get_data(); - if (nullmap) { + const T* data = (T*)nullable->get_nested_column().get_raw_data().data; for (size_t i = 0; i < column->size(); i++) { if (!nullmap[i]) { results[i] = bloom_filter.test_element(data[i]); @@ -336,6 +327,7 @@ struct CommonFindOp { } } } else { + const T* data = (T*)column->get_raw_data().data; for (size_t i = 0; i < column->size(); i++) { results[i] = bloom_filter.test_element(data[i]); } @@ -348,8 +340,8 @@ struct CommonFindOp { }; struct StringFindOp : CommonFindOp<StringRef> { - static void insert_batch(BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, - size_t start) { + void insert_batch(BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, + size_t start) { if (column->is_nullable()) { const auto* nullable = assert_cast<const vectorized::ColumnNullable*>(column.get()); const auto& col = @@ -371,8 +363,8 @@ struct StringFindOp : CommonFindOp<StringRef> { } } - static void find_batch(const BloomFilterAdaptor& bloom_filter, - const vectorized::ColumnPtr& column, uint8_t* results) { + void find_batch(const BloomFilterAdaptor& bloom_filter, const vectorized::ColumnPtr& column, + uint8_t* results) { if (column->is_nullable()) { const auto* nullable = assert_cast<const vectorized::ColumnNullable*>(column.get()); const auto& col = @@ -380,17 +372,12 @@ struct StringFindOp : CommonFindOp<StringRef> { const auto& nullmap = assert_cast<const vectorized::ColumnUInt8&>(nullable->get_null_map_column()) .get_data(); - if (nullable->has_null()) { - for (size_t i = 0; i < column->size(); i++) { - if (!nullmap[i]) { - results[i] = bloom_filter.test_element(col.get_data_at(i)); - } else { - results[i] = false; - } - } - } else { - for (size_t i = 0; i < column->size(); i++) { + + for (size_t i = 0; i < column->size(); i++) { + if (!nullmap[i]) { results[i] = bloom_filter.test_element(col.get_data_at(i)); + } else { + results[i] = false; } } } else { @@ -405,9 +392,9 @@ struct StringFindOp : CommonFindOp<StringRef> { // We do not need to judge whether data is empty, because null will not appear // when filer used by the storage engine struct FixedStringFindOp : public StringFindOp { - static uint16_t find_batch_olap_engine(const BloomFilterAdaptor& bloom_filter, const char* data, - const uint8* nullmap, uint16_t* offsets, int number, - const bool is_parse_column) { + uint16_t find_batch_olap_engine(const BloomFilterAdaptor& bloom_filter, const char* data, + const uint8* nullmap, uint16_t* offsets, int number, + const bool is_parse_column) { return find_batch_olap<StringRef, true>(bloom_filter, data, nullmap, offsets, number, is_parse_column); } diff --git a/be/src/pipeline/exec/data_queue.cpp b/be/src/pipeline/exec/data_queue.cpp index 68dfbe94e5f..5b12c6876d9 100644 --- a/be/src/pipeline/exec/data_queue.cpp +++ b/be/src/pipeline/exec/data_queue.cpp @@ -86,10 +86,6 @@ bool DataQueue::has_data_or_finished(int child_idx) { //so next loop, will check the record idx + 1 first //maybe it's useful with many queue, others maybe always 0 bool DataQueue::remaining_has_data() { - if (_child_count == 1) { - return _cur_blocks_nums_in_queue[0] > 0; - } - int count = _child_count; while (--count >= 0) { _flag_queue_idx++; diff --git a/be/src/pipeline/exec/exchange_sink_buffer.cpp b/be/src/pipeline/exec/exchange_sink_buffer.cpp index 5366dc48cca..1a4a7866540 100644 --- a/be/src/pipeline/exec/exchange_sink_buffer.cpp +++ b/be/src/pipeline/exec/exchange_sink_buffer.cpp @@ -82,7 +82,7 @@ void ExchangeSinkBuffer<Parent>::close() { template <typename Parent> bool ExchangeSinkBuffer<Parent>::can_write() const { - size_t max_package_size = QUEUE_CAPACITY_FACTOR * _instance_to_package_queue.size(); + size_t max_package_size = 64 * _instance_to_package_queue.size(); size_t total_package_size = 0; for (auto& [_, q] : _instance_to_package_queue) { total_package_size += q.size(); diff --git a/be/src/vec/aggregate_functions/aggregate_function_null.h b/be/src/vec/aggregate_functions/aggregate_function_null.h index 285c017e4ff..becb06f7cfc 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_null.h +++ b/be/src/vec/aggregate_functions/aggregate_function_null.h @@ -210,7 +210,7 @@ public: } } - void add_batch(size_t batch_size, AggregateDataPtr* __restrict places, size_t place_offset, + void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset, const IColumn** columns, Arena* arena, bool agg_many) const override { const ColumnNullable* column = assert_cast<const ColumnNullable*>(columns[0]); // The overhead introduced is negligible here, just an extra memory read from NullMap diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index 67e92335ce0..1158e5b0d63 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -79,7 +79,7 @@ template <typename T> void ColumnDecimal<T>::serialize_vec(std::vector<StringRef>& keys, size_t num_rows, size_t max_row_byte_size) const { for (size_t i = 0; i < num_rows; ++i) { - memcpy_fixed<T>(const_cast<char*>(keys[i].data + keys[i].size), (char*)&data[i]); + memcpy(const_cast<char*>(keys[i].data + keys[i].size), &data[i], sizeof(T)); keys[i].size += sizeof(T); } } @@ -89,7 +89,7 @@ void ColumnDecimal<T>::serialize_vec_with_null_map(std::vector<StringRef>& keys, const uint8_t* null_map) const { for (size_t i = 0; i < num_rows; ++i) { if (null_map[i] == 0) { - memcpy_fixed<T>(const_cast<char*>(keys[i].data + keys[i].size), (char*)&data[i]); + memcpy(const_cast<char*>(keys[i].data + keys[i].size), &data[i], sizeof(T)); keys[i].size += sizeof(T); } } diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 426de2d4f70..ecf330bead3 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -257,22 +257,15 @@ size_t ColumnNullable::get_max_row_byte_size() const { void ColumnNullable::serialize_vec(std::vector<StringRef>& keys, size_t num_rows, size_t max_row_byte_size) const { - if (has_null()) { - const auto& arr = get_null_map_data(); - for (size_t i = 0; i < num_rows; ++i) { - auto* val = const_cast<char*>(keys[i].data + keys[i].size); - *val = (arr[i] ? 1 : 0); - keys[i].size++; - } - get_nested_column().serialize_vec_with_null_map(keys, num_rows, arr.data()); - } else { - for (size_t i = 0; i < num_rows; ++i) { - auto* val = const_cast<char*>(keys[i].data + keys[i].size); - *val = 0; - keys[i].size++; - } - get_nested_column().serialize_vec(keys, num_rows, max_row_byte_size); + const auto& arr = get_null_map_data(); + static constexpr auto s = sizeof(arr[0]); + for (size_t i = 0; i < num_rows; ++i) { + auto* val = const_cast<char*>(keys[i].data + keys[i].size); + *val = (arr[i] ? 1 : 0); + keys[i].size += s; } + + get_nested_column().serialize_vec_with_null_map(keys, num_rows, arr.data()); } void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t num_rows) { @@ -289,11 +282,7 @@ void ColumnNullable::deserialize_vec(std::vector<StringRef>& keys, const size_t keys[i].data += sizeof(val); keys[i].size -= sizeof(val); } - if (_has_null) { - get_nested_column().deserialize_vec_with_null_map(keys, num_rows, arr.data()); - } else { - get_nested_column().deserialize_vec(keys, num_rows); - } + get_nested_column().deserialize_vec_with_null_map(keys, num_rows, arr.data()); } void ColumnNullable::insert_range_from(const IColumn& src, size_t start, size_t length) { diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index b726cfee851..730161cf5b8 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -479,7 +479,7 @@ void ColumnObject::Subcolumn::finalize() { throw doris::Exception(ErrorCode::INVALID_ARGUMENT, st.to_string() + ", real_code:{}", st.code()); } - part = ptr->convert_to_full_column_if_const(); + part = ptr; } result_column->insert_range_from(*part, 0, part_size); } diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 337f5e5663a..424a8717e14 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -313,7 +313,7 @@ void ColumnString::serialize_vec(std::vector<StringRef>& keys, size_t num_rows, uint32_t string_size(size_at(i)); auto* ptr = const_cast<char*>(keys[i].data + keys[i].size); - memcpy_fixed<uint32_t>(ptr, (char*)&string_size); + memcpy(ptr, &string_size, sizeof(string_size)); memcpy(ptr + sizeof(string_size), &chars[offset], string_size); keys[i].size += sizeof(string_size) + string_size; } @@ -327,7 +327,7 @@ void ColumnString::serialize_vec_with_null_map(std::vector<StringRef>& keys, siz uint32_t string_size(size_at(i)); auto* ptr = const_cast<char*>(keys[i].data + keys[i].size); - memcpy_fixed<uint32_t>(ptr, (char*)&string_size); + memcpy(ptr, &string_size, sizeof(string_size)); memcpy(ptr + sizeof(string_size), &chars[offset], string_size); keys[i].size += sizeof(string_size) + string_size; } diff --git a/be/src/vec/common/schema_util.cpp b/be/src/vec/common/schema_util.cpp index 6cea0cf6054..1290ddb237f 100644 --- a/be/src/vec/common/schema_util.cpp +++ b/be/src/vec/common/schema_util.cpp @@ -152,6 +152,7 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co Block tmp_block {arguments}; vectorized::ColumnNumbers argnum; argnum.emplace_back(0); + argnum.emplace_back(1); size_t result_column = tmp_block.columns(); auto ctx = FunctionContext::create_context(nullptr, {}, {}); // We convert column string to jsonb type just add a string jsonb field to dst column instead of parse @@ -160,8 +161,7 @@ Status cast_column(const ColumnWithTypeAndName& arg, const DataTypePtr& type, Co tmp_block.insert({nullptr, type, arg.name}); RETURN_IF_ERROR( function->execute(ctx.get(), tmp_block, argnum, result_column, arg.column->size())); - *result = std::move(tmp_block.get_by_position(result_column).column) - ->convert_to_full_column_if_const(); + *result = std::move(tmp_block.get_by_position(result_column).column); // Variant column is a really special case, src type is nullable but dst variant type is none nullable, // but we still need to wrap nullmap into variant root column to prevent from nullable info lost. // TODO rethink and better handle this sepecial situation diff --git a/be/src/vec/common/typeid_cast.h b/be/src/vec/common/typeid_cast.h index 85f99b492cd..fefd38409fa 100644 --- a/be/src/vec/common/typeid_cast.h +++ b/be/src/vec/common/typeid_cast.h @@ -59,18 +59,13 @@ To typeid_cast(From& from) { template <typename To, typename From> To typeid_cast(From* from) { -#ifndef NDEBUG try { if (typeid(*from) == typeid(std::remove_pointer_t<To>)) { return static_cast<To>(from); + } else { + return nullptr; } } catch (const std::exception& e) { throw doris::Exception(doris::ErrorCode::BAD_CAST, e.what()); } -#else - if (typeid(*from) == typeid(std::remove_pointer_t<To>)) { - return static_cast<To>(from); - } -#endif - return nullptr; } diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index 355b7efd597..aa7c0fb36ce 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -183,19 +183,27 @@ void Block::clear_names() { } void Block::insert(const ColumnWithTypeAndName& elem) { - if (!elem.name.empty()) { - index_by_name.emplace(elem.name, data.size()); - } + index_by_name.emplace(elem.name, data.size()); data.emplace_back(elem); } void Block::insert(ColumnWithTypeAndName&& elem) { - if (!elem.name.empty()) { - index_by_name.emplace(elem.name, data.size()); - } + index_by_name.emplace(elem.name, data.size()); data.emplace_back(std::move(elem)); } +void Block::insert_unique(const ColumnWithTypeAndName& elem) { + if (index_by_name.end() == index_by_name.find(elem.name)) { + insert(elem); + } +} + +void Block::insert_unique(ColumnWithTypeAndName&& elem) { + if (index_by_name.end() == index_by_name.find(elem.name)) { + insert(std::move(elem)); + } +} + void Block::erase(const std::set<size_t>& positions) { for (unsigned long position : std::ranges::reverse_view(positions)) { erase(position); @@ -227,15 +235,7 @@ void Block::erase(size_t position) { } void Block::erase_impl(size_t position) { - if (position == data.size() - 1) { - bool have_name = !data.back().name.empty(); - data.pop_back(); - if (!have_name) { - return; - } - } else { - data.erase(data.begin() + position); - } + data.erase(data.begin() + position); for (auto it = index_by_name.begin(); it != index_by_name.end();) { if (it->second == position) { diff --git a/be/src/vec/core/block.h b/be/src/vec/core/block.h index 8a2b09c3280..8433ebf074c 100644 --- a/be/src/vec/core/block.h +++ b/be/src/vec/core/block.h @@ -105,6 +105,9 @@ public: /// insert the column to the end void insert(const ColumnWithTypeAndName& elem); void insert(ColumnWithTypeAndName&& elem); + /// insert the column to the end, if there is no column with that name yet + void insert_unique(const ColumnWithTypeAndName& elem); + void insert_unique(ColumnWithTypeAndName&& elem); /// remove the column at the specified position void erase(size_t position); /// remove the column at the [start, end) diff --git a/be/src/vec/core/column_with_type_and_name.cpp b/be/src/vec/core/column_with_type_and_name.cpp index cd0f7194004..9ac2bbe6e44 100644 --- a/be/src/vec/core/column_with_type_and_name.cpp +++ b/be/src/vec/core/column_with_type_and_name.cpp @@ -30,7 +30,6 @@ #include "vec/columns/column.h" #include "vec/core/types.h" #include "vec/data_types/data_type.h" -#include "vec/data_types/data_type_nullable.h" namespace doris::vectorized { @@ -88,28 +87,4 @@ void ColumnWithTypeAndName::to_pb_column_meta(PColumnMeta* col_meta) const { type->to_pb_column_meta(col_meta); } -ColumnWithTypeAndName ColumnWithTypeAndName::get_nested(bool replace_null_data_to_default) const { - if (type->is_nullable()) { - auto nested_type = assert_cast<const DataTypeNullable*>(type.get())->get_nested_type(); - ColumnPtr nested_column = column; - if (column) { - nested_column = nested_column->convert_to_full_column_if_const(); - const auto* source_column = assert_cast<const ColumnNullable*>(nested_column.get()); - nested_column = source_column->get_nested_column_ptr(); - - if (replace_null_data_to_default) { - const auto& null_map = source_column->get_null_map_data(); - // only need to mutate nested column, avoid to copy nullmap - auto mutable_nested_col = (*std::move(nested_column)).mutate(); - mutable_nested_col->replace_column_null_data(null_map.data()); - - return {std::move(mutable_nested_col), nested_type, ""}; - } - } - return {nested_column, nested_type, ""}; - } else { - return {column, type, ""}; - } -} - } // namespace doris::vectorized diff --git a/be/src/vec/core/column_with_type_and_name.h b/be/src/vec/core/column_with_type_and_name.h index 53ca6f20b2d..caf68f46260 100644 --- a/be/src/vec/core/column_with_type_and_name.h +++ b/be/src/vec/core/column_with_type_and_name.h @@ -25,7 +25,6 @@ #include <iosfwd> #include <memory> #include <string> -#include <utility> #include "vec/core/types.h" #include "vec/data_types/data_type.h" @@ -48,13 +47,13 @@ struct ColumnWithTypeAndName { DataTypePtr type; String name; - ColumnWithTypeAndName() = default; - ColumnWithTypeAndName(ColumnPtr column_, DataTypePtr type_, String name_) - : column(std::move(column_)), type(std::move(type_)), name(std::move(name_)) {} + ColumnWithTypeAndName() {} + ColumnWithTypeAndName(const ColumnPtr& column_, const DataTypePtr& type_, const String& name_) + : column(column_), type(type_), name(name_) {} /// Uses type->create_column() to create column - ColumnWithTypeAndName(const DataTypePtr& type_, String name_) - : column(type_->create_column()), type(type_), name(std::move(name_)) {} + ColumnWithTypeAndName(const DataTypePtr& type_, const String& name_) + : column(type_->create_column()), type(type_), name(name_) {} ColumnWithTypeAndName clone_empty() const; bool operator==(const ColumnWithTypeAndName& other) const; @@ -64,8 +63,6 @@ struct ColumnWithTypeAndName { std::string to_string(size_t row_num) const; void to_pb_column_meta(PColumnMeta* col_meta) const; - - ColumnWithTypeAndName get_nested(bool replace_null_data_to_default = false) const; }; } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h index 10bcbf8d775..dfe62a8a902 100644 --- a/be/src/vec/data_types/data_type.h +++ b/be/src/vec/data_types/data_type.h @@ -196,6 +196,10 @@ public: virtual bool is_nullable() const { return false; } + /** Is this type can represent only NULL value? (It also implies is_nullable) + */ + virtual bool only_null() const { return false; } + /* the data type create from type_null, NULL literal*/ virtual bool is_null_literal() const { return false; } diff --git a/be/src/vec/data_types/data_type_nullable.cpp b/be/src/vec/data_types/data_type_nullable.cpp index 908015317b6..893b8637d2f 100644 --- a/be/src/vec/data_types/data_type_nullable.cpp +++ b/be/src/vec/data_types/data_type_nullable.cpp @@ -49,6 +49,10 @@ DataTypeNullable::DataTypeNullable(const DataTypePtr& nested_data_type_) } } +bool DataTypeNullable::only_null() const { + return typeid_cast<const DataTypeNothing*>(nested_data_type.get()); +} + std::string DataTypeNullable::to_string(const IColumn& column, size_t row_num) const { auto result = check_column_const_set_readability(column, row_num); ColumnPtr ptr = result.first; diff --git a/be/src/vec/data_types/data_type_nullable.h b/be/src/vec/data_types/data_type_nullable.h index 12410b70bd1..fc958096adf 100644 --- a/be/src/vec/data_types/data_type_nullable.h +++ b/be/src/vec/data_types/data_type_nullable.h @@ -109,6 +109,7 @@ public: } bool is_nullable() const override { return true; } size_t get_size_of_value_in_memory() const override; + bool only_null() const override; bool can_be_inside_low_cardinality() const override { return nested_data_type->can_be_inside_low_cardinality(); } diff --git a/be/src/vec/data_types/get_least_supertype.cpp b/be/src/vec/data_types/get_least_supertype.cpp index 5f09e01ac8d..1baf463b052 100644 --- a/be/src/vec/data_types/get_least_supertype.cpp +++ b/be/src/vec/data_types/get_least_supertype.cpp @@ -291,7 +291,9 @@ void get_least_supertype(const DataTypes& types, DataTypePtr* type) { typeid_cast<const DataTypeNullable*>(type.get())) { have_nullable = true; - nested_types.emplace_back(type_nullable->get_nested_type()); + if (!type_nullable->only_null()) { + nested_types.emplace_back(type_nullable->get_nested_type()); + } } else { nested_types.emplace_back(type); } diff --git a/be/src/vec/exprs/vcast_expr.cpp b/be/src/vec/exprs/vcast_expr.cpp index 3207ba5b541..47733a177db 100644 --- a/be/src/vec/exprs/vcast_expr.cpp +++ b/be/src/vec/exprs/vcast_expr.cpp @@ -106,6 +106,9 @@ doris::Status VCastExpr::execute(VExprContext* context, doris::vectorized::Block int column_id = 0; RETURN_IF_ERROR(_children[0]->execute(context, block, &column_id)); + size_t const_param_id = VExpr::insert_param( + block, {_cast_param, _cast_param_data_type, _target_data_type_name}, block->rows()); + // call function size_t num_columns_without_result = block->columns(); // prepare a column to save result @@ -114,8 +117,8 @@ doris::Status VCastExpr::execute(VExprContext* context, doris::vectorized::Block auto state = Status::OK(); try { state = _function->execute(context->fn_context(_fn_context_index), *block, - {static_cast<size_t>(column_id)}, num_columns_without_result, - block->rows(), false); + {static_cast<size_t>(column_id), const_param_id}, + num_columns_without_result, block->rows(), false); *result_column_id = num_columns_without_result; } catch (const Exception& e) { state = e.to_status(); diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index 0948bdb7ccd..6e7f6572ab8 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -48,7 +48,7 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum ColumnPtr src_not_nullable = src; MutableColumnPtr mutable_result_null_map_column; - if (const auto* nullable = check_and_get_column<ColumnNullable>(*src)) { + if (auto* nullable = check_and_get_column<ColumnNullable>(*src)) { src_not_nullable = nullable->get_nested_column_ptr(); result_null_map_column = nullable->get_null_map_column_ptr(); } @@ -69,25 +69,23 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum continue; } - if (const auto* nullable = assert_cast<const ColumnNullable*>(elem.column.get()); - nullable->has_null()) { + if (auto* nullable = assert_cast<const ColumnNullable*>(elem.column.get())) { const ColumnPtr& null_map_column = nullable->get_null_map_column_ptr(); if (!result_null_map_column) { result_null_map_column = null_map_column->clone_resized(input_rows_count); - continue; + } else { + if (!mutable_result_null_map_column) { + mutable_result_null_map_column = + std::move(result_null_map_column)->assume_mutable(); + } + + NullMap& result_null_map = + assert_cast<ColumnUInt8&>(*mutable_result_null_map_column).get_data(); + const NullMap& src_null_map = + assert_cast<const ColumnUInt8&>(*null_map_column).get_data(); + + VectorizedUtils::update_null_map(result_null_map, src_null_map); } - - if (!mutable_result_null_map_column) { - mutable_result_null_map_column = - std::move(result_null_map_column)->assume_mutable(); - } - - NullMap& result_null_map = - assert_cast<ColumnUInt8&>(*mutable_result_null_map_column).get_data(); - const NullMap& src_null_map = - assert_cast<const ColumnUInt8&>(*null_map_column).get_data(); - - VectorizedUtils::update_null_map(result_null_map, src_null_map); } } @@ -101,7 +99,8 @@ ColumnPtr wrap_in_nullable(const ColumnPtr& src, const Block& block, const Colum return ColumnNullable::create(src, ColumnUInt8::create(input_rows_count, 0)); } - return ColumnNullable::create(src_not_nullable, result_null_map_column); + return ColumnNullable::create(src_not_nullable->convert_to_full_column_if_const(), + result_null_map_column); } NullPresence get_null_presence(const Block& block, const ColumnNumbers& args) { @@ -113,6 +112,9 @@ NullPresence get_null_presence(const Block& block, const ColumnNumbers& args) { if (!res.has_nullable) { res.has_nullable = elem.type->is_nullable(); } + if (!res.has_null_constant) { + res.has_null_constant = elem.type->only_null(); + } } return res; @@ -125,6 +127,9 @@ NullPresence get_null_presence(const Block& block, const ColumnNumbers& args) { if (!res.has_nullable) { res.has_nullable = elem.type->is_nullable(); } + if (!res.has_null_constant) { + res.has_null_constant = elem.type->only_null(); + } } return res; @@ -221,27 +226,27 @@ Status PreparedFunctionImpl::default_implementation_for_nulls( NullPresence null_presence = get_null_presence(block, args); + if (null_presence.has_null_constant) { + block.get_by_position(result).column = + block.get_by_position(result).type->create_column_const(input_rows_count, Null()); + *executed = true; + return Status::OK(); + } + if (null_presence.has_nullable) { - bool need_to_default = need_replace_null_data_to_default(); + bool check_overflow_for_decimal = false; if (context) { - need_to_default &= context->check_overflow_for_decimal(); - } - ColumnNumbers new_args; - for (auto arg : args) { - new_args.push_back(block.columns()); - block.insert(block.get_by_position(arg).get_nested(need_to_default)); - DCHECK(!block.get_by_position(new_args.back()).column->is_nullable()); - } - - RETURN_IF_ERROR(execute_without_low_cardinality_columns(context, block, new_args, result, - block.rows(), dry_run)); - block.get_by_position(result).column = wrap_in_nullable( - block.get_by_position(result).column, block, args, result, input_rows_count); - - while (!new_args.empty()) { - block.erase(new_args.back()); - new_args.pop_back(); + check_overflow_for_decimal = context->check_overflow_for_decimal(); } + auto [temporary_block, new_args, new_result] = create_block_with_nested_columns( + block, args, result, + check_overflow_for_decimal && need_replace_null_data_to_default()); + + RETURN_IF_ERROR(execute_without_low_cardinality_columns( + context, temporary_block, new_args, new_result, temporary_block.rows(), dry_run)); + block.get_by_position(result).column = + wrap_in_nullable(temporary_block.get_by_position(new_result).column, block, args, + result, input_rows_count); *executed = true; return Status::OK(); } @@ -289,6 +294,9 @@ DataTypePtr FunctionBuilderImpl::get_return_type_without_low_cardinality( if (!arguments.empty() && use_default_implementation_for_nulls()) { NullPresence null_presence = get_null_presence(arguments); + if (null_presence.has_null_constant) { + return make_nullable(std::make_shared<DataTypeNothing>()); + } if (null_presence.has_nullable) { ColumnNumbers numbers(arguments.size()); std::iota(numbers.begin(), numbers.end(), 0); diff --git a/be/src/vec/functions/function.h b/be/src/vec/functions/function.h index 9ec5f6b4571..cb8ff34cdbb 100644 --- a/be/src/vec/functions/function.h +++ b/be/src/vec/functions/function.h @@ -62,6 +62,7 @@ void has_variadic_argument_types(...); struct NullPresence { bool has_nullable = false; + bool has_null_constant = false; }; template <typename T> diff --git a/be/src/vec/functions/function_bitmap.cpp b/be/src/vec/functions/function_bitmap.cpp index 9002ed5c723..ac80542f633 100644 --- a/be/src/vec/functions/function_bitmap.cpp +++ b/be/src/vec/functions/function_bitmap.cpp @@ -702,7 +702,10 @@ Status execute_bitmap_op_count_null_to_zero( exec_impl_func) { NullPresence null_presence = get_null_presence(block, arguments); - if (null_presence.has_nullable) { + if (null_presence.has_null_constant) { + block.get_by_position(result).column = + block.get_by_position(result).type->create_column_const(input_rows_count, 0); + } else if (null_presence.has_nullable) { auto [temporary_block, new_args, new_result] = create_block_with_nested_columns(block, arguments, result); RETURN_IF_ERROR(exec_impl_func(context, temporary_block, new_args, new_result, diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index f2cfbc7679f..2e1e48db14d 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -94,7 +94,6 @@ #include "vec/functions/function_helpers.h" #include "vec/io/reader_buffer.h" #include "vec/runtime/vdatetime_value.h" -#include "vec/utils/util.hpp" class DateLUTImpl; @@ -1453,7 +1452,12 @@ public: protected: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { - return wrapper_function(context, block, arguments, result, input_rows_count); + /// drop second argument, pass others + ColumnNumbers new_arguments {arguments.front()}; + if (arguments.size() > 2) + new_arguments.insert(std::end(new_arguments), std::next(std::begin(arguments), 2), + std::end(arguments)); + return wrapper_function(context, block, new_arguments, result, input_rows_count); } bool use_default_implementation_for_nulls() const override { return false; } @@ -1541,7 +1545,7 @@ struct ConvertThroughParsing { res == StringParser::PARSE_OVERFLOW || res == StringParser::PARSE_UNDERFLOW); } else if constexpr (IsDataTypeDateTimeV2<ToDataType>) { - const auto* type = assert_cast<const DataTypeDateTimeV2*>( + auto type = check_and_get_data_type<DataTypeDateTimeV2>( block.get_by_position(result).type.get()); parsed = try_parse_impl<ToDataType>(vec_to[i], read_buffer, context->state()->timezone_obj(), @@ -2245,7 +2249,7 @@ private: const auto& from_nested = from_type; const auto& to_nested = to_type; - if (from_type->is_null_literal()) { + if (from_type->only_null() || from_type->is_null_literal()) { if (!to_nested->is_nullable()) { return create_unsupport_wrapper("Cannot convert NULL to a non-nullable type"); } @@ -2270,32 +2274,82 @@ private: const DataTypePtr& to_type, bool skip_not_null_check) const { /// Determine whether pre-processing and/or post-processing must take place during conversion. + bool source_is_nullable = from_type->is_nullable(); bool result_is_nullable = to_type->is_nullable(); + auto wrapper = prepare_impl(context, remove_nullable(from_type), remove_nullable(to_type), + result_is_nullable); + if (result_is_nullable) { - return [this, from_type, to_type](FunctionContext* context, Block& block, - const ColumnNumbers& arguments, const size_t result, - size_t input_rows_count) { - auto nested_result_index = block.columns(); - block.insert(block.get_by_position(result).get_nested()); - auto nested_source_index = block.columns(); - block.insert(block.get_by_position(arguments[0]).get_nested()); - - RETURN_IF_ERROR(prepare_impl(context, remove_nullable(from_type), - remove_nullable(to_type), - true)(context, block, {nested_source_index}, - nested_result_index, input_rows_count)); - - block.get_by_position(result).column = - wrap_in_nullable(block.get_by_position(nested_result_index).column, block, - arguments, result, input_rows_count); - - block.erase(nested_source_index); - block.erase(nested_result_index); + return [wrapper, source_is_nullable](FunctionContext* context, Block& block, + const ColumnNumbers& arguments, + const size_t result, size_t input_rows_count) { + /// Create a temporary block on which to perform the operation. + auto& res = block.get_by_position(result); + const auto& ret_type = res.type; + const auto& nullable_type = static_cast<const DataTypeNullable&>(*ret_type); + const auto& nested_type = nullable_type.get_nested_type(); + + Block tmp_block; + size_t tmp_res_index = 0; + if (source_is_nullable) { + auto [t_block, tmp_args] = + create_block_with_nested_columns(block, arguments, true); + tmp_block = std::move(t_block); + tmp_res_index = tmp_block.columns(); + tmp_block.insert({nullptr, nested_type, ""}); + + /// Perform the requested conversion. + RETURN_IF_ERROR( + wrapper(context, tmp_block, {0}, tmp_res_index, input_rows_count)); + } else { + tmp_block = block; + + tmp_res_index = block.columns(); + tmp_block.insert({nullptr, nested_type, ""}); + + /// Perform the requested conversion. + RETURN_IF_ERROR(wrapper(context, tmp_block, arguments, tmp_res_index, + input_rows_count)); + } + + // Note: here we should return the nullable result column + const auto& tmp_res = tmp_block.get_by_position(tmp_res_index); + res.column = wrap_in_nullable(tmp_res.column, + Block({block.get_by_position(arguments[0]), tmp_res}), + {0}, 1, input_rows_count); + + return Status::OK(); + }; + } else if (source_is_nullable) { + /// Conversion from Nullable to non-Nullable. + + return [wrapper, skip_not_null_check](FunctionContext* context, Block& block, + const ColumnNumbers& arguments, + const size_t result, size_t input_rows_count) { + auto [tmp_block, tmp_args, tmp_res] = + create_block_with_nested_columns(block, arguments, result); + + /// Check that all values are not-NULL. + /// Check can be skipped in case if LowCardinality dictionary is transformed. + /// In that case, correctness will be checked beforehand. + if (!skip_not_null_check) { + const auto& col = block.get_by_position(arguments[0]).column; + const auto& nullable_col = assert_cast<const ColumnNullable&>(*col); + const auto& null_map = nullable_col.get_null_map_data(); + + if (!memory_is_zero(null_map.data(), null_map.size())) { + return Status::RuntimeError( + "Cannot convert NULL value to non-Nullable type"); + } + } + + RETURN_IF_ERROR(wrapper(context, tmp_block, tmp_args, tmp_res, input_rows_count)); + block.get_by_position(result).column = tmp_block.get_by_position(tmp_res).column; return Status::OK(); }; } else { - return prepare_impl(context, from_type, to_type, false); + return wrapper; } } @@ -2303,11 +2357,11 @@ private: /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. WrapperType prepare_impl(FunctionContext* context, const DataTypePtr& from_type, const DataTypePtr& to_type, bool requested_result_is_nullable) const { - if (from_type->equals(*to_type)) { + if (from_type->equals(*to_type)) return create_identity_wrapper(from_type); - } + else if (WhichDataType(from_type).is_nothing()) + return create_nothing_wrapper(to_type.get()); - // variant needs to be judged first if (to_type->get_type_id() == TypeIndex::VARIANT) { return create_variant_wrapper(from_type, static_cast<const DataTypeObject&>(*to_type)); } @@ -2315,14 +2369,15 @@ private: return create_variant_wrapper(static_cast<const DataTypeObject&>(*from_type), to_type); } - switch (from_type->get_type_id()) { - case TypeIndex::Nothing: - return create_nothing_wrapper(to_type.get()); - case TypeIndex::JSONB: + if (from_type->get_type_id() == TypeIndex::JSONB) { + bool jsonb_string_as_string = context ? context->jsonb_string_as_string() : false; return create_jsonb_wrapper(static_cast<const DataTypeJsonb&>(*from_type), to_type, - context ? context->jsonb_string_as_string() : false); - default: - break; + jsonb_string_as_string); + } + if (to_type->get_type_id() == TypeIndex::JSONB) { + bool string_as_jsonb_string = context ? context->string_as_jsonb_string() : false; + return create_jsonb_wrapper(from_type, static_cast<const DataTypeJsonb&>(*to_type), + string_as_jsonb_string); } WrapperType ret; @@ -2368,9 +2423,8 @@ private: return false; }; - if (call_on_index_and_data_type<void>(to_type->get_type_id(), make_default_wrapper)) { + if (call_on_index_and_data_type<void>(to_type->get_type_id(), make_default_wrapper)) return ret; - } switch (to_type->get_type_id()) { case TypeIndex::String: @@ -2390,9 +2444,6 @@ private: case TypeIndex::BitMap: return create_bitmap_wrapper(context, from_type, static_cast<const DataTypeBitMap&>(*to_type)); - case TypeIndex::JSONB: - return create_jsonb_wrapper(from_type, static_cast<const DataTypeJsonb&>(*to_type), - context ? context->string_as_jsonb_string() : false); default: break; } diff --git a/be/src/vec/functions/functions_geo.cpp b/be/src/vec/functions/functions_geo.cpp index 55e7c25fc54..28e8b16fd13 100644 --- a/be/src/vec/functions/functions_geo.cpp +++ b/be/src/vec/functions/functions_geo.cpp @@ -133,7 +133,7 @@ struct StX { auto pt = point.decode_from(point_value.data, point_value.size); if (!pt) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } auto x_value = point.x(); @@ -165,7 +165,7 @@ struct StY { auto pt = point.decode_from(point_value.data, point_value.size); if (!pt) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } auto y_value = point.y(); @@ -200,7 +200,7 @@ struct StDistanceSphere { x_lat->operator[](row).get<Float64>(), y_lng->operator[](row).get<Float64>(), y_lat->operator[](row).get<Float64>(), &distance)) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } res->insert_data(const_cast<const char*>((char*)&distance), 0); @@ -234,7 +234,7 @@ struct StAngleSphere { x_lat->operator[](row).get<Float64>(), y_lng->operator[](row).get<Float64>(), y_lat->operator[](row).get<Float64>(), &angle)) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } res->insert_data(const_cast<const char*>((char*)&angle), 0); @@ -267,26 +267,26 @@ struct StAngle { auto shape_value1 = p1->get_data_at(row); auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size); if (!pt1) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } auto shape_value2 = p2->get_data_at(row); auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size); if (!pt2) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } auto shape_value3 = p3->get_data_at(row); auto pt3 = point3.decode_from(shape_value3.data, shape_value3.size); if (!pt3) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } double angle = 0; if (!GeoPoint::ComputeAngle(&point1, &point2, &point3, &angle)) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } res->insert_data(const_cast<const char*>((char*)&angle), 0); @@ -316,20 +316,20 @@ struct StAzimuth { auto shape_value1 = p1->get_data_at(row); auto pt1 = point1.decode_from(shape_value1.data, shape_value1.size); if (!pt1) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } auto shape_value2 = p2->get_data_at(row); auto pt2 = point2.decode_from(shape_value2.data, shape_value2.size); if (!pt2) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } double angle = 0; if (!GeoPoint::ComputeAzimuth(&point1, &point2, &angle)) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } res->insert_data(const_cast<const char*>((char*)&angle), 0); @@ -363,7 +363,7 @@ struct StAreaSquareMeters { double area = 0; if (!GeoShape::ComputeArea(shape.get(), &area, "square_meters")) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } res->insert_data(const_cast<const char*>((char*)&area), 0); @@ -398,7 +398,7 @@ struct StAreaSquareKm { double area = 0; if (!GeoShape::ComputeArea(shape.get(), &area, "square_km")) { - res->insert_default(); + res->insert_data(nullptr, 0); continue; } res->insert_data(const_cast<const char*>((char*)&area), 0); @@ -480,7 +480,7 @@ struct StContains { shapes[i] = std::shared_ptr<GeoShape>( GeoShape::from_encoded(strs[i]->data, strs[i]->size)); if (shapes[i] == nullptr) { - res->insert_default(); + res->insert_data(nullptr, 0); break; } } diff --git a/be/src/vec/functions/functions_geo.h b/be/src/vec/functions/functions_geo.h index 9c4db09e149..11d2cd7f881 100644 --- a/be/src/vec/functions/functions_geo.h +++ b/be/src/vec/functions/functions_geo.h @@ -68,6 +68,7 @@ public: DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { return make_nullable(std::make_shared<ReturnType>()); } + bool use_default_implementation_for_nulls() const override { return true; } Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { diff --git a/be/src/vec/functions/functions_logical.cpp b/be/src/vec/functions/functions_logical.cpp index c0b8d62ec25..1eceeac3de4 100644 --- a/be/src/vec/functions/functions_logical.cpp +++ b/be/src/vec/functions/functions_logical.cpp @@ -187,8 +187,9 @@ DataTypePtr FunctionAnyArityLogical<Impl, Name>::get_return_type_impl( } } - if (!(is_native_number(arg_type) || (Impl::special_implementation_for_nulls() && - is_native_number(remove_nullable(arg_type))))) { + if (!(is_native_number(arg_type) || + (Impl::special_implementation_for_nulls() && + (arg_type->only_null() || is_native_number(remove_nullable(arg_type)))))) { LOG(FATAL) << fmt::format("Illegal type ({}) of {} argument of function {}", arg_type->get_name(), i + 1, get_name()); } diff --git a/be/src/vec/functions/least_greast.cpp b/be/src/vec/functions/least_greast.cpp index 33748bf8bc1..4c008da67f9 100644 --- a/be/src/vec/functions/least_greast.cpp +++ b/be/src/vec/functions/least_greast.cpp @@ -190,7 +190,8 @@ struct FunctionFieldImpl { for (int row = 0; row < input_rows_count; ++row) { const auto& str_data = column_string.get_data_at(index_check_const(row, arg_const)); for (int col = 1; col < column_size; ++col) { - auto [column, is_const] = unpack_if_const(argument_columns[col]); + auto [column, is_const] = + unpack_if_const(block.safe_get_by_position(col).column); const auto& temp_data = assert_cast<const ColumnString&>(*column).get_data_at( index_check_const(row, is_const)); if (EqualsOp<StringRef, StringRef>::apply(temp_data, str_data)) { diff --git a/be/src/vec/functions/nullif.cpp b/be/src/vec/functions/nullif.cpp index d38e9cdbb3e..2fccee27d4d 100644 --- a/be/src/vec/functions/nullif.cpp +++ b/be/src/vec/functions/nullif.cpp @@ -52,6 +52,7 @@ class FunctionNullIf : public IFunction { public: struct NullPresence { bool has_nullable = false; + bool has_null_constant = false; }; static constexpr auto name = "nullif"; @@ -68,30 +69,32 @@ public: return make_nullable(arguments[0]); } - static NullPresence get_null_resense(const ColumnsWithTypeAndName& args) { + NullPresence get_null_resense(const ColumnsWithTypeAndName& args) const { NullPresence res; for (const auto& elem : args) { - if (!res.has_nullable) { - res.has_nullable = elem.type->is_nullable(); - } + if (!res.has_nullable) res.has_nullable = elem.type->is_nullable(); + if (!res.has_null_constant) res.has_null_constant = elem.type->only_null(); } return res; } - static DataTypePtr get_return_type_for_equal(const ColumnsWithTypeAndName& arguments) { + DataTypePtr get_return_type_for_equal(const ColumnsWithTypeAndName& arguments) const { ColumnsWithTypeAndName args_without_low_cardinality(arguments); for (ColumnWithTypeAndName& arg : args_without_low_cardinality) { bool is_const = arg.column && is_column_const(*arg.column); - if (is_const) { + if (is_const) arg.column = assert_cast<const ColumnConst&>(*arg.column).remove_low_cardinality(); - } } if (!arguments.empty()) { NullPresence null_presence = get_null_resense(arguments); + + if (null_presence.has_null_constant) { + return make_nullable(std::make_shared<DataTypeNothing>()); + } if (null_presence.has_nullable) { return make_nullable(std::make_shared<doris::vectorized::DataTypeUInt8>()); } diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h index c62b96649f7..a09d86507d1 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.h +++ b/be/src/vec/runtime/vdata_stream_recvr.h @@ -211,7 +211,7 @@ public: _local_channel_dependency = local_channel_dependency; } - bool should_wait(); + virtual bool should_wait(); virtual Status get_batch(Block* next_block, bool* eos); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
