This is an automated email from the ASF dual-hosted git repository. gabriellee pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new aaae1497cd [Refactor](function) opt the exec of function with null column (#16256) aaae1497cd is described below commit aaae1497cdc04fc4fb0afba40c01c24e51ff90e8 Author: HappenLee <happen...@hotmail.com> AuthorDate: Wed Feb 1 15:56:31 2023 +0800 [Refactor](function) opt the exec of function with null column (#16256) --- be/src/vec/exprs/vectorized_fn_call.cpp | 2 + be/src/vec/functions/function.cpp | 14 ++-- be/src/vec/functions/function_cast.h | 11 ++- be/src/vec/functions/function_helpers.cpp | 123 ++++++++++++++---------------- be/src/vec/functions/function_helpers.h | 26 +++---- 5 files changed, 86 insertions(+), 90 deletions(-) diff --git a/be/src/vec/exprs/vectorized_fn_call.cpp b/be/src/vec/exprs/vectorized_fn_call.cpp index d8f614c570..021eac15e9 100644 --- a/be/src/vec/exprs/vectorized_fn_call.cpp +++ b/be/src/vec/exprs/vectorized_fn_call.cpp @@ -41,6 +41,8 @@ doris::Status VectorizedFnCall::prepare(doris::RuntimeState* state, argument_template.reserve(_children.size()); std::vector<std::string_view> child_expr_name; for (auto child : _children) { + // TODO: rethink we really create column here. 
maybe only need nullptr just to + // get the function auto column = child->data_type()->create_column(); argument_template.emplace_back(std::move(column), child->data_type(), child->expr_name()); child_expr_name.emplace_back(child->expr_name()); diff --git a/be/src/vec/functions/function.cpp b/be/src/vec/functions/function.cpp index 41f3141c06..662a2a58af 100644 --- a/be/src/vec/functions/function.cpp +++ b/be/src/vec/functions/function.cpp @@ -217,11 +217,12 @@ Status PreparedFunctionImpl::default_implementation_for_nulls( } if (null_presence.has_nullable) { - Block temporary_block = create_block_with_nested_columns(block, args, result); + auto [temporary_block, new_args, new_result] = + create_block_with_nested_columns(block, args, result); RETURN_IF_ERROR(execute_without_low_cardinality_columns( - context, temporary_block, args, result, temporary_block.rows(), dry_run)); + context, temporary_block, new_args, new_result, temporary_block.rows(), dry_run)); block.get_by_position(result).column = - wrap_in_nullable(temporary_block.get_by_position(result).column, block, args, + wrap_in_nullable(temporary_block.get_by_position(new_result).column, block, args, result, input_rows_count); *executed = true; return Status::OK(); @@ -295,10 +296,9 @@ DataTypePtr FunctionBuilderImpl::get_return_type_without_low_cardinality( } if (null_presence.has_nullable) { ColumnNumbers numbers(arguments.size()); - for (size_t i = 0; i < arguments.size(); i++) { - numbers[i] = i; - } - Block nested_block = create_block_with_nested_columns(Block(arguments), numbers); + std::iota(numbers.begin(), numbers.end(), 0); + auto [nested_block, _] = + create_block_with_nested_columns(Block(arguments), numbers, false); auto return_type = get_return_type_impl( ColumnsWithTypeAndName(nested_block.begin(), nested_block.end())); return make_nullable(return_type); diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index e3baaecdd2..a6817134ea 100644 --- 
a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -1592,7 +1592,9 @@ private: Block tmp_block; size_t tmp_res_index = 0; if (source_is_nullable) { - tmp_block = create_block_with_nested_columns_only_args(block, arguments); + auto [t_block, tmp_args] = + create_block_with_nested_columns(block, arguments, true); + tmp_block = std::move(t_block); tmp_res_index = tmp_block.columns(); tmp_block.insert({nullptr, nested_type, ""}); @@ -1624,7 +1626,8 @@ private: return [wrapper, skip_not_null_check](FunctionContext* context, Block& block, const ColumnNumbers& arguments, const size_t result, size_t input_rows_count) { - Block tmp_block = create_block_with_nested_columns(block, arguments, result); + auto [tmp_block, tmp_args, tmp_res] = + create_block_with_nested_columns(block, arguments, result); /// Check that all values are not-NULL. /// Check can be skipped in case if LowCardinality dictionary is transformed. @@ -1640,8 +1643,8 @@ private: } } - RETURN_IF_ERROR(wrapper(context, tmp_block, arguments, result, input_rows_count)); - block.get_by_position(result).column = tmp_block.get_by_position(result).column; + RETURN_IF_ERROR(wrapper(context, tmp_block, tmp_args, tmp_res, input_rows_count)); + block.get_by_position(result).column = tmp_block.get_by_position(tmp_res).column; return Status::OK(); }; } else { diff --git a/be/src/vec/functions/function_helpers.cpp b/be/src/vec/functions/function_helpers.cpp index fcfdd4b3a2..c77f3c5ab7 100644 --- a/be/src/vec/functions/function_helpers.cpp +++ b/be/src/vec/functions/function_helpers.cpp @@ -20,88 +20,83 @@ #include "vec/functions/function_helpers.h" +#include "common/consts.h" #include "vec/columns/column_nullable.h" #include "vec/data_types/data_type_nullable.h" #include "vec/functions/function.h" namespace doris::vectorized { -Block create_block_with_nested_columns_only_args(const Block& block, const ColumnNumbers& args) { - std::set<size_t> args_set(args.begin(), args.end()); 
+std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block& block, + const ColumnNumbers& args, + const bool need_check_same) { Block res; + ColumnNumbers res_args(args.size()); + + // only build temp block by args column, if args[i] == args[j] + // just keep one + for (size_t i = 0; i < args.size(); ++i) { + bool is_in_res = false; + size_t pre_loc = 0; + + if (need_check_same) { + for (int j = 0; j < i; ++j) { + if (args[j] == args[i]) { + is_in_res = true; + pre_loc = res_args[j]; + break; + } + } + } - for (auto i : args_set) { - const auto& col = block.get_by_position(i); - - if (col.type->is_nullable()) { - const DataTypePtr& nested_type = - static_cast<const DataTypeNullable&>(*col.type).get_nested_type(); - - if (!col.column) { - res.insert({nullptr, nested_type, col.name}); - } else if (auto* nullable = check_and_get_column<ColumnNullable>(*col.column)) { - const auto& nested_col = nullable->get_nested_column_ptr(); - res.insert({nested_col, nested_type, col.name}); - } else if (auto* const_column = check_and_get_column<ColumnConst>(*col.column)) { - const auto& nested_col = - check_and_get_column<ColumnNullable>(const_column->get_data_column()) - ->get_nested_column_ptr(); - res.insert({ColumnConst::create(nested_col, col.column->size()), nested_type, - col.name}); + if (!is_in_res) { + const auto& col = block.get_by_position(args[i]); + if (col.type->is_nullable()) { + const DataTypePtr& nested_type = + static_cast<const DataTypeNullable&>(*col.type).get_nested_type(); + + if (!col.column) { + res.insert({nullptr, nested_type, col.name}); + } else if (auto* nullable = check_and_get_column<ColumnNullable>(*col.column)) { + const auto& nested_col = nullable->get_nested_column_ptr(); + res.insert({nested_col, nested_type, col.name}); + } else if (auto* const_column = check_and_get_column<ColumnConst>(*col.column)) { + const auto& nested_col = + check_and_get_column<ColumnNullable>(const_column->get_data_column()) + 
->get_nested_column_ptr(); + res.insert({ColumnConst::create(nested_col, col.column->size()), nested_type, + col.name}); + } else { + LOG(FATAL) << "Illegal column for DataTypeNullable"; + } } else { - LOG(FATAL) << "Illegal column for DataTypeNullable"; + res.insert(col); } + + res_args[i] = res.columns() - 1; } else { - res.insert(col); + res_args[i] = pre_loc; } } - return res; -} - -static Block create_block_with_nested_columns_impl(const Block& block, - const std::unordered_set<size_t>& args) { - Block res; - size_t columns = block.columns(); - - for (size_t i = 0; i < columns; ++i) { - const auto& col = block.get_by_position(i); - - if (args.count(i) && col.type->is_nullable()) { - const DataTypePtr& nested_type = - static_cast<const DataTypeNullable&>(*col.type).get_nested_type(); - - if (!col.column) { - res.insert({nullptr, nested_type, col.name}); - } else if (auto* nullable = check_and_get_column<ColumnNullable>(*col.column)) { - const auto& nested_col = nullable->get_nested_column_ptr(); - res.insert({nested_col, nested_type, col.name}); - } else if (auto* const_column = check_and_get_column<ColumnConst>(*col.column)) { - const auto& nested_col = - check_and_get_column<ColumnNullable>(const_column->get_data_column()) - ->get_nested_column_ptr(); - res.insert({ColumnConst::create(nested_col, col.column->size()), nested_type, - col.name}); - } else { - LOG(FATAL) << "Illegal column for DataTypeNullable"; - } - } else - res.insert(col); + // TODO: only support match function, rethink the logic + for (const auto& ctn : block) { + if (ctn.name.size() > BeConsts::BLOCK_TEMP_COLUMN_PREFIX.size() && + starts_with(ctn.name, BeConsts::BLOCK_TEMP_COLUMN_PREFIX)) { + res.insert(ctn); + } } - return res; -} - -Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args) { - std::unordered_set<size_t> args_set(args.begin(), args.end()); - return create_block_with_nested_columns_impl(block, args_set); + return {res, res_args}; } -Block 
create_block_with_nested_columns(const Block& block, const ColumnNumbers& args, - size_t result) { - std::unordered_set<size_t> args_set(args.begin(), args.end()); - args_set.insert(result); - return create_block_with_nested_columns_impl(block, args_set); +std::tuple<Block, ColumnNumbers, size_t> create_block_with_nested_columns(const Block& block, + const ColumnNumbers& args, + size_t result) { + auto [res, res_args] = create_block_with_nested_columns(block, args, true); + // insert result column in temp block + res.insert(block.get_by_position(result)); + return {res, res_args, res.columns() - 1}; } void validate_argument_type(const IFunction& func, const DataTypes& arguments, diff --git a/be/src/vec/functions/function_helpers.h b/be/src/vec/functions/function_helpers.h index 777bd415dc..474ac435bf 100644 --- a/be/src/vec/functions/function_helpers.h +++ b/be/src/vec/functions/function_helpers.h @@ -86,21 +86,17 @@ inline std::enable_if_t<IsDecimalNumber<T>, Field> to_field(const T& x, UInt32 s Columns convert_const_tuple_to_constant_elements(const ColumnConst& column); -/// Returns the copy of a given block in which each column specified in -/// the "arguments" parameter is replaced with its respective nested -/// column if it is nullable. -Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args); - -/// Similar function as above. Additionally transform the result type if needed. -Block create_block_with_nested_columns(const Block& block, const ColumnNumbers& args, - size_t result); - -/// Returns the copy of a given block in only args column specified in -/// the "arguments" parameter is replaced with its respective nested -/// column if it is nullable. -/// TODO: the old funciton `create_block_with_nested_columns` have performance problem, replace all -/// by the function and delete old one. 
-Block create_block_with_nested_columns_only_args(const Block& block, const ColumnNumbers& args); +/// Returns a temporary block built only from the columns referenced by "args", +/// in which each nullable column is replaced with its nested column, together with +/// the positions of those arguments in the temporary block (same order as "args"). +std::tuple<Block, ColumnNumbers> create_block_with_nested_columns(const Block& block, + const ColumnNumbers& args, + const bool need_check_same); + +// Same as above, and additionally returns the position of the result column in the temporary block +std::tuple<Block, ColumnNumbers, size_t> create_block_with_nested_columns(const Block& block, + const ColumnNumbers& args, + size_t result); /// Checks argument type at specified index with predicate. /// throws if there is no argument at specified index or if predicate returns false. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org