This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-1.1-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push: new 401fe8bc9a [fix](cherry-pick)pick some code from master to fix bugs (#13517) 401fe8bc9a is described below commit 401fe8bc9aa0a25360ba34fd7d7d86f32a98de8c Author: starocean999 <40539150+starocean...@users.noreply.github.com> AuthorDate: Fri Oct 21 10:15:21 2022 +0800 [fix](cherry-pick)pick some code from master to fix bugs (#13517) --- be/src/vec/exec/vsort_node.cpp | 21 ++++- be/src/vec/exec/vsort_node.h | 4 + be/src/vec/functions/in.cpp | 97 ++++++++++++++++++---- .../java/org/apache/doris/analysis/SelectStmt.java | 12 +++ .../java/org/apache/doris/planner/SortNode.java | 11 +++ gensrc/thrift/PlanNodes.thrift | 2 + 6 files changed, 130 insertions(+), 17 deletions(-) diff --git a/be/src/vec/exec/vsort_node.cpp b/be/src/vec/exec/vsort_node.cpp index 99c8090fde..c494d8231d 100644 --- a/be/src/vec/exec/vsort_node.cpp +++ b/be/src/vec/exec/vsort_node.cpp @@ -36,6 +36,16 @@ Status VSortNode::init(const TPlanNode& tnode, RuntimeState* state) { RETURN_IF_ERROR(_vsort_exec_exprs.init(tnode.sort_node.sort_info, _pool)); _is_asc_order = tnode.sort_node.sort_info.is_asc_order; _nulls_first = tnode.sort_node.sort_info.nulls_first; + if (tnode.sort_node.sort_info.__isset.slot_exprs_nullability_changed_flags) { + _need_convert_to_nullable_flags = + tnode.sort_node.sort_info.slot_exprs_nullability_changed_flags; + } else { + _need_convert_to_nullable_flags.resize( + tnode.sort_node.sort_info.__isset.sort_tuple_slot_exprs + ? tnode.sort_node.sort_info.sort_tuple_slot_exprs.size() + : 0, + false); + } return Status::OK(); } @@ -177,8 +187,17 @@ Status VSortNode::pretreat_block(doris::vectorized::Block& block) { } Block new_block; + int i = 0; + for (auto column_id : valid_column_ids) { - new_block.insert(block.get_by_position(column_id)); + if (_need_convert_to_nullable_flags[i]) { + auto column_ptr = make_nullable(block.get_by_position(column_id).column); + new_block.insert( + {column_ptr, make_nullable(block.get_by_position(column_id).type), ""}); + } else { + new_block.insert(block.get_by_position(column_id)); + } + i++; } block.swap(new_block); } diff --git a/be/src/vec/exec/vsort_node.h b/be/src/vec/exec/vsort_node.h index f67326afa6..ba63839dd8 100644 --- a/be/src/vec/exec/vsort_node.h +++ b/be/src/vec/exec/vsort_node.h @@ -76,6 +76,10 @@ private: std::vector<Block> _sorted_blocks; std::priority_queue<SortCursor> _priority_queue; + // for some reason, _sort_tuple_slot_expr_ctxs is not-null but _lhs_ordering_expr_ctxs is nullable + // this flag list would be used to convert column to nullable. + std::vector<bool> _need_convert_to_nullable_flags; + // TODO: Not using now, maybe should be delete // Keeps track of the number of rows skipped for handling _offset. int64_t _num_rows_skipped; diff --git a/be/src/vec/functions/in.cpp b/be/src/vec/functions/in.cpp index 216165b826..18ffe6e7b8 100644 --- a/be/src/vec/functions/in.cpp +++ b/be/src/vec/functions/in.cpp @@ -67,10 +67,17 @@ public: } auto* state = new InState(); context->set_function_state(scope, state); - state->hybrid_set.reset( - vec_create_set(convert_type_to_primitive(context->get_arg_type(0)->type))); + if (context->get_arg_type(0)->type == FunctionContext::Type::TYPE_CHAR || + context->get_arg_type(0)->type == FunctionContext::Type::TYPE_VARCHAR || + context->get_arg_type(0)->type == FunctionContext::Type::TYPE_STRING) { + // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly + state->hybrid_set.reset(new StringValueSet()); + } else { + state->hybrid_set.reset( + vec_create_set(convert_type_to_primitive(context->get_arg_type(0)->type))); + } - DCHECK(context->get_num_args() > 1); + DCHECK(context->get_num_args() >= 1); for (int i = 1; i < context->get_num_args(); ++i) { const auto& const_column_ptr = context->get_constant_col(i); if (const_column_ptr != nullptr) { @@ -78,7 +85,7 @@ public: if (const_data.data == nullptr) { state->null_in_set = true; } else { - state->hybrid_set->insert((void *) const_data.data, const_data.size); + state->hybrid_set->insert((void*)const_data.data, const_data.size); } } else { state->use_set = false; @@ -91,7 +98,7 @@ public: Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) override { auto in_state = reinterpret_cast<InState*>( - context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); if (!in_state) { return Status::RuntimeError(fmt::format("funciton context for function '{}' must have Set;", get_name())); @@ -109,17 +116,74 @@ public: auto materialized_column = left_arg.column->convert_to_full_column_if_const(); if (in_state->use_set) { - for (size_t i = 0; i < input_rows_count; ++i) { - const auto& ref_data = materialized_column->get_data_at(i); - if (ref_data.data) { - vec_res[i] = negative ^ in_state->hybrid_set->find((void *) ref_data.data, ref_data.size); - if (in_state->null_in_set) { + if (materialized_column->is_nullable()) { + auto* null_col_ptr = vectorized::check_and_get_column<vectorized::ColumnNullable>( + materialized_column); + auto& null_bitmap = reinterpret_cast<const vectorized::ColumnUInt8&>( + null_col_ptr->get_null_map_column()) + .get_data(); + auto* nested_col_ptr = null_col_ptr->get_nested_column_ptr().get(); + auto search_hash_set = [&](auto* col_ptr) { + for (size_t i = 0; i < input_rows_count; ++i) { + const auto& ref_data = col_ptr->get_data_at(i); + vec_res[i] = + !null_bitmap[i] && + in_state->hybrid_set->find((void*)ref_data.data, ref_data.size); + if constexpr (negative) { + vec_res[i] = !vec_res[i]; + } + } + }; + + if (nested_col_ptr->is_column_string()) { + const auto* column_string_ptr = + reinterpret_cast<const vectorized::ColumnString*>(nested_col_ptr); + search_hash_set(column_string_ptr); + } else { + // todo support other column type + search_hash_set(nested_col_ptr); + } + + if (!in_state->null_in_set) { + for (size_t i = 0; i < input_rows_count; ++i) { + vec_null_map_to[i] = null_bitmap[i]; + } + } else { + for (size_t i = 0; i < input_rows_count; ++i) { + vec_null_map_to[i] = null_bitmap[i] || (negative == vec_res[i]); + } + } + + } else { // non-nullable + + auto search_hash_set = [&](auto* col_ptr) { + for (size_t i = 0; i < input_rows_count; ++i) { + const auto& ref_data = col_ptr->get_data_at(i); + vec_res[i] = + in_state->hybrid_set->find((void*)ref_data.data, ref_data.size); + if constexpr (negative) { + vec_res[i] = !vec_res[i]; + } + } + }; + + if (materialized_column->is_column_string()) { + const auto* column_string_ptr = + reinterpret_cast<const vectorized::ColumnString*>( + materialized_column.get()); + search_hash_set(column_string_ptr); + } else { + search_hash_set(materialized_column.get()); + } + + if (in_state->null_in_set) { + for (size_t i = 0; i < input_rows_count; ++i) { vec_null_map_to[i] = negative == vec_res[i]; - } else { - vec_null_map_to[i] = false; } } else { - vec_null_map_to[i] = true; + for (size_t i = 0; i < input_rows_count; ++i) { + vec_null_map_to[i] = false; + } } } } else { @@ -144,9 +208,9 @@ public: if (set_data.data == nullptr) null_in_set = true; else - hybrid_set->insert((void *)(set_data.data), set_data.size); + hybrid_set->insert((void*)(set_data.data), set_data.size); } - vec_res[i] = negative ^ hybrid_set->find((void *) ref_data.data, ref_data.size); + vec_res[i] = negative ^ hybrid_set->find((void*)ref_data.data, ref_data.size); if (null_in_set) { vec_null_map_to[i] = negative == vec_res[i]; } else { @@ -156,7 +220,8 @@ public: } if (block.get_by_position(result).type->is_nullable()) { - block.replace_by_position(result, ColumnNullable::create(std::move(res), std::move(col_null_map_to))); + block.replace_by_position( + result, ColumnNullable::create(std::move(res), std::move(col_null_map_to))); } else { block.replace_by_position(result, std::move(res)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java index b7ef73a2d6..7de001a808 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SelectStmt.java @@ -1395,7 +1395,19 @@ public class SelectStmt extends QueryStmt { } List<Expr> oriGroupingExprs = groupByClause.getOriGroupingExprs(); if (oriGroupingExprs != null) { + // we must make sure the expr is analyzed before rewrite + try { + for (Expr expr : oriGroupingExprs) { + expr.analyze(analyzer); + } + } catch (AnalysisException ex) { + //ignore any exception + } rewriter.rewriteList(oriGroupingExprs, analyzer); + // after rewrite, need reset the analyze status for later re-analyze + for (Expr expr : oriGroupingExprs) { + expr.reset(); + } } } if (orderByElements != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java index c6d5bf2332..60be79de29 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/SortNode.java @@ -66,6 +66,8 @@ public class SortNode extends PlanNode { private boolean isUnusedExprRemoved = false; + private ArrayList<Boolean> nullabilityChangedFlags = Lists.newArrayList(); + public void setIsAnalyticSort(boolean v) { isAnalyticSort = v; } @@ -131,6 +133,7 @@ public class SortNode extends PlanNode { for (int i = slotDescriptorList.size() - 1; i >= 0; i--) { if (!slotDescriptorList.get(i).isMaterialized()) { resolvedTupleExprs.remove(i); + nullabilityChangedFlags.remove(i); } } } @@ -201,6 +204,9 @@ public class SortNode extends PlanNode { removeUnusedExprs(); if (resolvedTupleExprs != null) { sortInfo.setSortTupleSlotExprs(Expr.treesToThrift(resolvedTupleExprs)); + // FIXME this is a bottom line solution for wrong nullability of resolvedTupleExprs + // remove the following line after nereids online + sortInfo.setSlotExprsNullabilityChangedFlags(nullabilityChangedFlags); } TSortNode sortNode = new TSortNode(sortInfo, useTopN); @@ -267,11 +273,16 @@ public class SortNode extends PlanNode { for (int i = 0; i < slotExprs.size(); ++i) { resolvedTupleExprs.add(slotExprs.get(i)); outputSmap.put(slotExprs.get(i), new SlotRef(sortTupleSlots.get(i))); + nullabilityChangedFlags.add(slotExprs.get(i).isNullable()); } ExprSubstitutionMap childSmap = getCombinedChildSmap(); resolvedTupleExprs = Expr.substituteList(resolvedTupleExprs, childSmap, analyzer, false); + for (int i = 0; i < resolvedTupleExprs.size(); ++i) { + nullabilityChangedFlags.set(i, nullabilityChangedFlags.get(i) ^ resolvedTupleExprs.get(i).isNullable()); + } + // Remap the ordering exprs to the tuple materialized by this sort node. The mapping // is a composition of the childSmap and the outputSmap_ because the child node may // have also remapped its input (e.g., as in a a series of (sort->analytic)* nodes). diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 9f23f96cef..4458416ee0 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -499,6 +499,8 @@ struct TSortInfo { // Expressions evaluated over the input row that materialize the tuple to be sorted. // Contains one expr per slot in the materialized tuple. 4: optional list<Exprs.TExpr> sort_tuple_slot_exprs + // Indicates the nullable info of sort_tuple_slot_exprs is changed after substitute by child's smap + 5: optional list<bool> slot_exprs_nullability_changed_flags } struct TSortNode { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org