This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit c61be0cb8a85cda1b61b1bc0535020488207b46c Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com> AuthorDate: Mon Jan 23 16:09:46 2023 +0800 [vectorized](analytic) fix analytic node of window function get wrong… (#16074) [Bug] 基础函数rank()开窗排序结果错误 #15951 --- be/src/vec/exec/vanalytic_eval_node.cpp | 28 ++++++++++++++++++++++------ be/src/vec/exec/vanalytic_eval_node.h | 14 +++++++++++++- 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/be/src/vec/exec/vanalytic_eval_node.cpp b/be/src/vec/exec/vanalytic_eval_node.cpp index 74625c14a6..55d98d47ab 100644 --- a/be/src/vec/exec/vanalytic_eval_node.cpp +++ b/be/src/vec/exec/vanalytic_eval_node.cpp @@ -395,8 +395,8 @@ BlockRowPos VAnalyticEvalNode::_get_partition_by_end() { } //_partition_by_columns,_order_by_columns save in blocks, so if need to calculate the boundary, may find in which blocks firstly -BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos start, - BlockRowPos end) { +BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos start, BlockRowPos end, + bool need_check_first) { int64_t start_init_row_num = start.row_num; ColumnPtr start_column = _input_blocks[start.block_num].get_by_position(idx).column; ColumnPtr start_next_block_column = start_column; @@ -406,10 +406,20 @@ BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos sta int64_t start_block_num = start.block_num; int64_t end_block_num = end.block_num; int64_t mid_blcok_num = end.block_num; + // To fix this problem: https://github.com/apache/doris/issues/15951 + // in this case, the partition by column is last row of block, so it's pointed to a new block at row = 0, range is: [left, right) + // From the perspective of order by column, the two values are exactly equal. + // so the range will be get wrong because it's compare_at == 0 with next block at row = 0 + if (need_check_first && end.block_num > 0 && end.row_num == 0) { + end.block_num--; + end_block_num--; + end.row_num = _input_blocks[end_block_num].rows(); + } //binary search find in which block while (start_block_num < end_block_num) { mid_blcok_num = (start_block_num + end_block_num + 1) >> 1; start_next_block_column = _input_blocks[mid_blcok_num].get_by_position(idx).column; + //Compares (*this)[n] and rhs[m], this: start[init_row] rhs: mid[0] if (start_column->compare_at(start_init_row_num, 0, *start_next_block_column, 1) == 0) { start_block_num = mid_blcok_num; } else { @@ -417,6 +427,8 @@ BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos sta } } + // have check the start.block_num: start_column[start_init_row_num] with mid_blcok_num start_next_block_column[0] + // now next block must not be result, so need check with end_block_num: start_next_block_column[last_row] if (end_block_num == mid_blcok_num - 1) { start_next_block_column = _input_blocks[end_block_num].get_by_position(idx).column; int64_t block_size = _input_blocks[end_block_num].rows(); @@ -429,6 +441,7 @@ BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos sta } //check whether need get column again, maybe same as first init + // if the start_block_num have move to forword, so need update start block num and compare it from row_num=0 if (start_column.get() != start_next_block_column.get()) { start_init_row_num = 0; start.block_num = start_block_num; @@ -437,15 +450,18 @@ BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos sta //binary search, set start and end pos int64_t start_pos = start_init_row_num; int64_t end_pos = _input_blocks[start.block_num].rows() - 1; + //if end_block_num haven't moved, only start_block_num go to the end block + //so could used the end.row_num for binary search if (start.block_num == end.block_num) { end_pos = end.row_num; } while (start_pos < end_pos) { int64_t mid_pos = (start_pos + end_pos) >> 1; - if (start_column->compare_at(start_init_row_num, mid_pos, *start_column, 1)) + if (start_column->compare_at(start_init_row_num, mid_pos, *start_column, 1)) { end_pos = mid_pos; - else + } else { start_pos = mid_pos + 1; + } } start.row_num = start_pos; //upadte row num, return the find end return start; @@ -615,8 +631,8 @@ void VAnalyticEvalNode::_update_order_by_range() { _order_by_start = _order_by_end; _order_by_end = _partition_by_end; for (size_t i = 0; i < _order_by_eq_expr_ctxs.size(); ++i) { - _order_by_end = - _compare_row_to_find_end(_ordey_by_column_idxs[i], _order_by_start, _order_by_end); + _order_by_end = _compare_row_to_find_end(_ordey_by_column_idxs[i], _order_by_start, + _order_by_end, true); } _order_by_start.pos = input_block_first_row_positions[_order_by_start.block_num] + _order_by_start.row_num; diff --git a/be/src/vec/exec/vanalytic_eval_node.h b/be/src/vec/exec/vanalytic_eval_node.h index 9597606f6c..e4eff3df99 100644 --- a/be/src/vec/exec/vanalytic_eval_node.h +++ b/be/src/vec/exec/vanalytic_eval_node.h @@ -19,6 +19,8 @@ #include <thrift/protocol/TDebugProtocol.h> +#include <string> + #include "exec/exec_node.h" #include "exprs/expr.h" #include "runtime/tuple.h" @@ -34,6 +36,15 @@ struct BlockRowPos { int64_t block_num; //the pos at which block int64_t row_num; //the pos at which row int64_t pos; //pos = all blocks size + row_num + std::string debug_string() { + std::string res = "\t block_num: "; + res += std::to_string(block_num); + res += "\t row_num: "; + res += std::to_string(row_num); + res += "\t pos: "; + res += std::to_string(pos); + return res; + } }; class AggFnEvaluator; @@ -73,7 +84,8 @@ private: void _insert_result_info(int64_t current_block_rows); Status _output_current_block(Block* block); BlockRowPos _get_partition_by_end(); - BlockRowPos _compare_row_to_find_end(int idx, BlockRowPos start, BlockRowPos end); + BlockRowPos _compare_row_to_find_end(int idx, BlockRowPos start, BlockRowPos end, + bool need_check_first = false); Status _fetch_next_block_data(RuntimeState* state); Status _consumed_block_and_init_partition(RuntimeState* state, bool* next_partition, bool* eos); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org