This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit c61be0cb8a85cda1b61b1bc0535020488207b46c
Author: zhangstar333 <87313068+zhangstar...@users.noreply.github.com>
AuthorDate: Mon Jan 23 16:09:46 2023 +0800

    [vectorized](analytic) fix analytic node of window function get wrong… 
(#16074)
    
    [Bug] 基础函数rank()开窗排序结果错误 #15951
---
 be/src/vec/exec/vanalytic_eval_node.cpp | 28 ++++++++++++++++++++++------
 be/src/vec/exec/vanalytic_eval_node.h   | 14 +++++++++++++-
 2 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/be/src/vec/exec/vanalytic_eval_node.cpp 
b/be/src/vec/exec/vanalytic_eval_node.cpp
index 74625c14a6..55d98d47ab 100644
--- a/be/src/vec/exec/vanalytic_eval_node.cpp
+++ b/be/src/vec/exec/vanalytic_eval_node.cpp
@@ -395,8 +395,8 @@ BlockRowPos VAnalyticEvalNode::_get_partition_by_end() {
 }
 
 //_partition_by_columns,_order_by_columns save in blocks, so if need to 
calculate the boundary, may find in which blocks firstly
-BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos 
start,
-                                                        BlockRowPos end) {
+BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos 
start, BlockRowPos end,
+                                                        bool need_check_first) 
{
     int64_t start_init_row_num = start.row_num;
     ColumnPtr start_column = 
_input_blocks[start.block_num].get_by_position(idx).column;
     ColumnPtr start_next_block_column = start_column;
@@ -406,10 +406,20 @@ BlockRowPos 
VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos sta
     int64_t start_block_num = start.block_num;
     int64_t end_block_num = end.block_num;
     int64_t mid_blcok_num = end.block_num;
+    // To fix this problem: https://github.com/apache/doris/issues/15951
+    // in this case, the partition by column is last row of block, so it's 
pointed to a new block at row = 0, range is: [left, right)
+    // From the perspective of order by column, the two values are exactly 
equal.
+    // so the range will be get wrong because it's compare_at == 0 with next 
block at row = 0
+    if (need_check_first && end.block_num > 0 && end.row_num == 0) {
+        end.block_num--;
+        end_block_num--;
+        end.row_num = _input_blocks[end_block_num].rows();
+    }
     //binary search find in which block
     while (start_block_num < end_block_num) {
         mid_blcok_num = (start_block_num + end_block_num + 1) >> 1;
         start_next_block_column = 
_input_blocks[mid_blcok_num].get_by_position(idx).column;
+        //Compares (*this)[n] and rhs[m], this: start[init_row]  rhs: mid[0]
         if (start_column->compare_at(start_init_row_num, 0, 
*start_next_block_column, 1) == 0) {
             start_block_num = mid_blcok_num;
         } else {
@@ -417,6 +427,8 @@ BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int 
idx, BlockRowPos sta
         }
     }
 
+    // have check the start.block_num:  start_column[start_init_row_num] with 
mid_blcok_num start_next_block_column[0]
+    // now next block must not be result, so need check with end_block_num: 
start_next_block_column[last_row]
     if (end_block_num == mid_blcok_num - 1) {
         start_next_block_column = 
_input_blocks[end_block_num].get_by_position(idx).column;
         int64_t block_size = _input_blocks[end_block_num].rows();
@@ -429,6 +441,7 @@ BlockRowPos VAnalyticEvalNode::_compare_row_to_find_end(int 
idx, BlockRowPos sta
     }
 
     //check whether need get column again, maybe same as first init
+    // if the start_block_num have move to forword, so need update start block 
num and compare it from row_num=0
     if (start_column.get() != start_next_block_column.get()) {
         start_init_row_num = 0;
         start.block_num = start_block_num;
@@ -437,15 +450,18 @@ BlockRowPos 
VAnalyticEvalNode::_compare_row_to_find_end(int idx, BlockRowPos sta
     //binary search, set start and end pos
     int64_t start_pos = start_init_row_num;
     int64_t end_pos = _input_blocks[start.block_num].rows() - 1;
+    //if end_block_num haven't moved, only start_block_num go to the end block
+    //so could used the end.row_num for binary search
     if (start.block_num == end.block_num) {
         end_pos = end.row_num;
     }
     while (start_pos < end_pos) {
         int64_t mid_pos = (start_pos + end_pos) >> 1;
-        if (start_column->compare_at(start_init_row_num, mid_pos, 
*start_column, 1))
+        if (start_column->compare_at(start_init_row_num, mid_pos, 
*start_column, 1)) {
             end_pos = mid_pos;
-        else
+        } else {
             start_pos = mid_pos + 1;
+        }
     }
     start.row_num = start_pos; //upadte row num, return the find end
     return start;
@@ -615,8 +631,8 @@ void VAnalyticEvalNode::_update_order_by_range() {
     _order_by_start = _order_by_end;
     _order_by_end = _partition_by_end;
     for (size_t i = 0; i < _order_by_eq_expr_ctxs.size(); ++i) {
-        _order_by_end =
-                _compare_row_to_find_end(_ordey_by_column_idxs[i], 
_order_by_start, _order_by_end);
+        _order_by_end = _compare_row_to_find_end(_ordey_by_column_idxs[i], 
_order_by_start,
+                                                 _order_by_end, true);
     }
     _order_by_start.pos =
             input_block_first_row_positions[_order_by_start.block_num] + 
_order_by_start.row_num;
diff --git a/be/src/vec/exec/vanalytic_eval_node.h 
b/be/src/vec/exec/vanalytic_eval_node.h
index 9597606f6c..e4eff3df99 100644
--- a/be/src/vec/exec/vanalytic_eval_node.h
+++ b/be/src/vec/exec/vanalytic_eval_node.h
@@ -19,6 +19,8 @@
 
 #include <thrift/protocol/TDebugProtocol.h>
 
+#include <string>
+
 #include "exec/exec_node.h"
 #include "exprs/expr.h"
 #include "runtime/tuple.h"
@@ -34,6 +36,15 @@ struct BlockRowPos {
     int64_t block_num; //the pos at which block
     int64_t row_num;   //the pos at which row
     int64_t pos;       //pos = all blocks size + row_num
+    std::string debug_string() {
+        std::string res = "\t block_num: ";
+        res += std::to_string(block_num);
+        res += "\t row_num: ";
+        res += std::to_string(row_num);
+        res += "\t pos: ";
+        res += std::to_string(pos);
+        return res;
+    }
 };
 
 class AggFnEvaluator;
@@ -73,7 +84,8 @@ private:
     void _insert_result_info(int64_t current_block_rows);
     Status _output_current_block(Block* block);
     BlockRowPos _get_partition_by_end();
-    BlockRowPos _compare_row_to_find_end(int idx, BlockRowPos start, 
BlockRowPos end);
+    BlockRowPos _compare_row_to_find_end(int idx, BlockRowPos start, 
BlockRowPos end,
+                                         bool need_check_first = false);
 
     Status _fetch_next_block_data(RuntimeState* state);
     Status _consumed_block_and_init_partition(RuntimeState* state, bool* 
next_partition, bool* eos);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to