This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ccb122ca257 [fix](grouping sets) fix grouping sets have multiple empty 
sets (#32317)
ccb122ca257 is described below

commit ccb122ca25743ae4e1848d9bd91f2c06c7e5d84b
Author: Mryange <59914473+mrya...@users.noreply.github.com>
AuthorDate: Mon Mar 18 11:49:05 2024 +0800

    [fix](grouping sets) fix grouping sets have multiple empty sets (#32317)
    
    In #32112, the handling of empty sets (empty expression cases) was 
addressed. However, multiple empty sets in grouping sets have different 
grouping IDs.
---
 be/src/pipeline/exec/repeat_operator.cpp           | 27 ++++++++++++++----
 be/src/pipeline/exec/repeat_operator.h             |  5 +++-
 be/src/vec/exec/vrepeat_node.cpp                   | 23 ++++++++++++----
 be/src/vec/exec/vrepeat_node.h                     |  3 ++
 .../correctness_p0/test_grouping_sets_empty.out    | 32 ++++++++++++++++++++--
 .../correctness_p0/test_grouping_sets_empty.groovy | 26 ++++++++++++++++--
 6 files changed, 98 insertions(+), 18 deletions(-)

diff --git a/be/src/pipeline/exec/repeat_operator.cpp 
b/be/src/pipeline/exec/repeat_operator.cpp
index 5554599af00..0f9cf93b3f5 100644
--- a/be/src/pipeline/exec/repeat_operator.cpp
+++ b/be/src/pipeline/exec/repeat_operator.cpp
@@ -158,6 +158,17 @@ Status 
RepeatLocalState::get_repeated_block(vectorized::Block* child_block, int
 
     const auto rows = child_block->rows();
     // Fill grouping ID to block
+    RETURN_IF_ERROR(add_grouping_id_column(rows, cur_col, columns, 
repeat_id_idx));
+
+    DCHECK_EQ(cur_col, column_size);
+
+    return Status::OK();
+}
+
+Status RepeatLocalState::add_grouping_id_column(std::size_t rows, std::size_t& 
cur_col,
+                                                vectorized::MutableColumns& 
columns,
+                                                int repeat_id_idx) {
+    auto& p = _parent->cast<RepeatOperatorX>();
     for (auto slot_idx = 0; slot_idx < p._grouping_list.size(); slot_idx++) {
         DCHECK_LT(slot_idx, p._output_tuple_desc->slots().size());
         const SlotDescriptor* _virtual_slot_desc = 
p._output_tuple_desc->slots()[cur_col];
@@ -166,14 +177,10 @@ Status 
RepeatLocalState::get_repeated_block(vectorized::Block* child_block, int
         int64_t val = p._grouping_list[slot_idx][repeat_id_idx];
         auto* column_ptr = columns[cur_col].get();
         DCHECK(!p._output_slots[cur_col]->is_nullable());
-
         auto* col = 
assert_cast<vectorized::ColumnVector<vectorized::Int64>*>(column_ptr);
         col->insert_raw_integers(val, rows);
         cur_col++;
     }
-
-    DCHECK_EQ(cur_col, column_size);
-
     return Status::OK();
 }
 
@@ -228,8 +235,16 @@ Status RepeatOperatorX::pull(doris::RuntimeState* state, 
vectorized::Block* outp
             _repeat_id_idx = 0;
         }
     } else if (local_state._expr_ctxs.empty()) {
-        DCHECK(!_intermediate_block || (_intermediate_block && 
_intermediate_block->rows() == 0));
-        output_block->swap(_child_block);
+        auto m_block = 
vectorized::VectorizedUtils::build_mutable_mem_reuse_block(output_block,
+                                                                               
   _output_slots);
+        auto rows = _child_block.rows();
+        auto& columns = m_block.mutable_columns();
+
+        for (int repeat_id_idx = 0; repeat_id_idx < _repeat_id_list.size(); 
repeat_id_idx++) {
+            std::size_t cur_col = 0;
+            RETURN_IF_ERROR(
+                    local_state.add_grouping_id_column(rows, cur_col, columns, 
repeat_id_idx));
+        }
         
_child_block.clear_column_data(_child_x->row_desc().num_materialized_slots());
     }
     RETURN_IF_ERROR(vectorized::VExprContext::filter_block(_conjuncts, 
output_block,
diff --git a/be/src/pipeline/exec/repeat_operator.h 
b/be/src/pipeline/exec/repeat_operator.h
index 28229210299..9cb671fccb0 100644
--- a/be/src/pipeline/exec/repeat_operator.h
+++ b/be/src/pipeline/exec/repeat_operator.h
@@ -59,6 +59,9 @@ public:
     Status get_repeated_block(vectorized::Block* child_block, int 
repeat_id_idx,
                               vectorized::Block* output_block);
 
+    Status add_grouping_id_column(std::size_t rows, std::size_t& cur_col,
+                                  vectorized::MutableColumns& columns, int 
repeat_id_idx);
+
 private:
     friend class RepeatOperatorX;
     template <typename LocalStateType>
@@ -97,7 +100,7 @@ private:
     TupleId _output_tuple_id;
     const TupleDescriptor* _output_tuple_desc = nullptr;
 
-    std::vector<SlotDescriptor*> _output_slots;
+    mutable std::vector<SlotDescriptor*> _output_slots;
 
     vectorized::VExprContextSPtrs _expr_ctxs;
 };
diff --git a/be/src/vec/exec/vrepeat_node.cpp b/be/src/vec/exec/vrepeat_node.cpp
index d494ff514e9..1a919d2aa9e 100644
--- a/be/src/vec/exec/vrepeat_node.cpp
+++ b/be/src/vec/exec/vrepeat_node.cpp
@@ -153,6 +153,15 @@ Status VRepeatNode::get_repeated_block(Block* child_block, 
int repeat_id_idx, Bl
 
     const auto rows = child_block->rows();
     // Fill grouping ID to block
+    RETURN_IF_ERROR(add_grouping_id_column(rows, cur_col, columns, 
repeat_id_idx));
+    output_block->set_columns(std::move(columns));
+    DCHECK_EQ(cur_col, column_size);
+
+    return Status::OK();
+}
+
+Status VRepeatNode::add_grouping_id_column(std::size_t rows, std::size_t& 
cur_col,
+                                           vectorized::MutableColumns& 
columns, int repeat_id_idx) {
     for (auto slot_idx = 0; slot_idx < _grouping_list.size(); slot_idx++) {
         DCHECK_LT(slot_idx, _output_tuple_desc->slots().size());
         const SlotDescriptor* _virtual_slot_desc = 
_output_tuple_desc->slots()[cur_col];
@@ -166,9 +175,6 @@ Status VRepeatNode::get_repeated_block(Block* child_block, 
int repeat_id_idx, Bl
         col->insert_raw_integers(val, rows);
         cur_col++;
     }
-    output_block->set_columns(std::move(columns));
-    DCHECK_EQ(cur_col, column_size);
-
     return Status::OK();
 }
 
@@ -194,8 +200,15 @@ Status VRepeatNode::pull(doris::RuntimeState* state, 
vectorized::Block* output_b
             _repeat_id_idx = 0;
         }
     } else if (_expr_ctxs.empty()) {
-        DCHECK(!_intermediate_block || (_intermediate_block && 
_intermediate_block->rows() == 0));
-        output_block->swap(*_child_block);
+        auto m_block = 
vectorized::VectorizedUtils::build_mutable_mem_reuse_block(output_block,
+                                                                               
   _output_slots);
+        auto rows = _child_block->rows();
+        auto& columns = m_block.mutable_columns();
+
+        for (int repeat_id_idx = 0; repeat_id_idx < _repeat_id_list.size(); 
repeat_id_idx++) {
+            std::size_t cur_col = 0;
+            RETURN_IF_ERROR(add_grouping_id_column(rows, cur_col, columns, 
repeat_id_idx));
+        }
         release_block_memory(*_child_block);
     }
     RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, output_block, 
output_block->columns()));
diff --git a/be/src/vec/exec/vrepeat_node.h b/be/src/vec/exec/vrepeat_node.h
index 94737580031..2372cf43a01 100644
--- a/be/src/vec/exec/vrepeat_node.h
+++ b/be/src/vec/exec/vrepeat_node.h
@@ -64,6 +64,9 @@ public:
 private:
     Status get_repeated_block(Block* child_block, int repeat_id_idx, Block* 
output_block);
 
+    Status add_grouping_id_column(std::size_t rows, std::size_t& cur_col,
+                                  vectorized::MutableColumns& columns, int 
repeat_id_idx);
+
     // Slot id set used to indicate those slots need to set to null.
     std::vector<std::set<SlotId>> _slot_id_set_list;
     // all slot id
diff --git a/regression-test/data/correctness_p0/test_grouping_sets_empty.out 
b/regression-test/data/correctness_p0/test_grouping_sets_empty.out
index 4d3b2c82c87..fde7e6420da 100644
--- a/regression-test/data/correctness_p0/test_grouping_sets_empty.out
+++ b/regression-test/data/correctness_p0/test_grouping_sets_empty.out
@@ -1,13 +1,39 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !select1 --
-1
+3
 
 -- !select2 --
-1
+3
 
 -- !select3 --
-1
+3
+3
+3
 
 -- !select4 --
+\N
+\N
+\N
+1
+2
+3
+
+-- !select5 --
+3
+
+-- !select6 --
+3
+
+-- !select7 --
+3
+3
+3
+
+-- !select8 --
+\N
+\N
+\N
 1
+2
+3
 
diff --git 
a/regression-test/suites/correctness_p0/test_grouping_sets_empty.groovy 
b/regression-test/suites/correctness_p0/test_grouping_sets_empty.groovy
index 23f35e3b80f..7265f752b44 100644
--- a/regression-test/suites/correctness_p0/test_grouping_sets_empty.groovy
+++ b/regression-test/suites/correctness_p0/test_grouping_sets_empty.groovy
@@ -17,12 +17,16 @@
 
 suite("test_grouping_sets_empty") {
 
+    sql"""
+        drop table if exists test_grouping_sets_empty;
+    """
+
     sql"""
         create table test_grouping_sets_empty (a int) distributed by hash(a) 
buckets 1 properties ( 'replication_num' = '1');
     """
 
     sql """
-        insert into test_grouping_sets_empty values (1);
+        insert into test_grouping_sets_empty values (1),(2),(3);
     """
 
 
@@ -39,21 +43,37 @@ suite("test_grouping_sets_empty") {
         select count(*) from test_grouping_sets_empty group by grouping sets 
(());
     """
 
+    qt_select3 """
+        select count(*) from test_grouping_sets_empty group by grouping sets 
((),(),());
+    """
+
+    qt_select4 """
+        select a from test_grouping_sets_empty group by grouping sets 
((),(),(),(a)) order by a;
+    """
+
 
     sql """ 
         set experimental_enable_pipeline_x_engine=false;
     """
 
 
-    qt_select3 """
+    qt_select5 """
         select count(a) from test_grouping_sets_empty group by grouping sets 
(());
     """
 
 
-    qt_select4 """
+    qt_select6 """
         select count(*) from test_grouping_sets_empty group by grouping sets 
(());
     """
 
+    qt_select7 """
+        select count(*) from test_grouping_sets_empty group by grouping sets 
((),(),());
+    """
+
+    qt_select8 """
+        select a from test_grouping_sets_empty group by grouping sets 
((),(),(),(a)) order by a;
+    """
+
 
 
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to