This is an automated email from the ASF dual-hosted git repository.

mrhhsg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new ec6abb6b432 [fix](set) incorrect result of set operator (#35607)
ec6abb6b432 is described below

commit ec6abb6b4328268cd1b167d0c43b4a1ba6cc2efa
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Thu May 30 10:33:13 2024 +0800

    [fix](set) incorrect result of set operator (#35607)
    
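    If there are duplicated expressions in the select list of a set
    operation (e.g. `select col1, col1 from t1 except select col1, col1
    from t2`), the result is incorrect. The build-column map was keyed by
    the build-block column id, so duplicated expressions that resolve to
    the same column id presumably collided on a single key; the map is now
    keyed by the output column index instead.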
    
    ## Proposed changes
    
    Issue Number: close #28438
    
    <!--Describe your changes.-->
---
 be/src/pipeline/dependency.h                                 |  4 ++--
 be/src/pipeline/exec/set_sink_operator.cpp                   |  2 +-
 be/src/pipeline/exec/set_source_operator.cpp                 |  4 ++--
 be/src/vec/exec/vset_operation_node.cpp                      |  6 +++---
 be/src/vec/exec/vset_operation_node.h                        |  4 ++--
 regression-test/data/query_p0/operator/test_set_operator.out | 12 ++++++++++++
 .../suites/query_p0/operator/test_set_operator.groovy        |  8 ++++++++
 7 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h
index cdc0eec3933..d7084f85d5d 100644
--- a/be/src/pipeline/dependency.h
+++ b/be/src/pipeline/dependency.h
@@ -609,8 +609,8 @@ public:
     vectorized::Block build_block; // build to source
     //record element size in hashtable
     int64_t valid_element_in_hash_tbl = 0;
-    //first:column_id, could point to origin column or cast column
-    //second:idx mapped to column types
+    //first: idx mapped to column types
+    //second: column_id, could point to origin column or cast column
     std::unordered_map<int, int> build_col_idx;
 
     //// shared static states (shared, decided in prepare/open...)
diff --git a/be/src/pipeline/exec/set_sink_operator.cpp b/be/src/pipeline/exec/set_sink_operator.cpp
index be44d4dbf7d..ff6bdfed1a3 100644
--- a/be/src/pipeline/exec/set_sink_operator.cpp
+++ b/be/src/pipeline/exec/set_sink_operator.cpp
@@ -137,7 +137,7 @@ Status SetSinkOperatorX<is_intersect>::_extract_build_column(
 
         raw_ptrs[i] = block.get_by_position(result_col_id).column.get();
         DCHECK_GE(result_col_id, 0);
-        local_state._shared_state->build_col_idx.insert({result_col_id, i});
+        local_state._shared_state->build_col_idx.insert({i, result_col_id});
     }
     return Status::OK();
 }
diff --git a/be/src/pipeline/exec/set_source_operator.cpp b/be/src/pipeline/exec/set_source_operator.cpp
index 0f118a7818e..0994350430b 100644
--- a/be/src/pipeline/exec/set_source_operator.cpp
+++ b/be/src/pipeline/exec/set_source_operator.cpp
@@ -151,8 +151,8 @@ void SetSourceOperatorX<is_intersect>::_add_result_columns(
 
     auto it = value.begin();
     for (auto idx = build_col_idx.begin(); idx != build_col_idx.end(); ++idx) {
-        auto& column = *build_block.get_by_position(idx->first).column;
-        local_state._mutable_cols[idx->second]->insert_from(column, it->row_num);
+        auto& column = *build_block.get_by_position(idx->second).column;
+        local_state._mutable_cols[idx->first]->insert_from(column, it->row_num);
     }
     block_size++;
 }
diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp
index c207fb18f05..2b2573d83bc 100644
--- a/be/src/vec/exec/vset_operation_node.cpp
+++ b/be/src/vec/exec/vset_operation_node.cpp
@@ -331,8 +331,8 @@ void VSetOperationNode<is_intersect>::add_result_columns(RowRefListWithFlags& va
                                                          int& block_size) {
     auto it = value.begin();
     for (auto idx = _build_col_idx.begin(); idx != _build_col_idx.end(); ++idx) {
-        const auto& column = *_build_block.get_by_position(idx->first).column;
-        _mutable_cols[idx->second]->insert_from(column, it->row_num);
+        const auto& column = *_build_block.get_by_position(idx->second).column;
+        _mutable_cols[idx->first]->insert_from(column, it->row_num);
     }
     block_size++;
 }
@@ -434,7 +434,7 @@ Status VSetOperationNode<is_intersect>::extract_build_column(Block& block, Colum
         }
         raw_ptrs[i] = block.get_by_position(result_col_id).column.get();
         DCHECK_GE(result_col_id, 0);
-        _build_col_idx.insert({result_col_id, i});
+        _build_col_idx.insert({i, result_col_id});
     }
     return Status::OK();
 }
diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h
index 508f8073689..9f3ba8fba36 100644
--- a/be/src/vec/exec/vset_operation_node.h
+++ b/be/src/vec/exec/vset_operation_node.h
@@ -112,8 +112,8 @@ private:
     std::vector<VExprContextSPtrs> _child_expr_lists;
     //record build column type
     DataTypes _left_table_data_types;
-    //first:column_id, could point to origin column or cast column
-    //second:idx mapped to column types
+    //first: idx mapped to column types
+    //second: column_id, could point to origin column or cast column
     std::unordered_map<int, int> _build_col_idx;
     //record insert column id during probe
     std::vector<uint16_t> _probe_column_inserted_id;
diff --git a/regression-test/data/query_p0/operator/test_set_operator.out b/regression-test/data/query_p0/operator/test_set_operator.out
index 1d8bc5ef93e..48eb4a0c9ba 100644
--- a/regression-test/data/query_p0/operator/test_set_operator.out
+++ b/regression-test/data/query_p0/operator/test_set_operator.out
@@ -13,3 +13,15 @@
 9
 9
 
+-- !select_minus --
+3      3
+4      4
+5      5
+7      7
+
+-- !select_except --
+3      3
+4      4
+5      5
+7      7
+
diff --git a/regression-test/suites/query_p0/operator/test_set_operator.groovy b/regression-test/suites/query_p0/operator/test_set_operator.groovy
index 1bc9cc29e4c..7d6219585e4 100644
--- a/regression-test/suites/query_p0/operator/test_set_operator.groovy
+++ b/regression-test/suites/query_p0/operator/test_set_operator.groovy
@@ -89,4 +89,12 @@ suite("test_set_operators", "query,p0,arrow_flight_sql") {
             t3 
             on t2.col1=t3.col1;
     """
+
+    order_qt_select_minus """
+        select col1, col1 from t1 minus select col1, col1 from t2;
+    """
+
+    order_qt_select_except """
+        select col1, col1 from t1 except select col1, col1 from t2;
+    """
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to