This is an automated email from the ASF dual-hosted git repository. mrhhsg pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new ec6abb6b432 [fix](set) incorrect result of set operator (#35607) ec6abb6b432 is described below commit ec6abb6b4328268cd1b167d0c43b4a1ba6cc2efa Author: Jerry Hu <mrh...@gmail.com> AuthorDate: Thu May 30 10:33:13 2024 +0800 [fix](set) incorrect result of set operator (#35607) If the select list contains duplicated expressions (for example `select col1, col1 from t1 minus select col1, col1 from t2`), the result of the set operator is incorrect. This change swaps the key and value of the build column index map: it is now keyed by the idx mapped to column types, with the build block column_id as the value, instead of the other way around. ## Proposed changes Issue Number: close #28438 <!--Describe your changes.--> --- be/src/pipeline/dependency.h | 4 ++-- be/src/pipeline/exec/set_sink_operator.cpp | 2 +- be/src/pipeline/exec/set_source_operator.cpp | 4 ++-- be/src/vec/exec/vset_operation_node.cpp | 6 +++--- be/src/vec/exec/vset_operation_node.h | 4 ++-- regression-test/data/query_p0/operator/test_set_operator.out | 12 ++++++++++++ .../suites/query_p0/operator/test_set_operator.groovy | 8 ++++++++ 7 files changed, 30 insertions(+), 10 deletions(-) diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index cdc0eec3933..d7084f85d5d 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -609,8 +609,8 @@ public: vectorized::Block build_block; // build to source //record element size in hashtable int64_t valid_element_in_hash_tbl = 0; - //first:column_id, could point to origin column or cast column - //second:idx mapped to column types + //first: idx mapped to column types + //second: column_id, could point to origin column or cast column std::unordered_map<int, int> build_col_idx; //// shared static states (shared, decided in prepare/open...) 
diff --git a/be/src/pipeline/exec/set_sink_operator.cpp b/be/src/pipeline/exec/set_sink_operator.cpp index be44d4dbf7d..ff6bdfed1a3 100644 --- a/be/src/pipeline/exec/set_sink_operator.cpp +++ b/be/src/pipeline/exec/set_sink_operator.cpp @@ -137,7 +137,7 @@ Status SetSinkOperatorX<is_intersect>::_extract_build_column( raw_ptrs[i] = block.get_by_position(result_col_id).column.get(); DCHECK_GE(result_col_id, 0); - local_state._shared_state->build_col_idx.insert({result_col_id, i}); + local_state._shared_state->build_col_idx.insert({i, result_col_id}); } return Status::OK(); } diff --git a/be/src/pipeline/exec/set_source_operator.cpp b/be/src/pipeline/exec/set_source_operator.cpp index 0f118a7818e..0994350430b 100644 --- a/be/src/pipeline/exec/set_source_operator.cpp +++ b/be/src/pipeline/exec/set_source_operator.cpp @@ -151,8 +151,8 @@ void SetSourceOperatorX<is_intersect>::_add_result_columns( auto it = value.begin(); for (auto idx = build_col_idx.begin(); idx != build_col_idx.end(); ++idx) { - auto& column = *build_block.get_by_position(idx->first).column; - local_state._mutable_cols[idx->second]->insert_from(column, it->row_num); + auto& column = *build_block.get_by_position(idx->second).column; + local_state._mutable_cols[idx->first]->insert_from(column, it->row_num); } block_size++; } diff --git a/be/src/vec/exec/vset_operation_node.cpp b/be/src/vec/exec/vset_operation_node.cpp index c207fb18f05..2b2573d83bc 100644 --- a/be/src/vec/exec/vset_operation_node.cpp +++ b/be/src/vec/exec/vset_operation_node.cpp @@ -331,8 +331,8 @@ void VSetOperationNode<is_intersect>::add_result_columns(RowRefListWithFlags& va int& block_size) { auto it = value.begin(); for (auto idx = _build_col_idx.begin(); idx != _build_col_idx.end(); ++idx) { - const auto& column = *_build_block.get_by_position(idx->first).column; - _mutable_cols[idx->second]->insert_from(column, it->row_num); + const auto& column = *_build_block.get_by_position(idx->second).column; + 
_mutable_cols[idx->first]->insert_from(column, it->row_num); } block_size++; } @@ -434,7 +434,7 @@ Status VSetOperationNode<is_intersect>::extract_build_column(Block& block, Colum } raw_ptrs[i] = block.get_by_position(result_col_id).column.get(); DCHECK_GE(result_col_id, 0); - _build_col_idx.insert({result_col_id, i}); + _build_col_idx.insert({i, result_col_id}); } return Status::OK(); } diff --git a/be/src/vec/exec/vset_operation_node.h b/be/src/vec/exec/vset_operation_node.h index 508f8073689..9f3ba8fba36 100644 --- a/be/src/vec/exec/vset_operation_node.h +++ b/be/src/vec/exec/vset_operation_node.h @@ -112,8 +112,8 @@ private: std::vector<VExprContextSPtrs> _child_expr_lists; //record build column type DataTypes _left_table_data_types; - //first:column_id, could point to origin column or cast column - //second:idx mapped to column types + //first: idx mapped to column types + //second: column_id, could point to origin column or cast column std::unordered_map<int, int> _build_col_idx; //record insert column id during probe std::vector<uint16_t> _probe_column_inserted_id; diff --git a/regression-test/data/query_p0/operator/test_set_operator.out b/regression-test/data/query_p0/operator/test_set_operator.out index 1d8bc5ef93e..48eb4a0c9ba 100644 --- a/regression-test/data/query_p0/operator/test_set_operator.out +++ b/regression-test/data/query_p0/operator/test_set_operator.out @@ -13,3 +13,15 @@ 9 9 +-- !select_minus -- +3 3 +4 4 +5 5 +7 7 + +-- !select_except -- +3 3 +4 4 +5 5 +7 7 + diff --git a/regression-test/suites/query_p0/operator/test_set_operator.groovy b/regression-test/suites/query_p0/operator/test_set_operator.groovy index 1bc9cc29e4c..7d6219585e4 100644 --- a/regression-test/suites/query_p0/operator/test_set_operator.groovy +++ b/regression-test/suites/query_p0/operator/test_set_operator.groovy @@ -89,4 +89,12 @@ suite("test_set_operators", "query,p0,arrow_flight_sql") { t3 on t2.col1=t3.col1; """ + + order_qt_select_minus """ + select col1, col1 from t1 
minus select col1, col1 from t2; + """ + + order_qt_select_except """ + select col1, col1 from t1 except select col1, col1 from t2; + """ } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org