This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch new_join
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/new_join by this push:
new 986f6a74299 support tpch q21 (#26248)
986f6a74299 is described below
commit 986f6a742998193f172f65e00c49a7a5a3cb79fc
Author: Pxl <[email protected]>
AuthorDate: Thu Nov 2 10:30:37 2023 +0800
support tpch q21 (#26248)
---
be/src/vec/common/hash_table/hash_map.h | 33 +-
be/src/vec/exec/join/process_hash_table_probe.h | 17 +-
.../vec/exec/join/process_hash_table_probe_impl.h | 332 +++------------------
3 files changed, 61 insertions(+), 321 deletions(-)
diff --git a/be/src/vec/common/hash_table/hash_map.h b/be/src/vec/common/hash_table/hash_map.h
index cafe01e8231..9e368bb3ff6 100644
--- a/be/src/vec/common/hash_table/hash_map.h
+++ b/be/src/vec/common/hash_table/hash_map.h
@@ -236,6 +236,8 @@ public:
size_t size() const { return next.size(); }
+ std::vector<uint8_t>& get_visited() { return visited; }
+
void build(const Key* __restrict keys, const uint32_t* __restrict
bucket_nums,
size_t num_elem) {
build_keys = keys;
@@ -246,7 +248,7 @@ public:
}
}
- template <int JoinOpType>
+ template <int JoinOpType, bool with_other_conjuncts>
auto find_batch(const Key* __restrict keys, const uint32_t* __restrict
bucket_nums,
int probe_idx, uint32_t build_idx, int probe_rows,
uint32_t* __restrict probe_idxs, uint32_t* __restrict
build_idxs) {
@@ -254,8 +256,8 @@ public:
JoinOpType == doris::TJoinOp::FULL_OUTER_JOIN ||
JoinOpType == doris::TJoinOp::LEFT_OUTER_JOIN ||
JoinOpType == doris::TJoinOp::RIGHT_OUTER_JOIN) {
- return _find_batch_inner_outer_join<JoinOpType>(keys, bucket_nums,
probe_idx, build_idx,
- probe_rows,
probe_idxs, build_idxs);
+ return _find_batch_inner_outer_join<JoinOpType,
with_other_conjuncts>(
+ keys, bucket_nums, probe_idx, build_idx, probe_rows,
probe_idxs, build_idxs);
}
if constexpr (JoinOpType == doris::TJoinOp::LEFT_ANTI_JOIN ||
JoinOpType == doris::TJoinOp::LEFT_SEMI_JOIN) {
@@ -264,7 +266,8 @@ public:
}
if constexpr (JoinOpType == doris::TJoinOp::RIGHT_ANTI_JOIN ||
JoinOpType == doris::TJoinOp::RIGHT_SEMI_JOIN) {
- return _find_batch_right_semi_anti(keys, bucket_nums, probe_idx,
probe_rows);
+ return _find_batch_right_semi_anti<with_other_conjuncts>(
+ keys, bucket_nums, probe_idx, probe_rows, probe_idxs,
build_idxs);
}
return std::tuple {0, 0u, 0};
}
@@ -292,21 +295,30 @@ public:
}
private:
+ template <bool with_other_conjuncts>
auto _find_batch_right_semi_anti(const Key* __restrict keys,
const uint32_t* __restrict bucket_nums,
int probe_idx,
- int probe_rows) {
+ int probe_rows, uint32_t* __restrict
probe_idxs,
+ uint32_t* __restrict build_idxs) {
+ auto matched_cnt = 0;
while (probe_idx < probe_rows) {
auto build_idx = first[bucket_nums[probe_idx]];
while (build_idx) {
if (keys[probe_idx] == build_keys[build_idx]) {
- visited[build_idx] = 1;
+ if constexpr (with_other_conjuncts) {
+ build_idxs[matched_cnt] = build_idx;
+ probe_idxs[matched_cnt] = probe_idx;
+ matched_cnt++;
+ } else {
+ visited[build_idx] = 1;
+ }
}
build_idx = next[build_idx];
}
probe_idx++;
}
- return std::tuple {probe_idx, 0u, 0};
+ return std::tuple {probe_idx, 0u, matched_cnt};
}
template <int JoinOpType>
@@ -334,7 +346,7 @@ private:
return std::tuple {probe_idx, 0u, matched_cnt};
}
- template <int JoinOpType>
+ template <int JoinOpType, bool with_other_conjuncts>
auto _find_batch_inner_outer_join(const Key* __restrict keys,
const uint32_t* __restrict bucket_nums,
int probe_idx,
uint32_t build_idx, int probe_rows,
@@ -348,8 +360,9 @@ private:
if (keys[probe_idx] == build_keys[build_idx]) {
probe_idxs[matched_cnt] = probe_idx;
build_idxs[matched_cnt] = build_idx;
- if constexpr (JoinOpType ==
doris::TJoinOp::RIGHT_OUTER_JOIN ||
- JoinOpType ==
doris::TJoinOp::FULL_OUTER_JOIN) {
+ if constexpr (!with_other_conjuncts &&
+ (JoinOpType ==
doris::TJoinOp::RIGHT_OUTER_JOIN ||
+ JoinOpType ==
doris::TJoinOp::FULL_OUTER_JOIN)) {
visited[build_idx] = 1;
}
matched_cnt++;
diff --git a/be/src/vec/exec/join/process_hash_table_probe.h b/be/src/vec/exec/join/process_hash_table_probe.h
index 9c2fd6094b5..34b5dc3ee8d 100644
--- a/be/src/vec/exec/join/process_hash_table_probe.h
+++ b/be/src/vec/exec/join/process_hash_table_probe.h
@@ -68,24 +68,13 @@ struct ProcessHashTableProbe {
// and output block may be different
// The output result is determined by the other join conjunct result and
same_to_prev struct
Status do_other_join_conjuncts(Block* output_block, bool is_mark_join,
- int multi_matched_output_row_count, bool
is_the_last_sub_block);
-
- void _process_splited_equal_matched_tuples(int start_row_idx, int
row_count,
- const UInt8* __restrict
other_hit_column,
- UInt8* __restrict null_map_data,
- UInt8* __restrict filter_map,
Block* output_block);
-
- void _emplace_element(int32_t block_row, int& current_offset);
+ bool is_the_last_sub_block,
std::vector<uint8_t>& visited);
template <typename HashTableType>
typename HashTableType::State _init_probe_side(HashTableType&
hash_table_ctx, size_t probe_rows,
bool
with_other_join_conjuncts,
const uint8_t* null_map);
- template <typename Mapped, bool with_other_join_conjuncts>
- ForwardIterator<Mapped>& _probe_row_match(int& current_offset, int&
probe_index,
- size_t& probe_size, bool&
all_match_one);
-
// Process full outer join/ right join / right semi/anti join to output
the join result
// in hash table
template <typename HashTableType>
@@ -111,12 +100,8 @@ struct ProcessHashTableProbe {
std::unique_ptr<Arena> _serialize_key_arena;
std::vector<char> _probe_side_find_result;
- std::vector<bool*> _visited_map;
- std::vector<bool> _same_to_prev;
-
int _right_col_idx;
int _right_col_len;
- int _row_count_from_last_probe;
bool _have_other_join_conjunct;
bool _is_right_semi_anti;
diff --git a/be/src/vec/exec/join/process_hash_table_probe_impl.h b/be/src/vec/exec/join/process_hash_table_probe_impl.h
index 6a21086f50e..e3fadf2056f 100644
--- a/be/src/vec/exec/join/process_hash_table_probe_impl.h
+++ b/be/src/vec/exec/join/process_hash_table_probe_impl.h
@@ -68,9 +68,9 @@ void ProcessHashTableProbe<JoinOpType, Parent>::build_side_output_column(
constexpr auto probe_all =
JoinOpType == TJoinOp::LEFT_OUTER_JOIN || JoinOpType ==
TJoinOp::FULL_OUTER_JOIN;
- if (!is_semi_anti_join || have_other_join_conjunct) {
+ if ((!is_semi_anti_join || have_other_join_conjunct) && size) {
for (int i = 0; i < _right_col_len; i++) {
- const auto& column = *_build_block->get_by_position(i).column;
+ const auto& column = *_build_block->safe_get_by_position(i).column;
if (output_slot_flags[i]) {
mcol[i + _right_col_idx]->insert_indices_from_join(column,
_build_indexs.data(),
_build_indexs.data() + size);
@@ -126,17 +126,7 @@ typename HashTableType::State ProcessHashTableProbe<JoinOpType, Parent>::_init_p
? 0
: _parent->left_table_data_types().size();
_right_col_len = _parent->right_table_data_types().size();
- _row_count_from_last_probe = 0;
-
- _build_indexs.clear();
- _probe_indexs.clear();
- if (with_other_join_conjuncts) {
- // use in right join to change visited state after exec the vother
join conjunct
- _visited_map.clear();
- _same_to_prev.clear();
- _visited_map.reserve(_batch_size * PROBE_SIDE_EXPLODE_RATE);
- _same_to_prev.reserve(_batch_size * PROBE_SIDE_EXPLODE_RATE);
- }
+
_probe_indexs.resize(_batch_size);
_build_indexs.resize(_batch_size);
@@ -149,41 +139,6 @@ typename HashTableType::State ProcessHashTableProbe<JoinOpType, Parent>::_init_p
return typename HashTableType::State(_parent->_probe_columns);
}
-template <int JoinOpType, typename Parent>
-template <typename Mapped, bool with_other_join_conjuncts>
-ForwardIterator<Mapped>& ProcessHashTableProbe<JoinOpType,
Parent>::_probe_row_match(
- int& current_offset, int& probe_index, size_t& probe_size, bool&
all_match_one) {
- auto& probe_row_match_iter =
std::get<ForwardIterator<Mapped>>(_parent->_probe_row_match_iter);
- if (!probe_row_match_iter.ok()) {
- return probe_row_match_iter;
- }
-
- SCOPED_TIMER(_search_hashtable_timer);
- for (; probe_row_match_iter.ok() && current_offset < _batch_size;
++probe_row_match_iter) {
- _emplace_element(probe_row_match_iter->row_num, current_offset);
- _probe_indexs.emplace_back(probe_index);
- if constexpr (with_other_join_conjuncts) {
- _visited_map.emplace_back(&probe_row_match_iter->visited);
- }
- }
-
- _row_count_from_last_probe = current_offset;
- all_match_one &= (current_offset == 1);
- if (!probe_row_match_iter.ok()) {
- ++probe_index;
- }
- probe_size = 1;
-
- return probe_row_match_iter;
-}
-
-template <int JoinOpType, typename Parent>
-void ProcessHashTableProbe<JoinOpType, Parent>::_emplace_element(int32_t
block_row,
- int&
current_offset) {
- _build_indexs.emplace_back(block_row);
- current_offset++;
-}
-
template <int JoinOpType, typename Parent>
template <bool need_null_map_for_probe, bool ignore_null, typename
HashTableType,
bool with_other_conjuncts, bool is_mark_join>
@@ -194,39 +149,20 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
size_t
probe_rows) {
auto& probe_index = _parent->_probe_index;
auto& build_index = _parent->_build_index;
-
- using Mapped = typename HashTableType::Mapped;
+ auto last_probe_index = probe_index;
_init_probe_side<HashTableType>(hash_table_ctx, probe_rows,
with_other_conjuncts,
need_null_map_for_probe ? null_map->data()
: nullptr);
auto& mcol = mutable_block.mutable_columns();
- int last_probe_index = probe_index;
-
int current_offset = 0;
bool all_match_one = false;
size_t probe_size = 0;
- auto& probe_row_match_iter = _probe_row_match<Mapped,
with_other_conjuncts>(
- current_offset, probe_index, probe_size, all_match_one);
-
- // If not(which means it excceed batch size), probe_index is not increased
and
- // remaining matched rows for the current probe row will be
- // handled in the next call of this function
- int multi_matched_output_row_count = 0;
-
// Is the last sub block of splitted block
bool is_the_last_sub_block = false;
- if (with_other_conjuncts && probe_size != 0) {
- is_the_last_sub_block = !probe_row_match_iter.ok();
- _same_to_prev.emplace_back(false);
- for (int i = 0; i < current_offset - 1; ++i) {
- _same_to_prev.emplace_back(true);
- }
- }
-
std::unique_ptr<ColumnFilterHelper> mark_column;
if (is_mark_join) {
mark_column = std::make_unique<ColumnFilterHelper>(*mcol[mcol.size() -
1]);
@@ -235,7 +171,7 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
{
SCOPED_TIMER(_search_hashtable_timer);
auto [new_probe_idx, new_build_idx, new_current_offset] =
- hash_table_ctx.hash_table->template find_batch<JoinOpType>(
+ hash_table_ctx.hash_table->template find_batch<JoinOpType,
with_other_conjuncts>(
hash_table_ctx.keys,
hash_table_ctx.bucket_nums.data(), probe_index,
build_index, probe_rows, _probe_indexs.data(),
_build_indexs.data());
probe_index = new_probe_idx;
@@ -256,8 +192,8 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
output_block->swap(mutable_block.to_block());
if constexpr (with_other_conjuncts) {
- return do_other_join_conjuncts(output_block, is_mark_join,
multi_matched_output_row_count,
- is_the_last_sub_block);
+ return do_other_join_conjuncts(output_block, is_mark_join,
is_the_last_sub_block,
+
hash_table_ctx.hash_table->get_visited());
}
return Status::OK();
@@ -265,8 +201,8 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_process(HashTableType& hash
template <int JoinOpType, typename Parent>
Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
- Block* output_block, bool is_mark_join, int
multi_matched_output_row_count,
- bool is_the_last_sub_block) {
+ Block* output_block, bool is_mark_join, bool is_the_last_sub_block,
+ std::vector<uint8_t>& visited) {
// dispose the other join conjunct exec
auto row_count = output_block->rows();
if (!row_count) {
@@ -301,22 +237,10 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
auto null_map_column = ColumnVector<UInt8>::create(row_count, 0);
auto* __restrict null_map_data = null_map_column->get_data().data();
- // It contains non-first sub block of splited equal-conjuncts-matched
tuples from last probe row
- if (_row_count_from_last_probe > 0) {
- _process_splited_equal_matched_tuples(0,
_row_count_from_last_probe, filter_column_ptr,
- null_map_data, filter_map,
output_block);
- // This is the last sub block of splitted block, and no
equal-conjuncts-matched tuple
- // is output in all sub blocks, need to output a tuple for this
probe row
- if (is_the_last_sub_block &&
!_parent->_is_any_probe_match_row_output) {
- filter_map[0] = true;
- null_map_data[0] = true;
- }
- }
- int end_idx = row_count - multi_matched_output_row_count;
// process equal-conjuncts-matched tuples that are newly generated
// in this run if there are any.
- for (int i = _row_count_from_last_probe; i < end_idx; ++i) {
- auto join_hit = _visited_map[i] != nullptr;
+ for (int i = 0; i < row_count; ++i) {
+ auto join_hit = _build_indexs[i];
auto other_hit = filter_column_ptr[i];
if (!other_hit) {
@@ -330,89 +254,31 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
}
null_map_data[i] = !join_hit || !other_hit;
- // For cases where one probe row matches multiple build rows for
equal conjuncts,
- // all the other-conjuncts-matched tuples should be output.
- //
- // Other-conjuncts-NOT-matched tuples fall into two categories:
- // 1. The beginning consecutive one(s).
- // For these tuples, only the last one is marked to output;
- // If there are any following other-conjuncts-matched tuples,
- // the last tuple is also marked NOT to output.
- // 2. All the remaining other-conjuncts-NOT-matched tuples.
- // All these tuples are marked not to output.
if (join_hit) {
- *_visited_map[i] |= other_hit;
- filter_map[i] = other_hit || !_same_to_prev[i] ||
- (!filter_column_ptr[i] && filter_map[i - 1]);
- // Here to keep only hit join conjunct and other join conjunt
is true need to be output.
- // if not, only some key must keep one row will output will
null right table column
- if (_same_to_prev[i] && filter_map[i] && !filter_column_ptr[i
- 1]) {
- filter_map[i - 1] = false;
- }
+ filter_map[i] = other_hit;
} else {
filter_map[i] = true;
}
}
- // It contains the first sub block of splited equal-conjuncts-matched
tuples of the current probe row
- if (multi_matched_output_row_count > 0) {
- _parent->_is_any_probe_match_row_output = false;
- _process_splited_equal_matched_tuples(row_count -
multi_matched_output_row_count,
-
multi_matched_output_row_count, filter_column_ptr,
- null_map_data, filter_map,
output_block);
- }
-
for (size_t i = 0; i < row_count; ++i) {
if (filter_map[i]) {
_tuple_is_null_right_flags->emplace_back(null_map_data[i]);
+ if constexpr (JoinOpType == TJoinOp::FULL_OUTER_JOIN ||
+ JoinOpType == TJoinOp::RIGHT_OUTER_JOIN) {
+ visited[_build_indexs[i]] = 1;
+ }
}
}
output_block->get_by_position(result_column_id).column =
std::move(new_filter_column);
} else if constexpr (JoinOpType == TJoinOp::LEFT_SEMI_JOIN) {
- // TODO: resize in advance
- auto new_filter_column = ColumnVector<UInt8>::create();
+ auto new_filter_column = ColumnVector<UInt8>::create(row_count);
auto& filter_map = new_filter_column->get_data();
size_t start_row_idx = 1;
- // We are handling euqual-conjuncts matched tuples that are splitted
into multiple blocks
- if (_row_count_from_last_probe > 0) {
- if (_parent->_is_any_probe_match_row_output) {
- // if any matched tuple for this probe row is output,
- // ignore all the following tuples for this probe row.
- for (int row_idx = 0; row_idx < _row_count_from_last_probe;
++row_idx) {
- filter_map.emplace_back(false);
- }
- start_row_idx += _row_count_from_last_probe;
- if (_row_count_from_last_probe < row_count) {
-
filter_map.emplace_back(filter_column_ptr[_row_count_from_last_probe]);
- }
- } else {
- filter_map.emplace_back(filter_column_ptr[0]);
- }
- } else {
- filter_map.emplace_back(filter_column_ptr[0]);
- }
+ filter_map.emplace_back(filter_column_ptr[0]);
for (size_t i = start_row_idx; i < row_count; ++i) {
- if (filter_column_ptr[i] || (_same_to_prev[i] && filter_map[i -
1])) {
- // Only last same element is true, output last one
- filter_map.push_back(true);
- filter_map[i - 1] = !_same_to_prev[i] && filter_map[i - 1];
- } else {
- filter_map.push_back(false);
- }
- }
- // It contains the first sub block of splited equal-conjuncts-matched
tuples of the current probe row
- if (multi_matched_output_row_count > 0) {
- // If a matched row is output, all the equal-matched tuples in
- // the following sub blocks should be ignored
- _parent->_is_any_probe_match_row_output = filter_map[row_count -
1];
- } else if (_row_count_from_last_probe > 0 &&
!_parent->_is_any_probe_match_row_output) {
- // We are handling euqual-conjuncts matched tuples that are
splitted into multiple blocks,
- // and no matched tuple has been output in all previous run.
- // If a tuple is output in this run, all the following mathced
tuples should be ignored
- if (filter_map[_row_count_from_last_probe - 1]) {
- _parent->_is_any_probe_match_row_output = true;
- }
+ filter_map[i] = filter_column_ptr[i];
}
/// FIXME: incorrect result of semi mark join with other
conjuncts(null value missed).
@@ -423,14 +289,9 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
// For mark join, we only filter rows which have duplicate join
keys.
// And then, we set matched_map to the join result to do the mark
join's filtering.
- for (size_t i = 1; i < row_count; ++i) {
- if (!_same_to_prev[i]) {
- helper.insert_value(filter_map[i - 1]);
- filter_map[i - 1] = true;
- }
+ for (size_t i = 0; i < row_count; ++i) {
+ helper.insert_value(filter_map[i]);
}
- helper.insert_value(filter_map[filter_map.size() - 1]);
- filter_map[filter_map.size() - 1] = true;
}
output_block->get_by_position(result_column_id).column =
std::move(new_filter_column);
@@ -449,34 +310,12 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
// if there are none, just pick a tuple and output.
size_t start_row_idx = 1;
- // We are handling euqual-conjuncts matched tuples that are splitted
into multiple blocks
- if (_row_count_from_last_probe > 0 &&
_parent->_is_any_probe_match_row_output) {
- // if any matched tuple for this probe row is output,
- // ignore all the following tuples for this probe row.
- for (int row_idx = 0; row_idx < _row_count_from_last_probe;
++row_idx) {
- filter_map[row_idx] = false;
- }
- start_row_idx += _row_count_from_last_probe;
- if (_row_count_from_last_probe < row_count) {
- filter_map[_row_count_from_last_probe] =
- filter_column_ptr[_row_count_from_last_probe] &&
- _visited_map[_row_count_from_last_probe];
- }
- } else {
- // Both equal conjuncts and other conjuncts are true
- filter_map[0] = filter_column_ptr[0] && _visited_map[0];
- }
+ // Both equal conjuncts and other conjuncts are true
+ filter_map[0] = filter_column_ptr[0] && _build_indexs[0];
for (size_t i = start_row_idx; i < row_count; ++i) {
- if ((_visited_map[i] && filter_column_ptr[i]) ||
- (_same_to_prev[i] && filter_map[i - 1])) {
- // When either of two conditions is meet:
- // 1. Both equal conjuncts and other conjuncts are true or
same_to_prev
- // 2. This row is joined from the same build side row as the
previous row
- // Set filter_map[i] to true and filter_map[i - 1] to false if
same_to_prev[i]
- // is true.
- filter_map[i] = true;
- filter_map[i - 1] = !_same_to_prev[i] && filter_map[i - 1];
+ if (_build_indexs[i] && filter_column_ptr[i]) {
+ filter_map[i] = _build_indexs[i] && filter_column_ptr[i];
} else {
filter_map[i] = false;
}
@@ -487,60 +326,8 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
*(output_block->get_by_position(orig_columns - 1)
.column->assume_mutable()))
.get_data();
- for (int i = 1; i < row_count; ++i) {
- if (!_same_to_prev[i]) {
- matched_map.push_back(!filter_map[i - 1]);
- filter_map[i - 1] = true;
- }
- }
- matched_map.push_back(!filter_map[row_count - 1]);
- filter_map[row_count - 1] = true;
- } else {
- int end_row_idx = 0;
- if (_row_count_from_last_probe > 0) {
- end_row_idx = row_count - multi_matched_output_row_count;
- if (!_parent->_is_any_probe_match_row_output) {
- // We are handling euqual-conjuncts matched tuples that
are splitted into multiple blocks,
- // and no matched tuple has been output in all previous
run.
- // If a tuple is output in this run, all the following
mathced tuples should be ignored
- if (filter_map[_row_count_from_last_probe - 1]) {
- _parent->_is_any_probe_match_row_output = true;
- filter_map[_row_count_from_last_probe - 1] = false;
- }
- if (is_the_last_sub_block &&
!_parent->_is_any_probe_match_row_output) {
- // This is the last sub block of splitted block, and
no equal-conjuncts-matched tuple
- // is output in all sub blocks, output a tuple for
this probe row
- filter_map[0] = true;
- }
- }
- if (multi_matched_output_row_count > 0) {
- // It contains the first sub block of splited
equal-conjuncts-matched tuples of the current probe row
- // If a matched row is output, all the equal-matched
tuples in
- // the following sub blocks should be ignored
- _parent->_is_any_probe_match_row_output =
filter_map[row_count - 1];
- filter_map[row_count - 1] = false;
- }
- } else if (multi_matched_output_row_count > 0) {
- end_row_idx = row_count - multi_matched_output_row_count;
- // It contains the first sub block of splited
equal-conjuncts-matched tuples of the current probe row
- // If a matched row is output, all the equal-matched tuples in
- // the following sub blocks should be ignored
- _parent->_is_any_probe_match_row_output = filter_map[row_count
- 1];
- filter_map[row_count - 1] = false;
- } else {
- end_row_idx = row_count;
- }
-
- // Same to the semi join, but change the last value to opposite
value
- for (int i = 1 + _row_count_from_last_probe; i < end_row_idx; ++i)
{
- if (!_same_to_prev[i]) {
- filter_map[i - 1] = !filter_map[i - 1];
- }
- }
- auto non_sub_blocks_matched_row_count =
- row_count - _row_count_from_last_probe -
multi_matched_output_row_count;
- if (non_sub_blocks_matched_row_count > 0) {
- filter_map[end_row_idx - 1] = !filter_map[end_row_idx - 1];
+ for (int i = 0; i < row_count; ++i) {
+ matched_map.push_back(!filter_map[i]);
}
}
@@ -548,16 +335,13 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
} else if constexpr (JoinOpType == TJoinOp::RIGHT_SEMI_JOIN ||
JoinOpType == TJoinOp::RIGHT_ANTI_JOIN) {
for (int i = 0; i < row_count; ++i) {
- DCHECK(_visited_map[i]);
- *_visited_map[i] |= filter_column_ptr[i];
+ visited[_build_indexs[i]] |= filter_column_ptr[i];
}
} else if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN) {
auto filter_size = 0;
for (int i = 0; i < row_count; ++i) {
- DCHECK(_visited_map[i]);
- auto result = filter_column_ptr[i];
- *_visited_map[i] |= result;
- filter_size += result;
+ visited[_build_indexs[i]] |= filter_column_ptr[i];
+ filter_size += filter_column_ptr[i];
}
_tuple_is_null_left_flags->resize_fill(filter_size, 0);
}
@@ -579,42 +363,6 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::do_other_join_conjuncts(
return Status::OK();
}
-// For left or full outer join with other conjuncts.
-// If multiple equal-conjuncts-matched tuples is splitted into several
-// sub blocks, just filter out all the other-conjuncts-NOT-matched tuples at
first,
-// and when processing the last sub block, check whether there are any
-// equal-conjuncts-matched tuple is output in all sub blocks,
-// if not, just pick a tuple and output.
-template <int JoinOpType, typename Parent>
-void ProcessHashTableProbe<JoinOpType,
Parent>::_process_splited_equal_matched_tuples(
- int start_row_idx, int row_count, const UInt8* __restrict
other_hit_column,
- UInt8* __restrict null_map_data, UInt8* __restrict filter_map, Block*
output_block) {
- int end_row_idx = start_row_idx + row_count;
- for (int i = start_row_idx; i < end_row_idx; ++i) {
- auto join_hit = _visited_map[i] != nullptr;
- auto other_hit = other_hit_column[i];
-
- if (!other_hit) {
- for (size_t j = 0; j < _right_col_len; ++j) {
- typeid_cast<ColumnNullable*>(
- std::move(*output_block->get_by_position(j +
_right_col_idx).column)
- .assume_mutable()
- .get())
- ->get_null_map_data()[i] = true;
- }
- }
-
- null_map_data[i] = !join_hit || !other_hit;
- filter_map[i] = other_hit;
-
- if (join_hit) {
- *_visited_map[i] |= other_hit;
- }
- }
- _parent->_is_any_probe_match_row_output |=
- simd::contain_byte(filter_map + start_row_idx, row_count, 1);
-}
-
template <int JoinOpType, typename Parent>
template <typename HashTableType>
Status ProcessHashTableProbe<JoinOpType, Parent>::process_data_in_hashtable(
@@ -624,31 +372,25 @@ Status ProcessHashTableProbe<JoinOpType, Parent>::process_data_in_hashtable(
auto& mcol = mutable_block.mutable_columns();
*eos = hash_table_ctx.hash_table->template
iterate_map<JoinOpType>(_build_indexs);
auto block_size = _build_indexs.size();
- int right_col_idx =
- JoinOpType == TJoinOp::RIGHT_OUTER_JOIN || JoinOpType ==
TJoinOp::FULL_OUTER_JOIN
- ? _parent->left_table_data_types().size()
- : 0;
- int right_col_len = _parent->right_table_data_types().size();
if (block_size) {
- for (size_t j = 0; j < right_col_len; ++j) {
- const auto& column = *_build_block->get_by_position(j).column;
- mcol[j + right_col_idx]->insert_indices_from_join(
- column, _build_indexs.data(), _build_indexs.data() +
_build_indexs.size());
+ for (size_t j = 0; j < _right_col_len; ++j) {
+ const auto& column = *_build_block->safe_get_by_position(j).column;
+ mcol[j + _right_col_idx]->insert_indices_from_join(column,
_build_indexs.data(),
+
_build_indexs.data() + block_size);
}
// just resize the left table column in case with other conjunct to
make block size is not zero
if (_is_right_semi_anti && _have_other_join_conjunct) {
- auto target_size = mcol[right_col_idx]->size();
- for (int i = 0; i < right_col_idx; ++i) {
- mcol[i]->resize(target_size);
+ for (int i = 0; i < _right_col_idx; ++i) {
+ mcol[i]->resize(block_size);
}
}
// right outer join / full join need insert data of left table
if constexpr (JoinOpType == TJoinOp::RIGHT_OUTER_JOIN ||
JoinOpType == TJoinOp::FULL_OUTER_JOIN) {
- for (int i = 0; i < right_col_idx; ++i) {
+ for (int i = 0; i < _right_col_idx; ++i) {
assert_cast<ColumnNullable*>(mcol[i].get())->insert_many_defaults(block_size);
}
_tuple_is_null_left_flags->resize_fill(block_size, 1);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]