This is an automated email from the ASF dual-hosted git repository. lihaopeng pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 6a566ccb74 [Enhancement][Vectorized] add constexpr_loop_match (#10283) 6a566ccb74 is described below commit 6a566ccb74c42e387dd58a78a318385dbab36b3d Author: Pxl <952130...@qq.com> AuthorDate: Wed Jun 29 14:58:50 2022 +0800 [Enhancement][Vectorized] add constexpr_loop_match (#10283) --- be/src/vec/exec/join/vhash_join_node.cpp | 61 +++++++++++----------- be/src/vec/exec/join/vhash_join_node.h | 2 +- be/src/vec/functions/function_json.cpp | 34 +++++++++--- be/src/vec/utils/template_helpers.hpp | 89 +++++++++++++++++++++++++++----- 4 files changed, 136 insertions(+), 50 deletions(-) diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index fa1400a626..e625c6c883 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -24,6 +24,7 @@ #include "vec/core/materialize_block.h" #include "vec/exprs/vexpr.h" #include "vec/exprs/vexpr_context.h" +#include "vec/utils/template_helpers.hpp" #include "vec/utils/util.hpp" namespace doris::vectorized { @@ -37,7 +38,7 @@ std::variant<std::false_type, std::true_type> static inline make_bool_variant(bo } using ProfileCounter = RuntimeProfile::Counter; -template <class HashTableContext, bool ignore_null, bool build_unique> +template <class HashTableContext> struct ProcessHashTableBuild { ProcessHashTableBuild(int rows, Block& acquired_block, ColumnRawPtrs& build_raw_ptrs, HashJoinNode* join_node, int batch_size, uint8_t offset) @@ -49,8 +50,8 @@ struct ProcessHashTableBuild { _batch_size(batch_size), _offset(offset) {} - Status operator()(HashTableContext& hash_table_ctx, ConstNullMapPtr null_map, - bool has_runtime_filter) { + template <bool ignore_null, bool build_unique, bool has_runtime_filter> + void run(HashTableContext& hash_table_ctx, ConstNullMapPtr null_map) { using KeyGetter = typename HashTableContext::State; using Mapped = typename HashTableContext::Mapped; int64_t old_bucket_bytes = hash_table_ctx.hash_table.get_buffer_size_in_bytes(); @@ -73,7 +74,7 @@ struct ProcessHashTableBuild { hash_table_ctx.hash_table.reset_resize_timer(); vector<int>& inserted_rows = _join_node->_inserted_rows[&_acquired_block]; - if (has_runtime_filter) { + if constexpr (has_runtime_filter) { inserted_rows.reserve(_batch_size); } @@ -92,14 +93,14 @@ struct ProcessHashTableBuild { if (emplace_result.is_inserted()) { new (&emplace_result.get_mapped()) Mapped({k, _offset}); - if (has_runtime_filter) { + if constexpr (has_runtime_filter) { inserted_rows.push_back(k); } } else { if constexpr (!build_unique) { /// The first element of the list is stored in the value of the hash table, the rest in the pool. emplace_result.get_mapped().insert({k, _offset}, _join_node->_arena); - if (has_runtime_filter) { + if constexpr (has_runtime_filter) { inserted_rows.push_back(k); } } else { @@ -110,10 +111,17 @@ struct ProcessHashTableBuild { COUNTER_UPDATE(_join_node->_build_table_expanse_timer, hash_table_ctx.hash_table.get_resize_timer_value()); - - return Status::OK(); } + template <bool ignore_null, bool build_unique, bool has_runtime_filter> + struct Reducer { + template <typename... TArgs> + static void run(ProcessHashTableBuild<HashTableContext>& build, TArgs&&... args) { + build.template run<ignore_null, build_unique, has_runtime_filter>( + std::forward<TArgs>(args)...); + } + }; + private: const int _rows; int _skip_rows; @@ -275,7 +283,7 @@ struct ProcessHashTableProbe { { SCOPED_TIMER(_search_hashtable_timer); - for (; _probe_index < _probe_rows;) { + while (_probe_index < _probe_rows) { if constexpr (ignore_null) { if ((*null_map)[_probe_index]) { _items_counts[_probe_index++] = (uint32_t)0; @@ -394,7 +402,7 @@ struct ProcessHashTableProbe { int current_offset = 0; - for (; _probe_index < _probe_rows;) { + while (_probe_index < _probe_rows) { // ignore null rows if constexpr (ignore_null) { if ((*null_map)[_probe_index]) { @@ -509,11 +517,6 @@ struct ProcessHashTableProbe { } output_block->get_by_position(result_column_id).column = std::move(new_filter_column); - } else if constexpr (JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN) { - for (int i = 0; i < column->size(); ++i) { - DCHECK(visited_map[i]); - *visited_map[i] |= column->get_bool(i); - } } else if constexpr (JoinOpType::value == TJoinOp::LEFT_SEMI_JOIN) { auto new_filter_column = ColumnVector<UInt8>::create(); auto& filter_map = new_filter_column->get_data(); @@ -556,7 +559,8 @@ struct ProcessHashTableProbe { output_block->get_by_position(result_column_id).column = std::move(new_filter_column); } else if constexpr (JoinOpType::value == TJoinOp::RIGHT_SEMI_JOIN || - JoinOpType::value == TJoinOp::RIGHT_ANTI_JOIN) { + JoinOpType::value == TJoinOp::RIGHT_ANTI_JOIN || + JoinOpType::value == TJoinOp::RIGHT_OUTER_JOIN) { for (int i = 0; i < column->size(); ++i) { DCHECK(visited_map[i]); *visited_map[i] |= column->get_bool(i); @@ -822,7 +826,9 @@ Status HashJoinNode::close(RuntimeState* state) { VExpr::close(_build_expr_ctxs, state); VExpr::close(_probe_expr_ctxs, state); - if (_vother_join_conjunct_ptr) (*_vother_join_conjunct_ptr)->close(state); + if (_vother_join_conjunct_ptr) { + (*_vother_join_conjunct_ptr)->close(state); + } _hash_table_mem_tracker->release(_mem_used); @@ -1169,19 +1175,14 @@ Status HashJoinNode::_process_build_block(RuntimeState* state, Block& block, uin [&](auto&& arg) { using HashTableCtxType = std::decay_t<decltype(arg)>; if constexpr (!std::is_same_v<HashTableCtxType, std::monostate>) { -#define CALL_BUILD_FUNCTION(HAS_NULL, BUILD_UNIQUE) \ - ProcessHashTableBuild<HashTableCtxType, HAS_NULL, BUILD_UNIQUE> hash_table_build_process( \ - rows, block, raw_ptrs, this, state->batch_size(), offset); \ - st = hash_table_build_process(arg, &null_map_val, has_runtime_filter); - if (std::pair {has_null, _build_unique} == std::pair {true, true}) { - CALL_BUILD_FUNCTION(true, true); - } else if (std::pair {has_null, _build_unique} == std::pair {true, false}) { - CALL_BUILD_FUNCTION(true, false); - } else if (std::pair {has_null, _build_unique} == std::pair {false, true}) { - CALL_BUILD_FUNCTION(false, true); - } else { - CALL_BUILD_FUNCTION(false, false); - } + ProcessHashTableBuild<HashTableCtxType> hash_table_build_process( + rows, block, raw_ptrs, this, state->batch_size(), offset); + + constexpr_3_bool_match<ProcessHashTableBuild< + HashTableCtxType>::template Reducer>::run(has_null, _build_unique, + has_runtime_filter, + hash_table_build_process, arg, + &null_map_val); } else { LOG(FATAL) << "FATAL: uninited hash table"; } diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h index 7db2db48d3..175134b522 100644 --- a/be/src/vec/exec/join/vhash_join_node.h +++ b/be/src/vec/exec/join/vhash_join_node.h @@ -252,7 +252,7 @@ private: void _hash_table_init(); - template <class HashTableContext, bool ignore_null, bool build_unique> + template <class HashTableContext> friend struct ProcessHashTableBuild; template <class HashTableContext, class JoinOpType, bool ignore_null> diff --git a/be/src/vec/functions/function_json.cpp b/be/src/vec/functions/function_json.cpp index 2019ac5910..6abdc6a4b2 100644 --- a/be/src/vec/functions/function_json.cpp +++ b/be/src/vec/functions/function_json.cpp @@ -407,18 +407,37 @@ struct JsonParser<'3'> { } }; +template <> +struct JsonParser<'4'> { + // time + static void update_value(StringParser::ParseResult& result, rapidjson::Value& value, + StringRef data, rapidjson::Document::AllocatorType& allocator) { + // remove double quotes, "xxx" -> xxx + value.SetString(data.data + 1, data.size - 2, allocator); + } +}; + +template <int flag, typename Impl> +struct ExecuteReducer { + template <typename... TArgs> + static void run(TArgs&&... args) { + Impl::template execute_type<JsonParser<flag>>(std::forward<TArgs>(args)...); + } +}; + struct FunctionJsonArrayImpl { static constexpr auto name = "json_array"; - CONSTEXPR_LOOP_MATCH_DECLARE(execute_type); + template <int flag> + using Reducer = ExecuteReducer<flag, FunctionJsonArrayImpl>; static void execute_parse(const std::string& type_flags, const std::vector<const ColumnString*>& data_columns, std::vector<rapidjson::Value>& objects, rapidjson::Document::AllocatorType& allocator) { for (int i = 0; i < data_columns.size() - 1; i++) { - constexpr_loop_match<'0', '6', JsonParser>(type_flags[i], objects, allocator, - data_columns[i]); + constexpr_int_match<'0', '5', Reducer>::run(type_flags[i], objects, allocator, + data_columns[i]); } } @@ -439,7 +458,8 @@ struct FunctionJsonArrayImpl { struct FunctionJsonObjectImpl { static constexpr auto name = "json_object"; - CONSTEXPR_LOOP_MATCH_DECLARE(execute_type); + template <int flag> + using Reducer = ExecuteReducer<flag, FunctionJsonObjectImpl>; static void execute_parse(std::string type_flags, const std::vector<const ColumnString*>& data_columns, @@ -450,8 +470,8 @@ struct FunctionJsonObjectImpl { } for (int i = 0; i + 1 < data_columns.size() - 1; i += 2) { - constexpr_loop_match<'0', '6', JsonParser>(type_flags[i + 1], objects, allocator, - data_columns[i], data_columns[i + 1]); + constexpr_int_match<'0', '5', Reducer>::run(type_flags[i + 1], objects, allocator, + data_columns[i], data_columns[i + 1]); } } @@ -464,7 +484,7 @@ struct FunctionJsonObjectImpl { rapidjson::Value value; for (int i = 0; i < objects.size(); i++) { - JsonParser<'4'>::update_value(result, key, key_column->get_data_at(i), + JsonParser<'5'>::update_value(result, key, key_column->get_data_at(i), allocator); // key always is string TypeImpl::update_value(result, value, value_column->get_data_at(i), allocator); objects[i].AddMember(key, value, allocator); diff --git a/be/src/vec/utils/template_helpers.hpp b/be/src/vec/utils/template_helpers.hpp index afe745d1aa..5db9147c89 100644 --- a/be/src/vec/utils/template_helpers.hpp +++ b/be/src/vec/utils/template_helpers.hpp @@ -17,6 +17,8 @@ #pragma once +#include <limits> + #include "http/http_status.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column_complex.h" @@ -70,18 +72,81 @@ IAggregateFunction* create_class_with_type(const IDataType& argument_type, TArgs return nullptr; } -// We can use template lambda function in C++20, but now just use static function -#define CONSTEXPR_LOOP_MATCH_DECLARE(EXECUTE) \ - template <int start, int end, template <int> typename Object, typename... TArgs> \ - static void constexpr_loop_match(int target, TArgs&&... args) { \ - if constexpr (start < end) { \ - if (start == target) { \ - EXECUTE<Object<start>>(std::forward<TArgs>(args)...); \ - } else { \ - constexpr_loop_match<start + 1, end, Object>(target, \ - std::forward<TArgs>(args)...); \ - } \ - } \ +template <typename LoopType, LoopType start, LoopType end, template <LoopType> typename Reducer> +struct constexpr_loop_match { + template <typename... TArgs> + static void run(LoopType target, TArgs&&... args) { + if constexpr (start <= end) { + if (start == target) { + Reducer<start>::run(std::forward<TArgs>(args)...); + } else { + if constexpr (start < std::numeric_limits<LoopType>::max()) { + constexpr_loop_match<LoopType, start + 1, end, Reducer>::run( + target, std::forward<TArgs>(args)...); + } + } + } + } +}; + +template <int start, int end, template <int> typename Reducer> +using constexpr_int_match = constexpr_loop_match<int, start, end, Reducer>; + +template <template <bool> typename Reducer> +using constexpr_bool_match = constexpr_loop_match<bool, false, true, Reducer>; + +// we can't use variadic-parameters, because it will reject alias-templates. +// https://stackoverflow.com/questions/30707011/pack-expansion-for-alias-template +template <typename LoopType, LoopType start, LoopType end, + template <LoopType, LoopType> typename Reducer, + template <template <LoopType> typename> typename InnerMatch> +struct constexpr_2_loop_match { + template <LoopType matched> + using InnerReducer = Reducer<start, matched>; + + template <typename... TArgs> + static void run(LoopType target, TArgs&&... args) { + if constexpr (start <= end) { + if (start == target) { + InnerMatch<InnerReducer>::run(std::forward<TArgs>(args)...); + } else { + if constexpr (start < std::numeric_limits<LoopType>::max()) { + constexpr_2_loop_match<LoopType, start + 1, end, Reducer, InnerMatch>::run( + target, std::forward<TArgs>(args)...); + } + } + } } +}; + +template <template <bool, bool> typename Reducer> +using constexpr_2_bool_match = + constexpr_2_loop_match<bool, false, true, Reducer, constexpr_bool_match>; + +template <typename LoopType, LoopType start, LoopType end, + template <LoopType, LoopType, LoopType> typename Reducer, + template <template <LoopType, LoopType> typename> typename InnerMatch> +struct constexpr_3_loop_match { + template <LoopType matched, LoopType matched_next> + using InnerReducer = Reducer<start, matched, matched_next>; + + template <typename... TArgs> + static void run(LoopType target, TArgs&&... args) { + if constexpr (start <= end) { + if (start == target) { + InnerMatch<InnerReducer>::run(std::forward<TArgs>(args)...); + } else { + if constexpr (start < std::numeric_limits<LoopType>::max()) { + constexpr_3_loop_match<LoopType, start + 1, end, Reducer, InnerMatch>::run( + target, std::forward<TArgs>(args)...); + } + } + } + } +}; + +template <template <bool, bool, bool> typename Reducer> +using constexpr_3_bool_match = + constexpr_3_loop_match<bool, false, true, Reducer, constexpr_2_bool_match>; } // namespace doris::vectorized --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org