github-actions[bot] commented on code in PR #36250: URL: https://github.com/apache/doris/pull/36250#discussion_r1637786545
########## be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp: ########## @@ -98,6 +98,36 @@ size_t PartitionedHashJoinSinkLocalState::revocable_mem_size(RuntimeState* state return mem_size; } +size_t PartitionedHashJoinSinkLocalState::estimate_memory(vectorized::Block* block, + bool eos) const { + size_t estimated_size = block->allocated_bytes(); + if (!_shared_state->need_to_spill) { + size_t build_side_rows = block != nullptr ? block->rows() : 0; + if (_shared_state->inner_shared_state) { + auto inner_sink_state_ = _shared_state->inner_runtime_state->get_sink_local_state(); + if (inner_sink_state_) { + auto inner_sink_state = + assert_cast<HashJoinBuildSinkLocalState*>(inner_sink_state_); + estimated_size += inner_sink_state->_build_side_mem_used; + build_side_rows += inner_sink_state->_build_side_rows; + } + } + + if (build_side_rows > 0 && eos) { + /// here template argument(`StringRef`) is insignificant + size_t bucket_count = JoinHashTable<StringRef>::calc_bucket_size(build_side_rows); + // see `JoinHashTable::first` + estimated_size += (bucket_count + 1) * sizeof(uint32_t); + // `JoinHashTable::next` + estimated_size += build_side_rows * sizeof(uint32_t); + // `JoinHashTable::visited` + estimated_size += build_side_rows * sizeof(uint8_t); + } + } + + return estimated_size; +} + Status PartitionedHashJoinSinkLocalState::_revoke_unpartitioned_block(RuntimeState* state) { Review Comment: warning: function '_revoke_unpartitioned_block' has cognitive complexity of 59 (threshold 50) [readability-function-cognitive-complexity] ```cpp Status PartitionedHashJoinSinkLocalState::_revoke_unpartitioned_block(RuntimeState* state) { ^ ``` <details> <summary>Additional context</summary> **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:138:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (inner_sink_state_) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:143:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp if (build_blocks.empty()) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:160:** nesting level increased to 1 ```cpp auto spill_func = [build_blocks = std::move(build_blocks), state, num_slots, ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:175:** nesting level increased to 2 ```cpp [](std::vector<uint32_t>& indices) { indices.reserve(reserved_size); }); ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:177:** nesting level increased to 2 ```cpp auto flush_rows = [&state, this](std::unique_ptr<vectorized::MutableBlock>& partition_block, ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:182:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (!status.ok()) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:192:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp for (size_t block_idx = 0; block_idx != build_blocks.size(); ++block_idx) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:195:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (UNLIKELY(build_block.empty())) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:199:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp if (build_block.columns() > num_slots) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:209:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (size_t i = 0; i != build_block.rows(); ++i) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:213:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp for (uint32_t partition_idx = 0; partition_idx != p._partition_count; ++partition_idx) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:219:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (UNLIKELY(!partition_block)) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:227:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (!st.ok()) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:237:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp if (partition_block->rows() >= reserved_size || is_last_block) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:237:** +1 ```cpp if (partition_block->rows() >= reserved_size || is_last_block) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:238:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp if (!flush_rows(partition_block, spilling_stream)) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:252:** nesting level increased to 1 ```cpp auto exception_catch_func = [spill_func, shared_state_holder, execution_context, state, ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:257:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (shared_state_sptr) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:260:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (!shared_state_sptr || !execution_context_lock || state->is_cancelled()) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:260:** +1 ```cpp if (!shared_state_sptr || !execution_context_lock || state->is_cancelled()) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:265:** nesting level increased to 2 ```cpp auto status = [&]() { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:266:** +3, including nesting penalty of 2, nesting level increased to 3 ```cpp RETURN_IF_CATCH_EXCEPTION(spill_func()); ^ ``` **be/src/common/exception.h:89:** expanded from macro 'RETURN_IF_CATCH_EXCEPTION' ```cpp do { \ ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:266:** +4, including nesting penalty of 3, nesting level increased to 4 ```cpp RETURN_IF_CATCH_EXCEPTION(spill_func()); ^ ``` **be/src/common/exception.h:94:** expanded from macro 'RETURN_IF_CATCH_EXCEPTION' ```cpp } catch (const doris::Exception& e) { \ ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:266:** +5, including nesting penalty of 4, nesting level increased to 5 ```cpp RETURN_IF_CATCH_EXCEPTION(spill_func()); ^ ``` **be/src/common/exception.h:95:** expanded from macro 'RETURN_IF_CATCH_EXCEPTION' ```cpp if (e.code() == doris::ErrorCode::MEM_ALLOC_FAILED) { \ ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:270:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp if (!status.ok()) { ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:280:** +1, including nesting penalty of 0, nesting level increased to 1 ```cpp DBUG_EXECUTE_IF( ^ ``` **be/src/util/debug_points.h:36:** expanded from macro 'DBUG_EXECUTE_IF' ```cpp if (UNLIKELY(config::enable_debug_points)) { \ ^ ``` **be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp:280:** +2, including nesting penalty of 1, nesting level increased to 2 ```cpp DBUG_EXECUTE_IF( ^ ``` **be/src/util/debug_points.h:38:** expanded from macro 'DBUG_EXECUTE_IF' ```cpp if (dp) { \ ^ ``` </details> -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org