yiguolei commented on code in PR #21361: URL: https://github.com/apache/doris/pull/21361#discussion_r1248372162
########## be/src/vec/exec/join/process_hash_table_probe_impl.h: ########## @@ -177,22 +177,51 @@ Status ProcessHashTableProbe<JoinOpType>::do_process(HashTableType& hash_table_c KeyGetter key_getter(probe_raw_ptrs, _join_node->_probe_key_sz, nullptr); if (probe_index == 0) { - size_t old_probe_keys_memory_usage = 0; - if (_arena) { - old_probe_keys_memory_usage = _arena->size(); + if (!_arena) { + _arena.reset(new Arena()); } - _arena.reset(new Arena()); // TODO arena reuse by clear()? if constexpr (ColumnsHashing::IsPreSerializedKeysHashMethodTraits<KeyGetter>::value) { if (_probe_keys.size() < probe_rows) { _probe_keys.resize(probe_rows); } - size_t keys_size = probe_raw_ptrs.size(); - for (size_t i = 0; i < probe_rows; ++i) { - _probe_keys[i] = - serialize_keys_to_pool_contiguous(i, keys_size, probe_raw_ptrs, *_arena); + size_t max_one_row_byte_size = 0; + for (const auto column : probe_raw_ptrs) { + max_one_row_byte_size += column->get_max_row_byte_size(); + } + size_t total_bytes = max_one_row_byte_size * probe_rows; + + if (total_bytes > config::pre_serialize_keys_limit_bytes) { + // reach mem limit, don't serialize in batch + _arena->clear(); + size_t keys_size = probe_raw_ptrs.size(); + for (size_t i = 0; i < probe_rows; ++i) { + _probe_keys[i] = serialize_keys_to_pool_contiguous(i, keys_size, probe_raw_ptrs, + *_arena); + } + _join_node->_probe_arena_memory_usage->add(_arena->size()); + } else { + _arena->clear(); + if (!_serialize_key_arena) { + _serialize_key_arena.reset(new Arena); + } + if (total_bytes > _serialized_key_buffer_size) { + _serialized_key_buffer_size = total_bytes; + _serialize_key_arena->clear(); + _serialized_key_buffer = reinterpret_cast<uint8_t*>( + _serialize_key_arena->alloc(_serialized_key_buffer_size)); + } + + for (size_t i = 0; i < probe_rows; ++i) { + _probe_keys[i].data = reinterpret_cast<char*>(_serialized_key_buffer + + i * max_one_row_byte_size); + _probe_keys[i].size = 0; + } + + for (const auto column : probe_raw_ptrs) { 
+ column->serialize_vec(_probe_keys, probe_rows, max_one_row_byte_size); + } + _join_node->_probe_arena_memory_usage->add(_serialized_key_buffer_size); } - _join_node->_probe_arena_memory_usage->add(_arena->size() - - old_probe_keys_memory_usage); } Review Comment: We should release the arena's memory in the close method so that it is freed earlier. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org