yiguolei commented on code in PR #21361:
URL: https://github.com/apache/doris/pull/21361#discussion_r1248372162


##########
be/src/vec/exec/join/process_hash_table_probe_impl.h:
##########
@@ -177,22 +177,51 @@ Status 
ProcessHashTableProbe<JoinOpType>::do_process(HashTableType& hash_table_c
     KeyGetter key_getter(probe_raw_ptrs, _join_node->_probe_key_sz, nullptr);
 
     if (probe_index == 0) {
-        size_t old_probe_keys_memory_usage = 0;
-        if (_arena) {
-            old_probe_keys_memory_usage = _arena->size();
+        if (!_arena) {
+            _arena.reset(new Arena());
         }
-        _arena.reset(new Arena()); // TODO arena reuse by clear()?
         if constexpr 
(ColumnsHashing::IsPreSerializedKeysHashMethodTraits<KeyGetter>::value) {
             if (_probe_keys.size() < probe_rows) {
                 _probe_keys.resize(probe_rows);
             }
-            size_t keys_size = probe_raw_ptrs.size();
-            for (size_t i = 0; i < probe_rows; ++i) {
-                _probe_keys[i] =
-                        serialize_keys_to_pool_contiguous(i, keys_size, 
probe_raw_ptrs, *_arena);
+            size_t max_one_row_byte_size = 0;
+            for (const auto column : probe_raw_ptrs) {
+                max_one_row_byte_size += column->get_max_row_byte_size();
+            }
+            size_t total_bytes = max_one_row_byte_size * probe_rows;
+
+            if (total_bytes > config::pre_serialize_keys_limit_bytes) {
+                // reach mem limit, don't serialize in batch
+                _arena->clear();
+                size_t keys_size = probe_raw_ptrs.size();
+                for (size_t i = 0; i < probe_rows; ++i) {
+                    _probe_keys[i] = serialize_keys_to_pool_contiguous(i, 
keys_size, probe_raw_ptrs,
+                                                                       
*_arena);
+                }
+                _join_node->_probe_arena_memory_usage->add(_arena->size());
+            } else {
+                _arena->clear();
+                if (!_serialize_key_arena) {
+                    _serialize_key_arena.reset(new Arena);
+                }
+                if (total_bytes > _serialized_key_buffer_size) {
+                    _serialized_key_buffer_size = total_bytes;
+                    _serialize_key_arena->clear();
+                    _serialized_key_buffer = reinterpret_cast<uint8_t*>(
+                            
_serialize_key_arena->alloc(_serialized_key_buffer_size));
+                }
+
+                for (size_t i = 0; i < probe_rows; ++i) {
+                    _probe_keys[i].data = 
reinterpret_cast<char*>(_serialized_key_buffer +
+                                                                  i * 
max_one_row_byte_size);
+                    _probe_keys[i].size = 0;
+                }
+
+                for (const auto column : probe_raw_ptrs) {
+                    column->serialize_vec(_probe_keys, probe_rows, 
max_one_row_byte_size);
+                }
+                
_join_node->_probe_arena_memory_usage->add(_serialized_key_buffer_size);
             }
-            _join_node->_probe_arena_memory_usage->add(_arena->size() -
-                                                       
old_probe_keys_memory_usage);
         }

Review Comment:
   The arena's memory should be released in the close method so that it is freed earlier.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to