Gabriel39 commented on code in PR #61535:
URL: https://github.com/apache/doris/pull/61535#discussion_r2985512147


##########
be/src/exec/operator/olap_scan_operator.h:
##########
@@ -309,6 +309,9 @@ class OlapScanLocalState final : public 
ScanLocalState<OlapScanLocalState> {
     // Variant subtree: times selecting doc snapshot all iterator (merge doc 
snapshot into root)
     RuntimeProfile::Counter* _variant_doc_value_column_iter_count = nullptr;
 
+    RuntimeProfile::Counter* _adaptive_batch_predict_min_rows_counter = 
nullptr;
+    RuntimeProfile::Counter* _adaptive_batch_predict_max_rows_counter = 
nullptr;

Review Comment:
   Use `HighWaterMarkCounter`



##########
be/src/storage/rowset/rowset_reader_context.h:
##########
@@ -73,6 +73,13 @@ struct RowsetReaderContext {
     bool use_page_cache = false;
     int sequence_id_idx = -1;
     int batch_size = 1024;
+    // Adaptive batch size: target output block byte budget. 0 = disabled.
+    size_t preferred_block_size_bytes = 0;
+    // Per-column byte limit for the adaptive predictor. 0 = no column limit.
+    size_t preferred_max_col_bytes = 1048576;

Review Comment:
   0 by default?



##########
be/src/runtime/runtime_state.h:
##########
@@ -139,6 +139,20 @@ class RuntimeState {
     const DescriptorTbl& desc_tbl() const { return *_desc_tbl; }
     void set_desc_tbl(const DescriptorTbl* desc_tbl) { _desc_tbl = desc_tbl; }
     MOCK_FUNCTION int batch_size() const { return _query_options.batch_size; }
+    MOCK_FUNCTION size_t preferred_block_size_bytes() const {
+        if (_query_options.__isset.preferred_block_size_bytes) {
+            auto v = _query_options.preferred_block_size_bytes;
+            return v > 0 ? static_cast<size_t>(v) : 0;
+        }
+        return 8388608UL; // 8MB default

Review Comment:
   Maybe we should not enable this feature by default



##########
be/src/common/config.cpp:
##########
@@ -433,6 +433,8 @@ DEFINE_mInt32(pk_index_page_cache_stale_sweep_time_sec, 
"600");
 DEFINE_mBool(enable_low_cardinality_optimize, "true");
 DEFINE_Bool(enable_low_cardinality_cache_code, "true");
 
+DEFINE_mBool(enable_adaptive_batch_size, "true");

Review Comment:
   Could we use a session variable instead ?



##########
be/src/storage/iterator/block_reader.cpp:
##########
@@ -386,6 +387,16 @@ Status BlockReader::_replace_key_next_block(Block* block, 
bool* eof) {
                 break;
             }
         }
+        // Byte-budget check: after the inner loop _next_row is either EOF or 
the next different
+        // key, so it is safe to stop accumulating here without repeating any 
row.
+        if (config::enable_adaptive_batch_size && 
_reader_context.preferred_block_size_bytes > 0 &&

Review Comment:
   Extract a common function to do this checking



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to