This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new fc3e8f84f45 [opt](multi-catalog) Optimize remote scan concurrency. (#52516)
fc3e8f84f45 is described below

commit fc3e8f84f4531b6f53d0230e62950150642da5f6
Author: Qi Chen <[email protected]>
AuthorDate: Tue Jul 8 21:12:18 2025 +0800

    [opt](multi-catalog) Optimize remote scan concurrency. (#52516)
    
    
    Cherry-pick #51415
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
            - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
        - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/pipeline/exec/file_scan_operator.cpp | 11 +++++------
 be/src/vec/exec/scan/scanner_scheduler.cpp  | 11 ++++++-----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/be/src/pipeline/exec/file_scan_operator.cpp 
b/be/src/pipeline/exec/file_scan_operator.cpp
index 62994bc6db4..00ebfe83535 100644
--- a/be/src/pipeline/exec/file_scan_operator.cpp
+++ b/be/src/pipeline/exec/file_scan_operator.cpp
@@ -39,9 +39,9 @@ Status 
FileScanLocalState::_init_scanners(std::list<vectorized::VScannerSPtr>* s
 
     auto& p = _parent->cast<FileScanOperatorX>();
     // There's only one scan range for each backend in batch split mode. Each 
backend only starts up one ScanNode instance.
-    size_t shard_num = std::min<size_t>(
-            config::doris_scanner_thread_pool_thread_num / 
p.query_parallel_instance_num(),
-            _max_scanners);
+    size_t shard_num = 
std::min<size_t>(vectorized::ScannerScheduler::get_remote_scan_thread_num() /
+                                                
p.query_parallel_instance_num(),
+                                        _max_scanners);
     shard_num = std::max(shard_num, (size_t)1);
     _kv_cache.reset(new vectorized::ShardedKVCache(shard_num));
     for (int i = 0; i < _max_scanners; ++i) {
@@ -65,9 +65,8 @@ void FileScanLocalState::set_scan_ranges(RuntimeState* state,
     auto& p = _parent->cast<FileScanOperatorX>();
 
     auto calc_max_scanners = [&](int parallel_instance_num) -> int {
-        int max_scanners = config::doris_scanner_thread_pool_thread_num / 
parallel_instance_num;
-        max_scanners =
-                std::max(std::max(max_scanners, 
state->parallel_scan_max_scanners_count()), 1);
+        int max_scanners =
+                vectorized::ScannerScheduler::get_remote_scan_thread_num() / 
parallel_instance_num;
         if (should_run_serial()) {
             max_scanners = 1;
         }
diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp 
b/be/src/vec/exec/scan/scanner_scheduler.cpp
index 1b14d172790..b518056c897 100644
--- a/be/src/vec/exec/scan/scanner_scheduler.cpp
+++ b/be/src/vec/exec/scan/scanner_scheduler.cpp
@@ -323,11 +323,12 @@ void 
ScannerScheduler::_scanner_scan(std::shared_ptr<ScannerContext> ctx,
 }
 
 int ScannerScheduler::get_remote_scan_thread_num() {
-    int remote_max_thread_num = 
config::doris_max_remote_scanner_thread_pool_thread_num != -1
-                                        ? 
config::doris_max_remote_scanner_thread_pool_thread_num
-                                        : std::max(512, CpuInfo::num_cores() * 
10);
-    remote_max_thread_num =
-            std::max(remote_max_thread_num, 
config::doris_scanner_thread_pool_thread_num);
+    static int remote_max_thread_num = []() {
+        int num = config::doris_max_remote_scanner_thread_pool_thread_num != -1
+                          ? 
config::doris_max_remote_scanner_thread_pool_thread_num
+                          : std::max(512, CpuInfo::num_cores() * 10);
+        return std::max(num, config::doris_scanner_thread_pool_thread_num);
+    }();
     return remote_max_thread_num;
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to