This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new fc3e8f84f45 [opt](multi-catalog) Optimize remote scan concurrency.
(#52516)
fc3e8f84f45 is described below
commit fc3e8f84f4531b6f53d0230e62950150642da5f6
Author: Qi Chen <[email protected]>
AuthorDate: Tue Jul 8 21:12:18 2025 +0800
[opt](multi-catalog) Optimize remote scan concurrency. (#52516)
Cherry-pick #51415
### Check List (For Author)
- Test <!-- At least one of them must be included. -->
- [ ] Regression test
- [ ] Unit Test
- [ ] Manual test (add detailed scripts or steps below)
- [ ] No need to test or manual test. Explain why:
- [ ] This is a refactor/code format and no logic has been changed.
- [ ] Previous test can cover this change.
- [ ] No code files have been changed.
- [ ] Other reason <!-- Add your reason here. -->
- Behavior changed:
- [ ] No.
- [ ] Yes. <!-- Explain the behavior change -->
- Does this need documentation?
- [ ] No.
- [ ] Yes. <!-- Add the documentation PR link here, e.g.
https://github.com/apache/doris-website/pull/1214 -->
### Check List (For Reviewer who merge this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label <!-- Add the pick label for the branch that this PR
should be merged into -->
---
be/src/pipeline/exec/file_scan_operator.cpp | 11 +++++------
be/src/vec/exec/scan/scanner_scheduler.cpp | 11 ++++++-----
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/be/src/pipeline/exec/file_scan_operator.cpp
b/be/src/pipeline/exec/file_scan_operator.cpp
index 62994bc6db4..00ebfe83535 100644
--- a/be/src/pipeline/exec/file_scan_operator.cpp
+++ b/be/src/pipeline/exec/file_scan_operator.cpp
@@ -39,9 +39,9 @@ Status
FileScanLocalState::_init_scanners(std::list<vectorized::VScannerSPtr>* s
auto& p = _parent->cast<FileScanOperatorX>();
// There's only one scan range for each backend in batch split mode. Each
backend only starts up one ScanNode instance.
- size_t shard_num = std::min<size_t>(
- config::doris_scanner_thread_pool_thread_num /
p.query_parallel_instance_num(),
- _max_scanners);
+ size_t shard_num =
std::min<size_t>(vectorized::ScannerScheduler::get_remote_scan_thread_num() /
+
p.query_parallel_instance_num(),
+ _max_scanners);
shard_num = std::max(shard_num, (size_t)1);
_kv_cache.reset(new vectorized::ShardedKVCache(shard_num));
for (int i = 0; i < _max_scanners; ++i) {
@@ -65,9 +65,8 @@ void FileScanLocalState::set_scan_ranges(RuntimeState* state,
auto& p = _parent->cast<FileScanOperatorX>();
auto calc_max_scanners = [&](int parallel_instance_num) -> int {
- int max_scanners = config::doris_scanner_thread_pool_thread_num /
parallel_instance_num;
- max_scanners =
- std::max(std::max(max_scanners,
state->parallel_scan_max_scanners_count()), 1);
+ int max_scanners =
+ vectorized::ScannerScheduler::get_remote_scan_thread_num() /
parallel_instance_num;
if (should_run_serial()) {
max_scanners = 1;
}
diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp
b/be/src/vec/exec/scan/scanner_scheduler.cpp
index 1b14d172790..b518056c897 100644
--- a/be/src/vec/exec/scan/scanner_scheduler.cpp
+++ b/be/src/vec/exec/scan/scanner_scheduler.cpp
@@ -323,11 +323,12 @@ void
ScannerScheduler::_scanner_scan(std::shared_ptr<ScannerContext> ctx,
}
int ScannerScheduler::get_remote_scan_thread_num() {
- int remote_max_thread_num =
config::doris_max_remote_scanner_thread_pool_thread_num != -1
- ?
config::doris_max_remote_scanner_thread_pool_thread_num
- : std::max(512, CpuInfo::num_cores() *
10);
- remote_max_thread_num =
- std::max(remote_max_thread_num,
config::doris_scanner_thread_pool_thread_num);
+ static int remote_max_thread_num = []() {
+ int num = config::doris_max_remote_scanner_thread_pool_thread_num != -1
+ ?
config::doris_max_remote_scanner_thread_pool_thread_num
+ : std::max(512, CpuInfo::num_cores() * 10);
+ return std::max(num, config::doris_scanner_thread_pool_thread_num);
+ }();
return remote_max_thread_num;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]