yiguolei commented on code in PR #30746: URL: https://github.com/apache/doris/pull/30746#discussion_r1477851532
########## be/src/vec/exec/scan/scanner_context.cpp: ########## @@ -210,148 +141,237 @@ Status ScannerContext::init() { thread_token == nullptr ? "False" : "True"); } + // submit `_max_thread_num` running scanners to `ScannerScheduler` + // When a running scanners is finished, it will submit one of the remaining scanners. + for (int i = 0; i < _max_thread_num; ++i) { + std::weak_ptr<ScannerDelegate> next_scanner; + if (_scanners.try_dequeue(next_scanner)) { + vectorized::BlockUPtr block = get_free_block(_batch_size); + submit_scan_task(std::make_shared<ScanTask>(next_scanner, std::move(block))); + _num_running_scanners++; + } + } + return Status::OK(); } std::string ScannerContext::parent_name() { return _parent ? _parent->get_name() : _local_state->get_name(); } -vectorized::BlockUPtr ScannerContext::get_free_block() { +vectorized::BlockUPtr ScannerContext::get_free_block(int batch_size) { vectorized::BlockUPtr block; if (_free_blocks.try_dequeue(block)) { + std::lock_guard<std::mutex> fl(_free_blocks_lock); DCHECK(block->mem_reuse()); - _free_blocks_memory_usage->add(-block->allocated_bytes()); - _serving_blocks_num++; + _free_blocks_memory_usage -= block->allocated_bytes(); + _free_blocks_memory_usage_mark->set(_free_blocks_memory_usage); return block; } - block = vectorized::Block::create_unique(_output_tuple_desc->slots(), _batch_size, - true /*ignore invalid slots*/); - - COUNTER_UPDATE(_newly_create_free_blocks_num, 1); - - _serving_blocks_num++; - return block; + _newly_create_free_blocks_num->update(1); + return vectorized::Block::create_unique(_output_tuple_desc->slots(), batch_size, + true /*ignore invalid slots*/); } -void ScannerContext::return_free_block(std::unique_ptr<vectorized::Block> block) { - _serving_blocks_num--; - if (block->mem_reuse()) { - // Only put blocks with schema to free blocks, because colocate blocks - // need schema. 
- _estimated_block_bytes = std::max(block->allocated_bytes(), (size_t)16); +void ScannerContext::return_free_block(vectorized::BlockUPtr block) { + std::lock_guard<std::mutex> fl(_free_blocks_lock); + if (block->mem_reuse() && _free_blocks_memory_usage < _max_bytes_in_queue) { block->clear_column_data(); - _free_blocks_memory_usage->add(block->allocated_bytes()); + _free_blocks_memory_usage += block->allocated_bytes(); + _free_blocks_memory_usage_mark->set(_free_blocks_memory_usage); _free_blocks.enqueue(std::move(block)); } } -void ScannerContext::append_blocks_to_queue(std::vector<vectorized::BlockUPtr>& blocks) { - std::lock_guard l(_transfer_lock); - auto old_bytes_in_queue = _cur_bytes_in_queue; - for (auto& b : blocks) { - auto st = validate_block_schema(b.get()); - if (!st.ok()) { - set_status_on_error(st, false); +bool ScannerContext::empty_in_queue(int id) { + std::lock_guard<std::mutex> l(_transfer_lock); + return _blocks_queue.empty(); +} + +void ScannerContext::submit_scan_task(std::shared_ptr<ScanTask> scan_task) { + _scanner_sched_counter->update(1); + _num_scheduled_scanners++; + _scanner_scheduler->submit(shared_from_this(), scan_task); +} + +void ScannerContext::append_block_to_queue(std::shared_ptr<ScanTask> scan_task) { + Status st = validate_block_schema(scan_task->current_block.get()); + if (!st.ok()) { + scan_task->set_status(st); Review Comment: If the status is not ok, you set the status on scan_task, but how do you tell scannode that there is an error? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org