yiguolei commented on code in PR #30746: URL: https://github.com/apache/doris/pull/30746#discussion_r1477191550
########## be/src/vec/exec/scan/scanner_context.cpp: ########## @@ -210,148 +136,238 @@ Status ScannerContext::init() { thread_token == nullptr ? "False" : "True"); } + // submit `_max_thread_num` running scanners to `ScannerScheduler` + // When a running scanners is finished, it will submit one of the remaining scanners. + for (int i = 0; i < _max_thread_num; ++i) { + std::weak_ptr<ScannerDelegate> next_scanner; + if (_scanners.try_dequeue(next_scanner)) { + vectorized::BlockUPtr block = get_free_block(_batch_size); + submit_running_scanner( + std::make_shared<RunningScanner>(next_scanner, std::move(block))); + _num_running_scanners++; + } + } + return Status::OK(); } std::string ScannerContext::parent_name() { return _parent ? _parent->get_name() : _local_state->get_name(); } -vectorized::BlockUPtr ScannerContext::get_free_block() { +vectorized::BlockUPtr ScannerContext::get_free_block(int batch_size) { vectorized::BlockUPtr block; if (_free_blocks.try_dequeue(block)) { + std::lock_guard<std::mutex> fl(_free_blocks_lock); DCHECK(block->mem_reuse()); - _free_blocks_memory_usage->add(-block->allocated_bytes()); - _serving_blocks_num++; + _free_blocks_memory_usage -= block->allocated_bytes(); + _free_blocks_memory_usage_mark->set(_free_blocks_memory_usage); return block; } - block = vectorized::Block::create_unique(_output_tuple_desc->slots(), _batch_size, - true /*ignore invalid slots*/); - - COUNTER_UPDATE(_newly_create_free_blocks_num, 1); - - _serving_blocks_num++; - return block; + return vectorized::Block::create_unique(_output_tuple_desc->slots(), batch_size, + true /*ignore invalid slots*/); } -void ScannerContext::return_free_block(std::unique_ptr<vectorized::Block> block) { - _serving_blocks_num--; - if (block->mem_reuse()) { - // Only put blocks with schema to free blocks, because colocate blocks - // need schema. 
- _estimated_block_bytes = std::max(block->allocated_bytes(), (size_t)16); +void ScannerContext::return_free_block(vectorized::BlockUPtr block) { + std::lock_guard<std::mutex> fl(_free_blocks_lock); + if (block->mem_reuse() && _free_blocks_memory_usage < _max_bytes_in_queue) { block->clear_column_data(); - _free_blocks_memory_usage->add(block->allocated_bytes()); + _free_blocks_memory_usage += block->allocated_bytes(); + _free_blocks_memory_usage_mark->set(_free_blocks_memory_usage); _free_blocks.enqueue(std::move(block)); } } -void ScannerContext::append_blocks_to_queue(std::vector<vectorized::BlockUPtr>& blocks) { - std::lock_guard l(_transfer_lock); - auto old_bytes_in_queue = _cur_bytes_in_queue; - for (auto& b : blocks) { - auto st = validate_block_schema(b.get()); - if (!st.ok()) { - set_status_on_error(st, false); +bool ScannerContext::empty_in_queue(int id) { + std::lock_guard<std::mutex> l(_transfer_lock); + return _blocks_queue.empty(); +} + +void ScannerContext::submit_running_scanner(std::shared_ptr<RunningScanner> running_scanner) { + _num_scheduled_scanners++; + _scanner_scheduler->submit(shared_from_this(), running_scanner); +} + +void ScannerContext::append_block_to_queue(std::shared_ptr<RunningScanner> running_scanner) { + Status st = validate_block_schema(running_scanner->current_block.get()); + if (!st.ok()) { + running_scanner->status = st; + } + // set `eos` if `END_OF_FILE`, don't take `END_OF_FILE` as error + if (running_scanner->status.is<ErrorCode::END_OF_FILE>()) { Review Comment: There is no need for this check here. Instead, eos=true should be set at the point where the status is set (set_status) in the running scanner. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org