This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new 352617a34db [fix](scanner) cached blocks may be empty when VFileScanner return NOT_FOUND (#32745) 352617a34db is described below commit 352617a34dbfc21355ed13ad2cf4943451ca3a13 Author: Ashin Gau <ashin...@users.noreply.github.com> AuthorDate: Wed Mar 27 09:58:37 2024 +0800 [fix](scanner) cached blocks may be empty when VFileScanner return NOT_FOUND (#32745) Cached blocks may be empty when VFileScanner returns NOT_FOUND. This feature (treating NOT_FOUND as a non-failure case for VFileScanner) was introduced by https://github.com/apache/doris/pull/15226. Move this handling inside `VFileScanner`. --- be/src/vec/exec/scan/scanner_context.cpp | 23 ++++++++++++----------- be/src/vec/exec/scan/scanner_scheduler.cpp | 14 +------------- be/src/vec/exec/scan/vfile_scanner.cpp | 11 +++++------ 3 files changed, 18 insertions(+), 30 deletions(-) diff --git a/be/src/vec/exec/scan/scanner_context.cpp b/be/src/vec/exec/scan/scanner_context.cpp index 3058708b2d9..2ed0df998b4 100644 --- a/be/src/vec/exec/scan/scanner_context.cpp +++ b/be/src/vec/exec/scan/scanner_context.cpp @@ -254,18 +254,19 @@ Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::Blo _set_scanner_done(); return scan_task->get_status(); } - DCHECK(!scan_task->cached_blocks.empty()); - vectorized::BlockUPtr current_block = std::move(scan_task->cached_blocks.front()); - scan_task->cached_blocks.pop_front(); - size_t block_size = current_block->allocated_bytes(); - if (_estimated_block_size > block_size) { - _estimated_block_size = block_size; + if (!scan_task->cached_blocks.empty()) { + vectorized::BlockUPtr current_block = std::move(scan_task->cached_blocks.front()); + scan_task->cached_blocks.pop_front(); + size_t block_size = current_block->allocated_bytes(); + if (_estimated_block_size > block_size) { + _estimated_block_size = block_size; + } + _free_blocks_memory_usage -= block_size; + _free_blocks_memory_usage_mark->set(_free_blocks_memory_usage); + // consume current 
block + block->swap(*current_block); + return_free_block(std::move(current_block)); } - _free_blocks_memory_usage -= block_size; - _free_blocks_memory_usage_mark->set(_free_blocks_memory_usage); - // consume current block - block->swap(*current_block); - return_free_block(std::move(current_block)); if (scan_task->cached_blocks.empty()) { _blocks_queue.pop_front(); if (scan_task->is_eos()) { // current scanner is finished, and no more data to read diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp index f5dfbe4724a..d81e327b886 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.cpp +++ b/be/src/vec/exec/scan/scanner_scheduler.cpp @@ -266,21 +266,9 @@ void ScannerScheduler::_scanner_scan(std::shared_ptr<ScannerContext> ctx, } status = scanner->get_block_after_projects(state, free_block.get(), &eos); first_read = false; - // The VFileScanner for external table may try to open not exist files, - // Because FE file cache for external table may out of date. - // So, NOT_FOUND for VFileScanner is not a fail case. - // Will remove this after file reader refactor. - if (!status.ok() && (scanner->get_name() != doris::vectorized::VFileScanner::NAME || - (scanner->get_name() == doris::vectorized::VFileScanner::NAME && - !status.is<ErrorCode::NOT_FOUND>()))) { + if (!status.ok()) { LOG(WARNING) << "Scan thread read VScanner failed: " << status.to_string(); break; - } else if (status.is<ErrorCode::NOT_FOUND>()) { - // The only case in this "if" branch is external table file delete and fe cache has not been updated yet. - // Set status to OK. 
- status = Status::OK(); - eos = true; - break; } raw_bytes_read += free_block->allocated_bytes(); if (!scan_task->cached_blocks.empty() && diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index f92715588eb..fb3eaec0789 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -925,15 +925,14 @@ Status VFileScanner::_get_next_reader() { } COUNTER_UPDATE(_file_counter, 1); - if (init_status.is<END_OF_FILE>()) { + if (init_status.is<END_OF_FILE>() || init_status.is<ErrorCode::NOT_FOUND>()) { + // The VFileScanner for external table may try to open not exist files, + // Because FE file cache for external table may out of date. + // So, NOT_FOUND for VFileScanner is not a fail case. + // Will remove this after file reader refactor. COUNTER_UPDATE(_empty_file_counter, 1); continue; } else if (!init_status.ok()) { - if (init_status.is<ErrorCode::NOT_FOUND>()) { - COUNTER_UPDATE(_empty_file_counter, 1); - LOG(INFO) << "failed to find file: " << range.path; - return init_status; - } return Status::InternalError("failed to init reader for file {}, err: {}", range.path, init_status.to_string()); } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org