This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 352617a34db [fix](scanner) cached blocks may be empty when 
VFileScanner return NOT_FOUND (#32745)
352617a34db is described below

commit 352617a34dbfc21355ed13ad2cf4943451ca3a13
Author: Ashin Gau <ashin...@users.noreply.github.com>
AuthorDate: Wed Mar 27 09:58:37 2024 +0800

    [fix](scanner) cached blocks may be empty when VFileScanner return 
NOT_FOUND (#32745)
    
    Cached blocks may be empty when VFileScanner returns NOT_FOUND. Tolerating 
NOT_FOUND was introduced by https://github.com/apache/doris/pull/15226. Move that 
handling into `VFileScanner`.
---
 be/src/vec/exec/scan/scanner_context.cpp   | 23 ++++++++++++-----------
 be/src/vec/exec/scan/scanner_scheduler.cpp | 14 +-------------
 be/src/vec/exec/scan/vfile_scanner.cpp     | 11 +++++------
 3 files changed, 18 insertions(+), 30 deletions(-)

diff --git a/be/src/vec/exec/scan/scanner_context.cpp 
b/be/src/vec/exec/scan/scanner_context.cpp
index 3058708b2d9..2ed0df998b4 100644
--- a/be/src/vec/exec/scan/scanner_context.cpp
+++ b/be/src/vec/exec/scan/scanner_context.cpp
@@ -254,18 +254,19 @@ Status ScannerContext::get_block_from_queue(RuntimeState* 
state, vectorized::Blo
             _set_scanner_done();
             return scan_task->get_status();
         }
-        DCHECK(!scan_task->cached_blocks.empty());
-        vectorized::BlockUPtr current_block = 
std::move(scan_task->cached_blocks.front());
-        scan_task->cached_blocks.pop_front();
-        size_t block_size = current_block->allocated_bytes();
-        if (_estimated_block_size > block_size) {
-            _estimated_block_size = block_size;
+        if (!scan_task->cached_blocks.empty()) {
+            vectorized::BlockUPtr current_block = 
std::move(scan_task->cached_blocks.front());
+            scan_task->cached_blocks.pop_front();
+            size_t block_size = current_block->allocated_bytes();
+            if (_estimated_block_size > block_size) {
+                _estimated_block_size = block_size;
+            }
+            _free_blocks_memory_usage -= block_size;
+            _free_blocks_memory_usage_mark->set(_free_blocks_memory_usage);
+            // consume current block
+            block->swap(*current_block);
+            return_free_block(std::move(current_block));
         }
-        _free_blocks_memory_usage -= block_size;
-        _free_blocks_memory_usage_mark->set(_free_blocks_memory_usage);
-        // consume current block
-        block->swap(*current_block);
-        return_free_block(std::move(current_block));
         if (scan_task->cached_blocks.empty()) {
             _blocks_queue.pop_front();
             if (scan_task->is_eos()) { // current scanner is finished, and no 
more data to read
diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp 
b/be/src/vec/exec/scan/scanner_scheduler.cpp
index f5dfbe4724a..d81e327b886 100644
--- a/be/src/vec/exec/scan/scanner_scheduler.cpp
+++ b/be/src/vec/exec/scan/scanner_scheduler.cpp
@@ -266,21 +266,9 @@ void 
ScannerScheduler::_scanner_scan(std::shared_ptr<ScannerContext> ctx,
         }
         status = scanner->get_block_after_projects(state, free_block.get(), 
&eos);
         first_read = false;
-        // The VFileScanner for external table may try to open not exist files,
-        // Because FE file cache for external table may out of date.
-        // So, NOT_FOUND for VFileScanner is not a fail case.
-        // Will remove this after file reader refactor.
-        if (!status.ok() && (scanner->get_name() != 
doris::vectorized::VFileScanner::NAME ||
-                             (scanner->get_name() == 
doris::vectorized::VFileScanner::NAME &&
-                              !status.is<ErrorCode::NOT_FOUND>()))) {
+        if (!status.ok()) {
             LOG(WARNING) << "Scan thread read VScanner failed: " << 
status.to_string();
             break;
-        } else if (status.is<ErrorCode::NOT_FOUND>()) {
-            // The only case in this "if" branch is external table file delete 
and fe cache has not been updated yet.
-            // Set status to OK.
-            status = Status::OK();
-            eos = true;
-            break;
         }
         raw_bytes_read += free_block->allocated_bytes();
         if (!scan_task->cached_blocks.empty() &&
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp 
b/be/src/vec/exec/scan/vfile_scanner.cpp
index f92715588eb..fb3eaec0789 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -925,15 +925,14 @@ Status VFileScanner::_get_next_reader() {
         }
 
         COUNTER_UPDATE(_file_counter, 1);
-        if (init_status.is<END_OF_FILE>()) {
+        if (init_status.is<END_OF_FILE>() || 
init_status.is<ErrorCode::NOT_FOUND>()) {
+            // The VFileScanner for external table may try to open not exist 
files,
+            // Because FE file cache for external table may out of date.
+            // So, NOT_FOUND for VFileScanner is not a fail case.
+            // Will remove this after file reader refactor.
             COUNTER_UPDATE(_empty_file_counter, 1);
             continue;
         } else if (!init_status.ok()) {
-            if (init_status.is<ErrorCode::NOT_FOUND>()) {
-                COUNTER_UPDATE(_empty_file_counter, 1);
-                LOG(INFO) << "failed to find file: " << range.path;
-                return init_status;
-            }
             return Status::InternalError("failed to init reader for file {}, 
err: {}", range.path,
                                          init_status.to_string());
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to