This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a25ac144323 [Improvement](inverted index) Remove the check for 
inverted index file exists (#36945)
a25ac144323 is described below

commit a25ac144323ddc77f32766dee9a76c6bd070898c
Author: Sun Chenyang <csun5...@gmail.com>
AuthorDate: Tue Jul 2 15:10:08 2024 +0800

    [Improvement](inverted index) Remove the check for inverted index file 
exists (#36945)
    
    Remove the check for inverted index file exists to avoid latency of remote 
fs eg. s3
---
 be/src/clucene                                     |  2 +-
 .../segment_v2/inverted_index_file_reader.cpp      | 20 +++++---
 .../segment_v2/inverted_index_fs_directory.cpp     | 49 ++++++++----------
 .../rowset/segment_v2/inverted_index_reader.cpp    | 15 +-----
 .../olap/rowset/segment_v2/inverted_index_reader.h |  2 -
 .../test_index_not_found_fault_injection.out       | 13 +++++
 .../test_index_not_found_fault_injection.groovy    | 59 ++++++++++++++++++++++
 7 files changed, 108 insertions(+), 52 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index dd200e10e72..5db9db68e44 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit dd200e10e72120445bd897f3dcc515702f4dc80b
+Subproject commit 5db9db68e448b8ccfd360d02666bbac44e6f8d1a
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
index 7a744ea939e..dbd86bb93a5 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
@@ -23,6 +23,7 @@
 #include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
 #include "olap/rowset/segment_v2/inverted_index_fs_directory.h"
 #include "olap/tablet_schema.h"
+#include "util/debug_points.h"
 
 namespace doris::segment_v2 {
 
@@ -41,14 +42,17 @@ Status InvertedIndexFileReader::_init_from_v2(int32_t 
read_buffer_size) {
 
     std::unique_lock<std::shared_mutex> lock(_mutex); // Lock for writing
     try {
-        bool exists = false;
-        RETURN_IF_ERROR(_fs->exists(index_file_full_path, &exists));
-        if (!exists) {
+        int64_t file_size = 0;
+        Status st = _fs->file_size(index_file_full_path, &file_size);
+        DBUG_EXECUTE_IF("inverted file read error: index file not found", {
+            st = Status::Error<doris::ErrorCode::NOT_FOUND>("index file not 
found");
+        })
+        if (st.code() == ErrorCode::NOT_FOUND) {
             return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
                     "inverted index file {} is not found", 
index_file_full_path);
+        } else if (!st.ok()) {
+            return st;
         }
-        int64_t file_size = 0;
-        RETURN_IF_ERROR(_fs->file_size(index_file_full_path, &file_size));
         if (file_size == 0) {
             LOG(WARNING) << "inverted index file " << index_file_full_path << 
" is empty.";
             return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
@@ -157,6 +161,10 @@ Result<std::unique_ptr<DorisCompoundReader>> 
InvertedIndexFileReader::_open(
                 dir->close();
                 _CLDELETE(dir)
             }
+            if (err.number() == CL_ERR_FileNotFound) {
+                return 
ResultError(Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
+                        "inverted index path: {} not exist.", 
index_file_path));
+            }
             return 
ResultError(Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
                     "CLuceneError occur when open idx file {}, error msg: {}", 
index_file_path,
                     err.what()));
@@ -174,7 +182,7 @@ Result<std::unique_ptr<DorisCompoundReader>> 
InvertedIndexFileReader::_open(
         if (index_it == _indices_entries.end()) {
             std::ostringstream errMsg;
             errMsg << "No index with id " << index_id << " found";
-            return 
ResultError(Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
+            return 
ResultError(Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
                     "CLuceneError occur when open idx file {}, error msg: {}",
                     
InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix),
                     errMsg.str()));
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
index 499a3e41d9a..54d484d1199 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
@@ -112,30 +112,28 @@ bool DorisFSDirectory::FSIndexInput::open(const 
io::FileSystemSPtr& fs, const ch
     reader_options.cache_type = config::enable_file_cache ? 
io::FileCachePolicy::FILE_BLOCK_CACHE
                                                           : 
io::FileCachePolicy::NO_CACHE;
     reader_options.is_doris_table = true;
-    if (!fs->open_file(path, &h->_reader, &reader_options).ok()) {
-        error.set(CL_ERR_IO, "open file error");
+    Status st = fs->open_file(path, &h->_reader, &reader_options);
+    DBUG_EXECUTE_IF("inverted file read error: index file not found",
+                    { st = Status::Error<doris::ErrorCode::NOT_FOUND>("index 
file not found"); })
+    if (st.code() == ErrorCode::NOT_FOUND) {
+        error.set(CL_ERR_FileNotFound, "File does not exist");
+    } else if (st.code() == ErrorCode::IO_ERROR) {
+        error.set(CL_ERR_IO, "File open io error");
+    } else if (st.code() == ErrorCode::PERMISSION_DENIED) {
+        error.set(CL_ERR_IO, "File Access denied");
+    } else {
+        error.set(CL_ERR_IO, "Could not open file");
     }
 
     //Check if a valid handle was retrieved
-    if (h->_reader) {
+    if (st.ok() && h->_reader) {
         //Store the file length
         h->_length = h->_reader->size();
         h->_fpos = 0;
         ret = _CLNEW FSIndexInput(std::move(h), buffer_size);
         return true;
-
-    } else {
-        int err = errno;
-        if (err == ENOENT) {
-            error.set(CL_ERR_IO, "File does not exist");
-        } else if (err == EACCES) {
-            error.set(CL_ERR_IO, "File Access denied");
-        } else if (err == EMFILE) {
-            error.set(CL_ERR_IO, "Too many open files");
-        } else {
-            error.set(CL_ERR_IO, "Could not open file");
-        }
     }
+
     //delete h->_shared_lock;
     //_CLDECDELETE(h)
     return false;
@@ -349,19 +347,6 @@ void DorisFSDirectory::init(const io::FileSystemSPtr& fs, 
const char* path,
     }
 
     lucene::store::Directory::setLockFactory(lock_factory);
-
-    // It's fail checking directory existence in S3.
-    if (_fs->type() == io::FileSystemType::S3) {
-        return;
-    }
-    bool exists = false;
-    LOG_AND_THROW_IF_ERROR(_fs->exists(directory, &exists),
-                           "Doris compound directory init IO error");
-    if (!exists) {
-        auto e = "Doris compound directory init error: " + directory + " is 
not a directory";
-        LOG(WARNING) << e;
-        _CLTHROWA(CL_ERR_IO, e.c_str());
-    }
 }
 
 void DorisFSDirectory::priv_getFN(char* buffer, const char* name) const {
@@ -432,7 +417,13 @@ int64_t DorisFSDirectory::fileLength(const char* name) 
const {
     char buffer[CL_MAX_DIR];
     priv_getFN(buffer, name);
     int64_t size = -1;
-    LOG_AND_THROW_IF_ERROR(_fs->file_size(buffer, &size), "Get file size IO 
error");
+    Status st = _fs->file_size(buffer, &size);
+    DBUG_EXECUTE_IF("inverted file read error: index file not found",
+                    { st = Status::Error<doris::ErrorCode::NOT_FOUND>("index 
file not found"); })
+    if (st.code() == ErrorCode::NOT_FOUND) {
+        _CLTHROWA(CL_ERR_FileNotFound, "File does not exist");
+    }
+    LOG_AND_THROW_IF_ERROR(st, "Get file size IO error");
     return size;
 }
 
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
index a91dfe169d5..57479add2f7 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
@@ -201,8 +201,6 @@ Status 
InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach
             return Status::OK();
         }
 
-        RETURN_IF_ERROR(check_file_exist(index_file_key));
-
         if (!dir) {
             // TODO: ugly code here, try to refact.
             auto directory = 
DORIS_TRY(_inverted_index_file_reader->open(&_index_meta));
@@ -252,7 +250,7 @@ Status InvertedIndexReader::handle_searcher_cache(
         auto mem_tracker = 
std::make_unique<MemTracker>("InvertedIndexSearcherCacheWithRead");
         SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer);
         IndexSearcherPtr searcher;
-        RETURN_IF_ERROR(check_file_exist(index_file_key));
+
         auto dir = DORIS_TRY(_inverted_index_file_reader->open(&_index_meta));
         // try to reuse index_searcher's directory to read null_bitmap to cache
         // to avoid open directory additionally for null_bitmap
@@ -286,17 +284,6 @@ Status 
InvertedIndexReader::create_index_searcher(lucene::store::Directory* dir,
     return Status::OK();
 };
 
-Status InvertedIndexReader::check_file_exist(const std::string& 
index_file_key) {
-    bool exists = false;
-    
RETURN_IF_ERROR(_inverted_index_file_reader->index_file_exist(&_index_meta, 
&exists));
-    if (!exists) {
-        LOG(WARNING) << "inverted index: " << index_file_key << " not exist.";
-        return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>(
-                "inverted index input file {} not found", index_file_key);
-    }
-    return Status::OK();
-}
-
 Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, 
RuntimeState* runtime_state,
                                          
std::unique_ptr<InvertedIndexIterator>* iterator) {
     *iterator = InvertedIndexIterator::create_unique(stats, runtime_state, 
shared_from_this());
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h 
b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
index 9d603a85d12..90df92f0728 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h
@@ -141,8 +141,6 @@ public:
                                         MemTracker* mem_tracker,
                                         InvertedIndexReaderType reader_type);
 
-    Status check_file_exist(const std::string& index_file_key);
-
 protected:
     friend class InvertedIndexIterator;
     std::shared_ptr<InvertedIndexFileReader> _inverted_index_file_reader;
diff --git 
a/regression-test/data/fault_injection_p0/test_index_not_found_fault_injection.out
 
b/regression-test/data/fault_injection_p0/test_index_not_found_fault_injection.out
new file mode 100644
index 00000000000..e755b8cf9d8
--- /dev/null
+++ 
b/regression-test/data/fault_injection_p0/test_index_not_found_fault_injection.out
@@ -0,0 +1,13 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !sql --
+3
+
+-- !sql --
+3
+
+-- !sql --
+3
+
+-- !sql --
+3
+
diff --git 
a/regression-test/suites/fault_injection_p0/test_index_not_found_fault_injection.groovy
 
b/regression-test/suites/fault_injection_p0/test_index_not_found_fault_injection.groovy
new file mode 100644
index 00000000000..6b27fa9b2b3
--- /dev/null
+++ 
b/regression-test/suites/fault_injection_p0/test_index_not_found_fault_injection.groovy
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_index_not_found_fault_injection", "nonConcurrent") {
+    def testTable = "test_index_not_found"
+
+    sql "DROP TABLE IF EXISTS ${testTable}"
+    sql """
+        CREATE TABLE ${testTable} (
+          `@timestamp` int(11) NULL COMMENT "",
+          `clientip` string NULL COMMENT "",
+          `request` string NULL COMMENT "",
+          `status` string NULL COMMENT "",
+          `size` string NULL COMMENT "",
+          INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = 
"unicode", "support_phrase" = "true") COMMENT '',
+          INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = 
"unicode", "support_phrase" = "true") COMMENT '',
+          INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = 
"unicode", "support_phrase" = "true") COMMENT '',
+          INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = 
"unicode", "support_phrase" = "true") COMMENT ''
+          ) ENGINE=OLAP
+          DUPLICATE KEY(`@timestamp`)
+          COMMENT "OLAP"
+          DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
+          PROPERTIES (
+          "replication_allocation" = "tag.location.default: 1",
+          "disable_auto_compaction" = "true"
+        );
+        """
+
+    sql """ INSERT INTO ${testTable} VALUES (893964617, '40.135.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 200, 24736); """
+    sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 200, 3781); """
+    sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 304, 0); """
+
+    try {
+        GetDebugPoint().enableDebugPointForAllBEs("inverted file read error: 
index file not found")
+
+        qt_sql """ select count() from ${testTable} where (request 
match_phrase 'http');  """
+        qt_sql """ select count() from ${testTable} where (request 
match_phrase_prefix 'http');  """
+
+        qt_sql """ select count() from ${testTable} where (clientip 
match_phrase 'http' or request match_phrase 'http' or status match_phrase 
'http' or size match_phrase 'http');  """
+        qt_sql """ select count() from ${testTable} where (clientip 
match_phrase_prefix 'http' or request match_phrase_prefix 'http' or status 
match_phrase_prefix 'http' or size match_phrase_prefix 'http');  """
+    } finally {
+        GetDebugPoint().disableDebugPointForAllBEs("inverted file read error: 
index file not found")
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to