This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new a25ac144323 [Improvement](inverted index) Remove the check for inverted index file exists (#36945) a25ac144323 is described below commit a25ac144323ddc77f32766dee9a76c6bd070898c Author: Sun Chenyang <csun5...@gmail.com> AuthorDate: Tue Jul 2 15:10:08 2024 +0800 [Improvement](inverted index) Remove the check for inverted index file exists (#36945) Remove the check for inverted index file exists to avoid latency of remote fs eg. s3 --- be/src/clucene | 2 +- .../segment_v2/inverted_index_file_reader.cpp | 20 +++++--- .../segment_v2/inverted_index_fs_directory.cpp | 49 ++++++++---------- .../rowset/segment_v2/inverted_index_reader.cpp | 15 +----- .../olap/rowset/segment_v2/inverted_index_reader.h | 2 - .../test_index_not_found_fault_injection.out | 13 +++++ .../test_index_not_found_fault_injection.groovy | 59 ++++++++++++++++++++++ 7 files changed, 108 insertions(+), 52 deletions(-) diff --git a/be/src/clucene b/be/src/clucene index dd200e10e72..5db9db68e44 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit dd200e10e72120445bd897f3dcc515702f4dc80b +Subproject commit 5db9db68e448b8ccfd360d02666bbac44e6f8d1a diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp index 7a744ea939e..dbd86bb93a5 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp @@ -23,6 +23,7 @@ #include "olap/rowset/segment_v2/inverted_index_compound_reader.h" #include "olap/rowset/segment_v2/inverted_index_fs_directory.h" #include "olap/tablet_schema.h" +#include "util/debug_points.h" namespace doris::segment_v2 { @@ -41,14 +42,17 @@ Status InvertedIndexFileReader::_init_from_v2(int32_t read_buffer_size) { std::unique_lock<std::shared_mutex> lock(_mutex); // Lock for writing try { - bool exists = false; - RETURN_IF_ERROR(_fs->exists(index_file_full_path, &exists)); - if (!exists) { + int64_t file_size = 0; + Status st = _fs->file_size(index_file_full_path, &file_size); + DBUG_EXECUTE_IF("inverted file read error: index file not found", { + st = Status::Error<doris::ErrorCode::NOT_FOUND>("index file not found"); + }) + if (st.code() == ErrorCode::NOT_FOUND) { return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( "inverted index file {} is not found", index_file_full_path); + } else if (!st.ok()) { + return st; } - int64_t file_size = 0; - RETURN_IF_ERROR(_fs->file_size(index_file_full_path, &file_size)); if (file_size == 0) { LOG(WARNING) << "inverted index file " << index_file_full_path << " is empty."; return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( @@ -157,6 +161,10 @@ Result<std::unique_ptr<DorisCompoundReader>> InvertedIndexFileReader::_open( dir->close(); _CLDELETE(dir) } + if (err.number() == CL_ERR_FileNotFound) { + return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( + "inverted index path: {} not exist.", index_file_path)); + } return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( "CLuceneError occur when open idx file {}, error msg: {}", index_file_path, err.what())); @@ -174,7 +182,7 @@ Result<std::unique_ptr<DorisCompoundReader>> InvertedIndexFileReader::_open( if (index_it == _indices_entries.end()) { std::ostringstream errMsg; errMsg << "No index with id " << index_id << " found"; - return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>( + return ResultError(Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( "CLuceneError occur when open idx file {}, error msg: {}", InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix), errMsg.str())); diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp index 499a3e41d9a..54d484d1199 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp @@ -112,30 +112,28 @@ bool DorisFSDirectory::FSIndexInput::open(const io::FileSystemSPtr& fs, const ch reader_options.cache_type = config::enable_file_cache ? io::FileCachePolicy::FILE_BLOCK_CACHE : io::FileCachePolicy::NO_CACHE; reader_options.is_doris_table = true; - if (!fs->open_file(path, &h->_reader, &reader_options).ok()) { - error.set(CL_ERR_IO, "open file error"); + Status st = fs->open_file(path, &h->_reader, &reader_options); + DBUG_EXECUTE_IF("inverted file read error: index file not found", + { st = Status::Error<doris::ErrorCode::NOT_FOUND>("index file not found"); }) + if (st.code() == ErrorCode::NOT_FOUND) { + error.set(CL_ERR_FileNotFound, "File does not exist"); + } else if (st.code() == ErrorCode::IO_ERROR) { + error.set(CL_ERR_IO, "File open io error"); + } else if (st.code() == ErrorCode::PERMISSION_DENIED) { + error.set(CL_ERR_IO, "File Access denied"); + } else { + error.set(CL_ERR_IO, "Could not open file"); } //Check if a valid handle was retrieved - if (h->_reader) { + if (st.ok() && h->_reader) { //Store the file length h->_length = h->_reader->size(); h->_fpos = 0; ret = _CLNEW FSIndexInput(std::move(h), buffer_size); return true; - - } else { - int err = errno; - if (err == ENOENT) { - error.set(CL_ERR_IO, "File does not exist"); - } else if (err == EACCES) { - error.set(CL_ERR_IO, "File Access denied"); - } else if (err == EMFILE) { - error.set(CL_ERR_IO, "Too many open files"); - } else { - error.set(CL_ERR_IO, "Could not open file"); - } } + //delete h->_shared_lock; //_CLDECDELETE(h) return false; @@ -349,19 +347,6 @@ void DorisFSDirectory::init(const io::FileSystemSPtr& fs, const char* path, } lucene::store::Directory::setLockFactory(lock_factory); - - // It's fail checking directory existence in S3. - if (_fs->type() == io::FileSystemType::S3) { - return; - } - bool exists = false; - LOG_AND_THROW_IF_ERROR(_fs->exists(directory, &exists), - "Doris compound directory init IO error"); - if (!exists) { - auto e = "Doris compound directory init error: " + directory + " is not a directory"; - LOG(WARNING) << e; - _CLTHROWA(CL_ERR_IO, e.c_str()); - } } void DorisFSDirectory::priv_getFN(char* buffer, const char* name) const { @@ -432,7 +417,13 @@ int64_t DorisFSDirectory::fileLength(const char* name) const { char buffer[CL_MAX_DIR]; priv_getFN(buffer, name); int64_t size = -1; - LOG_AND_THROW_IF_ERROR(_fs->file_size(buffer, &size), "Get file size IO error"); + Status st = _fs->file_size(buffer, &size); + DBUG_EXECUTE_IF("inverted file read error: index file not found", + { st = Status::Error<doris::ErrorCode::NOT_FOUND>("index file not found"); }) + if (st.code() == ErrorCode::NOT_FOUND) { + _CLTHROWA(CL_ERR_FileNotFound, "File does not exist"); + } + LOG_AND_THROW_IF_ERROR(st, "Get file size IO error"); return size; } diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index a91dfe169d5..57479add2f7 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -201,8 +201,6 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach return Status::OK(); } - RETURN_IF_ERROR(check_file_exist(index_file_key)); - if (!dir) { // TODO: ugly code here, try to refact. auto directory = DORIS_TRY(_inverted_index_file_reader->open(&_index_meta)); @@ -252,7 +250,7 @@ Status InvertedIndexReader::handle_searcher_cache( auto mem_tracker = std::make_unique<MemTracker>("InvertedIndexSearcherCacheWithRead"); SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer); IndexSearcherPtr searcher; - RETURN_IF_ERROR(check_file_exist(index_file_key)); + auto dir = DORIS_TRY(_inverted_index_file_reader->open(&_index_meta)); // try to reuse index_searcher's directory to read null_bitmap to cache // to avoid open directory additionally for null_bitmap @@ -286,17 +284,6 @@ Status InvertedIndexReader::create_index_searcher(lucene::store::Directory* dir, return Status::OK(); }; -Status InvertedIndexReader::check_file_exist(const std::string& index_file_key) { - bool exists = false; - RETURN_IF_ERROR(_inverted_index_file_reader->index_file_exist(&_index_meta, &exists)); - if (!exists) { - LOG(WARNING) << "inverted index: " << index_file_key << " not exist."; - return Status::Error<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>( - "inverted index input file {} not found", index_file_key); - } - return Status::OK(); -} - Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state, std::unique_ptr<InvertedIndexIterator>* iterator) { *iterator = InvertedIndexIterator::create_unique(stats, runtime_state, shared_from_this()); diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index 9d603a85d12..90df92f0728 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -141,8 +141,6 @@ public: MemTracker* mem_tracker, InvertedIndexReaderType reader_type); - Status check_file_exist(const std::string& index_file_key); - protected: friend class InvertedIndexIterator; std::shared_ptr<InvertedIndexFileReader> _inverted_index_file_reader; diff --git a/regression-test/data/fault_injection_p0/test_index_not_found_fault_injection.out b/regression-test/data/fault_injection_p0/test_index_not_found_fault_injection.out new file mode 100644 index 00000000000..e755b8cf9d8 --- /dev/null +++ b/regression-test/data/fault_injection_p0/test_index_not_found_fault_injection.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +3 + +-- !sql -- +3 + +-- !sql -- +3 + +-- !sql -- +3 + diff --git a/regression-test/suites/fault_injection_p0/test_index_not_found_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_index_not_found_fault_injection.groovy new file mode 100644 index 00000000000..6b27fa9b2b3 --- /dev/null +++ b/regression-test/suites/fault_injection_p0/test_index_not_found_fault_injection.groovy @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_index_not_found_fault_injection", "nonConcurrent") { + def testTable = "test_index_not_found" + + sql "DROP TABLE IF EXISTS ${testTable}" + sql """ + CREATE TABLE ${testTable} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` string NULL COMMENT "", + `request` string NULL COMMENT "", + `status` string NULL COMMENT "", + `size` string NULL COMMENT "", + INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '', + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '', + INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '', + INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + sql """ INSERT INTO ${testTable} VALUES (893964617, '40.135.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 24736); """ + sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 3781); """ + sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 304, 0); """ + + try { + GetDebugPoint().enableDebugPointForAllBEs("inverted file read error: index file not found") + + qt_sql """ select count() from ${testTable} where (request match_phrase 'http'); """ + qt_sql """ select count() from ${testTable} where (request match_phrase_prefix 'http'); """ + + qt_sql """ select count() from ${testTable} where (clientip match_phrase 'http' or request match_phrase 'http' or status match_phrase 'http' or size match_phrase 'http'); """ + qt_sql """ select count() from ${testTable} where (clientip match_phrase_prefix 'http' or request match_phrase_prefix 'http' or status match_phrase_prefix 'http' or size match_phrase_prefix 'http'); """ + } finally { + GetDebugPoint().disableDebugPointForAllBEs("inverted file read error: index file not found") + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org