This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 63f957d0ccd [opt](index compaction)Avoid get file size when create index reader and remove unnecessary file exists (#41079) 63f957d0ccd is described below commit 63f957d0ccdacb0919d77cce6b54de792e64be69 Author: qiye <jianliang5...@gmail.com> AuthorDate: Sun Sep 22 17:10:48 2024 +0800 [opt](index compaction)Avoid get file size when create index reader and remove unnecessary file exists (#41079) Getting a file's size and checking whether a file exists are very expensive operations in object storage. Index compaction may involve plenty of small files, so the resulting HEAD operations consume a lot of time. --- be/src/olap/compaction.cpp | 90 +++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 49 deletions(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 963b885a26b..646613cbdad 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -627,7 +627,8 @@ Status Compaction::do_inverted_index_compaction() { auto seg_path = DORIS_TRY(rowset->segment_path(seg_id)); auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( fs, std::string {InvertedIndexDescriptor::get_index_file_path_prefix(seg_path)}, - _cur_tablet_schema->get_inverted_index_storage_format()); + _cur_tablet_schema->get_inverted_index_storage_format(), + rowset->rowset_meta()->inverted_index_file_info(seg_id)); bool open_idx_file_cache = false; RETURN_NOT_OK_STATUS_WITH_WARN( inverted_index_file_reader->init(config::inverted_index_read_buffer_size, @@ -840,56 +841,47 @@ void Compaction::construct_skip_inverted_index(RowsetWriterContext& ctx) { return false; } - auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( - fs, - std::string { - InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path)}, - _cur_tablet_schema->get_inverted_index_storage_format()); - bool open_idx_file_cache = false; - auto st = 
inverted_index_file_reader->init(config::inverted_index_read_buffer_size, - open_idx_file_cache); - if (!st.ok()) { - LOG(WARNING) << "init index " - << inverted_index_file_reader->get_index_file_path(index_meta) - << " error:" << st; - return false; - } - - bool exists = false; - if (!inverted_index_file_reader->index_file_exist(index_meta, &exists).ok()) { - LOG(ERROR) << inverted_index_file_reader->get_index_file_path(index_meta) - << " fs->exists error"; - return false; - } - - if (!exists) { - LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" - << col_unique_id << "]," - << inverted_index_file_reader->get_index_file_path(index_meta) - << " is not exists, will skip index compaction"; - return false; - } + std::string index_file_path; + try { + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, + std::string { + InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path)}, + _cur_tablet_schema->get_inverted_index_storage_format(), + rowset->rowset_meta()->inverted_index_file_info(i)); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init( + config::inverted_index_read_buffer_size, open_idx_file_cache); + index_file_path = inverted_index_file_reader->get_index_file_path(index_meta); + if (!st.ok()) { + LOG(WARNING) << "init index " << index_file_path << " error:" << st; + return false; + } - // check index meta - auto result = inverted_index_file_reader->open(index_meta); - if (!result.has_value()) { - LOG(WARNING) << "open index " - << inverted_index_file_reader->get_index_file_path(index_meta) - << " error:" << result.error(); - return false; - } - auto reader = std::move(result.value()); - std::vector<std::string> files; - reader->list(&files); - reader->close(); - - // why is 3? 
- // bkd index will write at least 3 files - if (files.size() < 3) { + // check index meta + auto result = inverted_index_file_reader->open(index_meta); + if (!result.has_value()) { + LOG(WARNING) + << "open index " << index_file_path << " error:" << result.error(); + return false; + } + auto reader = std::move(result.value()); + std::vector<std::string> files; + reader->list(&files); + reader->close(); + + // why is 3? + // bkd index will write at least 3 files + if (files.size() < 3) { + LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" + << col_unique_id << "]," << index_file_path + << " is corrupted, will skip index compaction"; + return false; + } + } catch (CLuceneError& err) { LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] column_unique_id[" - << col_unique_id << "]," - << inverted_index_file_reader->get_index_file_path(index_meta) - << " is corrupted, will skip index compaction"; + << col_unique_id << "] open index[" << index_file_path + << "], will skip index compaction, error:" << err.what(); return false; } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org