This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 63f957d0ccd [opt](index compaction)Avoid get file size when create 
index reader and remove unnecessary file exists (#41079)
63f957d0ccd is described below

commit 63f957d0ccdacb0919d77cce6b54de792e64be69
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Sun Sep 22 17:10:48 2024 +0800

    [opt](index compaction)Avoid get file size when create index reader and 
remove unnecessary file exists (#41079)
    
    Getting a file's size and checking whether a file exists are very
    expensive operations in object storage.
    Index compaction may involve plenty of small files, so the resulting HEAD
    operations consume a lot of time.
---
 be/src/olap/compaction.cpp | 90 +++++++++++++++++++++-------------------------
 1 file changed, 41 insertions(+), 49 deletions(-)

diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 963b885a26b..646613cbdad 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -627,7 +627,8 @@ Status Compaction::do_inverted_index_compaction() {
         auto seg_path = DORIS_TRY(rowset->segment_path(seg_id));
         auto inverted_index_file_reader = 
std::make_unique<InvertedIndexFileReader>(
                 fs, std::string 
{InvertedIndexDescriptor::get_index_file_path_prefix(seg_path)},
-                _cur_tablet_schema->get_inverted_index_storage_format());
+                _cur_tablet_schema->get_inverted_index_storage_format(),
+                rowset->rowset_meta()->inverted_index_file_info(seg_id));
         bool open_idx_file_cache = false;
         RETURN_NOT_OK_STATUS_WITH_WARN(
                 
inverted_index_file_reader->init(config::inverted_index_read_buffer_size,
@@ -840,56 +841,47 @@ void 
Compaction::construct_skip_inverted_index(RowsetWriterContext& ctx) {
                     return false;
                 }
 
-                auto inverted_index_file_reader = 
std::make_unique<InvertedIndexFileReader>(
-                        fs,
-                        std::string {
-                                
InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path)},
-                        
_cur_tablet_schema->get_inverted_index_storage_format());
-                bool open_idx_file_cache = false;
-                auto st = 
inverted_index_file_reader->init(config::inverted_index_read_buffer_size,
-                                                           
open_idx_file_cache);
-                if (!st.ok()) {
-                    LOG(WARNING) << "init index "
-                                 << 
inverted_index_file_reader->get_index_file_path(index_meta)
-                                 << " error:" << st;
-                    return false;
-                }
-
-                bool exists = false;
-                if (!inverted_index_file_reader->index_file_exist(index_meta, 
&exists).ok()) {
-                    LOG(ERROR) << 
inverted_index_file_reader->get_index_file_path(index_meta)
-                               << " fs->exists error";
-                    return false;
-                }
-
-                if (!exists) {
-                    LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] 
column_unique_id["
-                                 << col_unique_id << "],"
-                                 << 
inverted_index_file_reader->get_index_file_path(index_meta)
-                                 << " is not exists, will skip index 
compaction";
-                    return false;
-                }
+                std::string index_file_path;
+                try {
+                    auto inverted_index_file_reader = 
std::make_unique<InvertedIndexFileReader>(
+                            fs,
+                            std::string {
+                                    
InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path)},
+                            
_cur_tablet_schema->get_inverted_index_storage_format(),
+                            
rowset->rowset_meta()->inverted_index_file_info(i));
+                    bool open_idx_file_cache = false;
+                    auto st = inverted_index_file_reader->init(
+                            config::inverted_index_read_buffer_size, 
open_idx_file_cache);
+                    index_file_path = 
inverted_index_file_reader->get_index_file_path(index_meta);
+                    if (!st.ok()) {
+                        LOG(WARNING) << "init index " << index_file_path << " 
error:" << st;
+                        return false;
+                    }
 
-                // check index meta
-                auto result = inverted_index_file_reader->open(index_meta);
-                if (!result.has_value()) {
-                    LOG(WARNING) << "open index "
-                                 << 
inverted_index_file_reader->get_index_file_path(index_meta)
-                                 << " error:" << result.error();
-                    return false;
-                }
-                auto reader = std::move(result.value());
-                std::vector<std::string> files;
-                reader->list(&files);
-                reader->close();
-
-                // why is 3?
-                // bkd index will write at least 3 files
-                if (files.size() < 3) {
+                    // check index meta
+                    auto result = inverted_index_file_reader->open(index_meta);
+                    if (!result.has_value()) {
+                        LOG(WARNING)
+                                << "open index " << index_file_path << " 
error:" << result.error();
+                        return false;
+                    }
+                    auto reader = std::move(result.value());
+                    std::vector<std::string> files;
+                    reader->list(&files);
+                    reader->close();
+
+                    // why is 3?
+                    // bkd index will write at least 3 files
+                    if (files.size() < 3) {
+                        LOG(WARNING) << "tablet[" << _tablet->tablet_id() << 
"] column_unique_id["
+                                     << col_unique_id << "]," << 
index_file_path
+                                     << " is corrupted, will skip index 
compaction";
+                        return false;
+                    }
+                } catch (CLuceneError& err) {
                     LOG(WARNING) << "tablet[" << _tablet->tablet_id() << "] 
column_unique_id["
-                                 << col_unique_id << "],"
-                                 << 
inverted_index_file_reader->get_index_file_path(index_meta)
-                                 << " is corrupted, will skip index 
compaction";
+                                 << col_unique_id << "] open index[" << 
index_file_path
+                                 << "], will skip index compaction, error:" << 
err.what();
                     return false;
                 }
             }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to