This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 005a36322e5 [opt](index compaction) optimize checks before index compaction (#25486)
005a36322e5 is described below

commit 005a36322e504eb31dc9518d298400b632cb8626
Author: qiye <jianliang5...@gmail.com>
AuthorDate: Thu Oct 26 11:21:46 2023 +0800

    [opt](index compaction) optimize checks before index compaction (#25486)
---
 be/src/olap/compaction.cpp | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index ebd6110be47..d1fab950584 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -47,6 +47,7 @@
 #include "olap/rowset/rowset_writer.h"
 #include "olap/rowset/rowset_writer_context.h"
 #include "olap/rowset/segment_v2/inverted_index_compaction.h"
+#include "olap/rowset/segment_v2/inverted_index_compound_directory.h"
 #include "olap/storage_engine.h"
 #include "olap/storage_policy.h"
 #include "olap/tablet.h"
@@ -550,6 +551,8 @@ Status 
Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool
                             BetaRowsetSharedPtr rowset =
                                     
std::static_pointer_cast<BetaRowset>(src_rs);
                             if (rowset == nullptr) {
+                                LOG(WARNING) << "tablet[" << 
_tablet->tablet_id()
+                                             << "] rowset is null, will skip 
index compaction";
                                 return false;
                             }
                             auto fs = rowset->rowset_meta()->fs();
@@ -557,6 +560,9 @@ Status 
Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool
                             auto index_meta =
                                     
rowset->tablet_schema()->get_inverted_index(unique_id);
                             if (index_meta == nullptr) {
+                                LOG(WARNING) << "tablet[" << 
_tablet->tablet_id()
+                                             << "] index_unique_id[" << 
unique_id
+                                             << "] index meta is null, will 
skip index compaction";
                                 return false;
                             }
                             for (auto i = 0; i < rowset->num_segments(); i++) {
@@ -571,10 +577,48 @@ Status 
Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool
                                     return false;
                                 }
                                 if (!exists) {
-                                    LOG(WARNING) << 
inverted_index_src_file_path
+                                    LOG(WARNING) << "tablet[" << 
_tablet->tablet_id()
+                                                 << "] index_unique_id[" << 
unique_id << "],"
+                                                 << 
inverted_index_src_file_path
                                                  << " is not exists, will skip 
index compaction";
                                     return false;
                                 }
+
+                                // check idx file size
+                                int64_t file_size = 0;
+                                if 
(fs->file_size(inverted_index_src_file_path, &file_size) !=
+                                    Status::OK()) {
+                                    LOG(ERROR) << inverted_index_src_file_path
+                                               << " fs->file_size error";
+                                    return false;
+                                }
+                                if (file_size == 0) {
+                                    LOG(WARNING) << "tablet[" << 
_tablet->tablet_id()
+                                                 << "] index_unique_id[" << 
unique_id << "],"
+                                                 << 
inverted_index_src_file_path
+                                                 << " is empty file, will skip 
index compaction";
+                                    return false;
+                                }
+
+                                // check index meta
+                                std::filesystem::path 
p(inverted_index_src_file_path);
+                                std::string dir_str = p.parent_path().string();
+                                std::string file_str = p.filename().string();
+                                lucene::store::Directory* dir =
+                                        
DorisCompoundDirectory::getDirectory(fs, dir_str.c_str());
+                                auto reader = new DorisCompoundReader(dir, 
file_str.c_str());
+                                std::vector<std::string> files;
+                                reader->list(&files);
+
+                                // Why 3? A BKD index writes at least 3 files, so a compound
+                                // file listing fewer entries is treated as corrupted.
+                                if (files.size() < 3) {
+                                    LOG(WARNING) << "tablet[" << 
_tablet->tablet_id()
+                                                 << "] index_unique_id[" << 
unique_id << "],"
+                                                 << 
inverted_index_src_file_path
+                                                 << " is corrupted, will skip 
index compaction";
+                                    return false;
+                                }
                             }
                             return true;
                         });


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to