This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 005a36322e5 [opt](index compaction) optimize checks before index compaction (#25486) 005a36322e5 is described below commit 005a36322e504eb31dc9518d298400b632cb8626 Author: qiye <jianliang5...@gmail.com> AuthorDate: Thu Oct 26 11:21:46 2023 +0800 [opt](index compaction) optimize checks before index compaction (#25486) --- be/src/olap/compaction.cpp | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index ebd6110be47..d1fab950584 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -47,6 +47,7 @@ #include "olap/rowset/rowset_writer.h" #include "olap/rowset/rowset_writer_context.h" #include "olap/rowset/segment_v2/inverted_index_compaction.h" +#include "olap/rowset/segment_v2/inverted_index_compound_directory.h" #include "olap/storage_engine.h" #include "olap/storage_policy.h" #include "olap/tablet.h" @@ -550,6 +551,8 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool BetaRowsetSharedPtr rowset = std::static_pointer_cast<BetaRowset>(src_rs); if (rowset == nullptr) { + LOG(WARNING) << "tablet[" << _tablet->tablet_id() + << "] rowset is null, will skip index compaction"; return false; } auto fs = rowset->rowset_meta()->fs(); @@ -557,6 +560,9 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool auto index_meta = rowset->tablet_schema()->get_inverted_index(unique_id); if (index_meta == nullptr) { + LOG(WARNING) << "tablet[" << _tablet->tablet_id() + << "] index_unique_id[" << unique_id + << "] index meta is null, will skip index compaction"; return false; } for (auto i = 0; i < rowset->num_segments(); i++) { @@ -571,10 +577,48 @@ Status Compaction::construct_output_rowset_writer(RowsetWriterContext& ctx, bool return false; } if (!exists) { - LOG(WARNING) << inverted_index_src_file_path + LOG(WARNING) << "tablet[" << _tablet->tablet_id() + << "] index_unique_id[" << unique_id << "]," + << inverted_index_src_file_path << " is not exists, will skip index compaction"; return false; } + + // check idx file size + int64_t file_size = 0; + if (fs->file_size(inverted_index_src_file_path, &file_size) != + Status::OK()) { + LOG(ERROR) << inverted_index_src_file_path + << " fs->file_size error"; + return false; + } + if (file_size == 0) { + LOG(WARNING) << "tablet[" << _tablet->tablet_id() + << "] index_unique_id[" << unique_id << "]," + << inverted_index_src_file_path + << " is empty file, will skip index compaction"; + return false; + } + + // check index meta + std::filesystem::path p(inverted_index_src_file_path); + std::string dir_str = p.parent_path().string(); + std::string file_str = p.filename().string(); + lucene::store::Directory* dir = + DorisCompoundDirectory::getDirectory(fs, dir_str.c_str()); + auto reader = new DorisCompoundReader(dir, file_str.c_str()); + std::vector<std::string> files; + reader->list(&files); + + // why is 3? + // bkd index will write at least 3 files + if (files.size() < 3) { + LOG(WARNING) << "tablet[" << _tablet->tablet_id() + << "] index_unique_id[" << unique_id << "]," + << inverted_index_src_file_path + << " is corrupted, will skip index compaction"; + return false; + } } return true; }); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org