xiaokang commented on code in PR #30145: URL: https://github.com/apache/doris/pull/30145#discussion_r1500156854
########## be/src/olap/compaction.cpp: ########## @@ -490,29 +492,70 @@ Status CompactionMixin::do_inverted_index_compaction() { return Status::OK(); } - // src index files + // create index_writer to compaction indexes + const auto& fs = _output_rowset->rowset_meta()->fs(); + const auto& tablet_path = _tablet->tablet_path(); + + // src index dirs // format: rowsetId_segmentId - std::vector<std::string> src_index_files(src_segment_num); + std::vector<std::unique_ptr<InvertedIndexFileReader>> inverted_index_file_readers( + src_segment_num); for (const auto& m : src_seg_to_id_map) { std::pair<RowsetId, uint32_t> p = m.first; - src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second); + auto segment_file_name = p.first.to_string() + "_" + std::to_string(p.second) + ".dat"; + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, tablet_path, segment_file_name, + _cur_tablet_schema->get_inverted_index_storage_format()); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, + open_idx_file_cache); + if (!st.ok()) { + LOG(ERROR) << "init inverted index " + << InvertedIndexDescriptor::get_index_file_name(segment_file_name) + << " failed in compaction when init inverted index file reader"; + return st; + } + inverted_index_file_readers[m.second] = std::move(inverted_index_file_reader); } // dest index files // format: rowsetId_segmentId - std::vector<std::string> dest_index_files(dest_segment_num); + std::vector<std::unique_ptr<InvertedIndexFileWriter>> inverted_index_file_writers( + dest_segment_num); for (int i = 0; i < dest_segment_num; ++i) { - auto prefix = dest_rowset_id.to_string() + "_" + std::to_string(i); - dest_index_files[i] = prefix; + auto prefix = dest_rowset_id.to_string() + "_" + std::to_string(i) + ".dat"; + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, tablet_path, prefix, _cur_tablet_schema->get_inverted_index_storage_format()); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, + open_idx_file_cache); + if (st.ok()) { + auto index_not_need_to_compact = + DORIS_TRY(inverted_index_file_reader->get_all_directories()); + auto inverted_index_file_writer = std::make_unique<InvertedIndexFileWriter>( + fs, tablet_path, prefix, + _cur_tablet_schema->get_inverted_index_storage_format()); + RETURN_NOT_OK_STATUS_WITH_WARN( + inverted_index_file_writer->initialize(index_not_need_to_compact), Review Comment: reader.init and write.initialize are not consistent ########## be/src/olap/compaction.cpp: ########## @@ -490,29 +492,70 @@ Status CompactionMixin::do_inverted_index_compaction() { return Status::OK(); } - // src index files + // create index_writer to compaction indexes + const auto& fs = _output_rowset->rowset_meta()->fs(); + const auto& tablet_path = _tablet->tablet_path(); + + // src index dirs // format: rowsetId_segmentId - std::vector<std::string> src_index_files(src_segment_num); + std::vector<std::unique_ptr<InvertedIndexFileReader>> inverted_index_file_readers( + src_segment_num); for (const auto& m : src_seg_to_id_map) { std::pair<RowsetId, uint32_t> p = m.first; - src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second); + auto segment_file_name = p.first.to_string() + "_" + std::to_string(p.second) + ".dat"; + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, tablet_path, segment_file_name, + _cur_tablet_schema->get_inverted_index_storage_format()); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, + open_idx_file_cache); + if (!st.ok()) { + LOG(ERROR) << "init inverted index " + << InvertedIndexDescriptor::get_index_file_name(segment_file_name) + << " failed in compaction when init inverted index file reader"; + return st; + } + inverted_index_file_readers[m.second] = std::move(inverted_index_file_reader); } // dest index files // format: rowsetId_segmentId - std::vector<std::string> dest_index_files(dest_segment_num); + std::vector<std::unique_ptr<InvertedIndexFileWriter>> inverted_index_file_writers( + dest_segment_num); for (int i = 0; i < dest_segment_num; ++i) { - auto prefix = dest_rowset_id.to_string() + "_" + std::to_string(i); - dest_index_files[i] = prefix; + auto prefix = dest_rowset_id.to_string() + "_" + std::to_string(i) + ".dat"; + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, tablet_path, prefix, _cur_tablet_schema->get_inverted_index_storage_format()); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, + open_idx_file_cache); + if (st.ok()) { + auto index_not_need_to_compact = + DORIS_TRY(inverted_index_file_reader->get_all_directories()); + auto inverted_index_file_writer = std::make_unique<InvertedIndexFileWriter>( + fs, tablet_path, prefix, + _cur_tablet_schema->get_inverted_index_storage_format()); + RETURN_NOT_OK_STATUS_WITH_WARN( + inverted_index_file_writer->initialize(index_not_need_to_compact), + "failed to initialize inverted_index_file_writer for " + + inverted_index_file_writer->get_index_file_name()); + inverted_index_file_writers[i] = std::move(inverted_index_file_writer); + } else if (st.is<ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND>()) { Review Comment: continue if INVERTED_INDEX_FILE_NOT_FOUND? ########## be/src/olap/compaction.cpp: ########## @@ -490,29 +492,70 @@ Status CompactionMixin::do_inverted_index_compaction() { return Status::OK(); } - // src index files + // create index_writer to compaction indexes + const auto& fs = _output_rowset->rowset_meta()->fs(); + const auto& tablet_path = _tablet->tablet_path(); + + // src index dirs // format: rowsetId_segmentId - std::vector<std::string> src_index_files(src_segment_num); + std::vector<std::unique_ptr<InvertedIndexFileReader>> inverted_index_file_readers( + src_segment_num); for (const auto& m : src_seg_to_id_map) { std::pair<RowsetId, uint32_t> p = m.first; - src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second); + auto segment_file_name = p.first.to_string() + "_" + std::to_string(p.second) + ".dat"; + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, tablet_path, segment_file_name, + _cur_tablet_schema->get_inverted_index_storage_format()); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, + open_idx_file_cache); + if (!st.ok()) { + LOG(ERROR) << "init inverted index " + << InvertedIndexDescriptor::get_index_file_name(segment_file_name) + << " failed in compaction when init inverted index file reader"; + return st; + } + inverted_index_file_readers[m.second] = std::move(inverted_index_file_reader); } // dest index files // format: rowsetId_segmentId - std::vector<std::string> dest_index_files(dest_segment_num); + std::vector<std::unique_ptr<InvertedIndexFileWriter>> inverted_index_file_writers( + dest_segment_num); for (int i = 0; i < dest_segment_num; ++i) { - auto prefix = dest_rowset_id.to_string() + "_" + std::to_string(i); - dest_index_files[i] = prefix; + auto prefix = dest_rowset_id.to_string() + "_" + std::to_string(i) + ".dat"; + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, tablet_path, prefix, _cur_tablet_schema->get_inverted_index_storage_format()); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, + open_idx_file_cache); + if (st.ok()) { + auto index_not_need_to_compact = Review Comment: what does the name index_not_need_to_compact stand for? ########## be/src/olap/compaction.cpp: ########## @@ -490,29 +492,70 @@ Status CompactionMixin::do_inverted_index_compaction() { return Status::OK(); } - // src index files + // create index_writer to compaction indexes + const auto& fs = _output_rowset->rowset_meta()->fs(); + const auto& tablet_path = _tablet->tablet_path(); + + // src index dirs // format: rowsetId_segmentId - std::vector<std::string> src_index_files(src_segment_num); + std::vector<std::unique_ptr<InvertedIndexFileReader>> inverted_index_file_readers( + src_segment_num); for (const auto& m : src_seg_to_id_map) { std::pair<RowsetId, uint32_t> p = m.first; - src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second); + auto segment_file_name = p.first.to_string() + "_" + std::to_string(p.second) + ".dat"; Review Comment: avoid ".dat" magic string ########## be/src/olap/compaction.cpp: ########## @@ -582,53 +640,54 @@ void CompactionMixin::construct_skip_inverted_index(RowsetWriterContext& ctx) { for (auto i = 0; i < rowset->num_segments(); i++) { auto segment_file = rowset->segment_file_path(i); - std::string inverted_index_src_file_path = - InvertedIndexDescriptor::get_index_file_name( - segment_file, index_meta->index_id(), - index_meta->get_index_suffix()); + io::Path segment_path(segment_file); + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, segment_path.parent_path(), segment_path.filename(), Review Comment: It's simpler to just pass a single segment_file_path and seperate it in InvertedIndexFileReader ########## be/src/olap/compaction.cpp: ########## @@ -582,53 +640,54 @@ void CompactionMixin::construct_skip_inverted_index(RowsetWriterContext& ctx) { for (auto i = 0; i < rowset->num_segments(); i++) { auto segment_file = rowset->segment_file_path(i); - std::string inverted_index_src_file_path = - InvertedIndexDescriptor::get_index_file_name( - segment_file, index_meta->index_id(), - index_meta->get_index_suffix()); + io::Path segment_path(segment_file); + auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>( + fs, segment_path.parent_path(), segment_path.filename(), + _cur_tablet_schema->get_inverted_index_storage_format()); + bool open_idx_file_cache = false; + auto st = inverted_index_file_reader->init(config::inverted_index_read_buffer_size, + open_idx_file_cache); + if (!st.ok()) { + LOG(WARNING) << "init index " + << inverted_index_file_reader->get_index_file_path(index_meta) + << " error:" << st; + return false; + } + bool exists = false; - if (!fs->exists(inverted_index_src_file_path, &exists).ok()) { - LOG(ERROR) << inverted_index_src_file_path << " fs->exists error"; + if (!inverted_index_file_reader->index_file_exist(index_meta, &exists).ok()) { + LOG(ERROR) << inverted_index_file_reader->get_index_file_path(index_meta) + << " fs->exists error"; Review Comment: index_file_exist error -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org