This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
commit 61bab1f1c00b65ebb133553ee48882f208aab8de Author: airborne12 <airborn...@gmail.com> AuthorDate: Thu Jan 25 10:15:25 2024 +0800 [Fix](inverted index) fix data size when drop inverted index (#30327) --- be/src/olap/rowset/beta_rowset.cpp | 20 +++++++++++++ be/src/olap/rowset/beta_rowset.h | 1 + be/src/olap/tablet_schema.cpp | 12 ++++++++ be/src/olap/tablet_schema.h | 2 ++ be/src/olap/task/index_builder.cpp | 34 +++++++++++++++++++--- .../suites/inverted_index_p0/test_show_data.groovy | 14 +++++++-- 6 files changed, 77 insertions(+), 6 deletions(-) diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 44b88296fbd..057e3411f4f 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -92,6 +92,26 @@ Status BetaRowset::do_load(bool /*use_cache*/) { return Status::OK(); } +Status BetaRowset::get_inverted_index_size_by_index_id(int64_t index_id, size_t* index_size) { + auto fs = _rowset_meta->fs(); + if (!fs || _schema == nullptr) { + return Status::Error<INIT_FAILED>("get fs failed"); + } + for (int seg_id = 0; seg_id < num_segments(); ++seg_id) { + auto seg_path = segment_file_path(seg_id); + int64_t file_size = 0; + const auto* index = _schema->get_inverted_index_with_index_id(index_id, ""); + if (index == nullptr || index->index_type() != IndexType::INVERTED) { + continue; + } + std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_name( + seg_path, index_id, index->get_index_suffix()); + RETURN_IF_ERROR(fs->file_size(inverted_index_file_path, &file_size)); + *index_size += file_size; + } + return Status::OK(); +} + Status BetaRowset::get_segments_size(std::vector<size_t>* segments_size) { auto fs = _rowset_meta->fs(); if (!fs || _schema == nullptr) { diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index d404be13ea7..9d87eb6bf7d 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -94,6 +94,7 @@ 
public: Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment); Status get_segments_size(std::vector<size_t>* segments_size); + Status get_inverted_index_size_by_index_id(int64_t index_id, size_t* index_size); [[nodiscard]] virtual Status add_to_binlog() override; diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 78e7e938caa..bd54af19603 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -1221,6 +1221,18 @@ bool TabletSchema::has_inverted_index_with_index_id(int32_t index_id, return false; } +const TabletIndex* TabletSchema::get_inverted_index_with_index_id( + int32_t index_id, const std::string& suffix_name) const { + for (size_t i = 0; i < _indexes.size(); i++) { + if (_indexes[i].index_type() == IndexType::INVERTED && + _indexes[i].get_index_suffix() == suffix_name && _indexes[i].index_id() == index_id) { + return &(_indexes[i]); + } + } + + return nullptr; +} + const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id, const std::string& suffix_path) const { for (size_t i = 0; i < _indexes.size(); i++) { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 21970b5cbac..613db2dcbae 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -304,6 +304,8 @@ public: std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn& col) const; bool has_inverted_index(const TabletColumn& col) const; bool has_inverted_index_with_index_id(int32_t index_id, const std::string& suffix_path) const; + const TabletIndex* get_inverted_index_with_index_id(int32_t index_id, + const std::string& suffix_name) const; const TabletIndex* get_inverted_index(const TabletColumn& col) const; const TabletIndex* get_inverted_index(int32_t col_unique_id, const std::string& suffix_path) const; diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index e7555c6b869..96f27e5d165 100644 --- 
a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -65,12 +65,38 @@ Status IndexBuilder::update_inverted_index_info() { const auto& input_rs_tablet_schema = input_rowset->tablet_schema(); output_rs_tablet_schema->copy_from(*input_rs_tablet_schema); if (_is_drop_op) { - // base on input rowset's tablet_schema to build - // output rowset's tablet_schema which only remove - // the indexes specified in this drop index request - for (auto t_inverted_index : _alter_inverted_indexes) { + size_t total_index_size = 0; + for (const auto& t_inverted_index : _alter_inverted_indexes) { + auto* beta_rowset = reinterpret_cast<BetaRowset*>(input_rowset.get()); + size_t index_size = 0; + RETURN_IF_ERROR(beta_rowset->get_inverted_index_size_by_index_id( + t_inverted_index.index_id, &index_size)); + total_index_size += index_size; output_rs_tablet_schema->remove_index(t_inverted_index.index_id); } + + auto input_rowset_meta = input_rowset->rowset_meta(); + auto update_disk_size = [&](size_t& disk_size, const std::string& size_type) { + if (disk_size >= total_index_size) { + disk_size -= total_index_size; + } else { + LOG(WARNING) << "rowset " << input_rowset_meta->rowset_id() << " " << size_type + << " size:" << disk_size + << " is less than index size:" << total_index_size; + } + }; + + size_t before_size = input_rowset_meta->total_disk_size(); + update_disk_size(before_size, "total disk"); + input_rowset_meta->set_total_disk_size(before_size); + + before_size = input_rowset_meta->data_disk_size(); + update_disk_size(before_size, "data disk"); + input_rowset_meta->set_data_disk_size(before_size); + + before_size = input_rowset_meta->index_disk_size(); + update_disk_size(before_size, "index"); + input_rowset_meta->set_index_disk_size(before_size); } else { // base on input rowset's tablet_schema to build // output rowset's tablet_schema which only add diff --git a/regression-test/suites/inverted_index_p0/test_show_data.groovy 
b/regression-test/suites/inverted_index_p0/test_show_data.groovy index 7f9b43498ee..e59624be801 100644 --- a/regression-test/suites/inverted_index_p0/test_show_data.groovy +++ b/regression-test/suites/inverted_index_p0/test_show_data.groovy @@ -103,7 +103,7 @@ suite("test_show_data", "p0") { if (result.size() > 0) { logger.info(table_name + " show data, detail: " + result[0].toString()) def size = result[0][2].replace(" KB", "").toDouble() - if (size > origin_size) { + if (size != origin_size) { return size } } @@ -169,6 +169,11 @@ suite("test_show_data", "p0") { def with_index_size = wait_for_show_data_finish(testTableWithoutIndex, 300000, no_index_size) assertTrue(with_index_size != "wait_timeout") + sql """ ALTER TABLE ${testTableWithoutIndex} DROP INDEX idx_request """ + wait_for_latest_op_on_table_finish(testTableWithoutIndex, timeout) + def another_no_index_size = wait_for_show_data_finish(testTableWithoutIndex, 300000, with_index_size) + assertEquals(another_no_index_size, no_index_size) + sql "DROP TABLE IF EXISTS ${testTableWithIndex}" create_httplogs_table_with_index.call(testTableWithIndex) load_httplogs_data.call(testTableWithIndex, 'test_httplogs_load_with_index', 'true', 'json', 'documents-1000.json') @@ -267,7 +272,7 @@ suite("test_show_data_for_bkd", "p0") { if (result.size() > 0) { logger.info(table_name + " show data, detail: " + result[0].toString()) def size = result[0][2].replace(" KB", "").toDouble() - if (size > origin_size) { + if (size != origin_size) { return size } } @@ -333,6 +338,11 @@ suite("test_show_data_for_bkd", "p0") { def with_index_size = wait_for_show_data_finish(testTableWithoutBKDIndex, 300000, no_index_size) assertTrue(with_index_size != "wait_timeout") + sql """ ALTER TABLE ${testTableWithoutBKDIndex} DROP INDEX idx_status """ + wait_for_latest_op_on_table_finish(testTableWithoutBKDIndex, timeout) + def another_no_index_size = wait_for_show_data_finish(testTableWithoutBKDIndex, 300000, with_index_size) + 
assertEquals(another_no_index_size, no_index_size) + sql "DROP TABLE IF EXISTS ${testTableWithBKDIndex}" create_httplogs_table_with_bkd_index.call(testTableWithBKDIndex) load_httplogs_data.call(testTableWithBKDIndex, 'test_httplogs_load_with_bkd_index', 'true', 'json', 'documents-1000.json')

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org