This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 61bab1f1c00b65ebb133553ee48882f208aab8de
Author: airborne12 <airborn...@gmail.com>
AuthorDate: Thu Jan 25 10:15:25 2024 +0800

    [Fix](inverted index) fix data size when drop inverted index (#30327)
---
 be/src/olap/rowset/beta_rowset.cpp                 | 20 +++++++++++++
 be/src/olap/rowset/beta_rowset.h                   |  1 +
 be/src/olap/tablet_schema.cpp                      | 12 ++++++++
 be/src/olap/tablet_schema.h                        |  2 ++
 be/src/olap/task/index_builder.cpp                 | 34 +++++++++++++++++++---
 .../suites/inverted_index_p0/test_show_data.groovy | 14 +++++++--
 6 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp
index 44b88296fbd..057e3411f4f 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -92,6 +92,26 @@ Status BetaRowset::do_load(bool /*use_cache*/) {
     return Status::OK();
 }
 
+Status BetaRowset::get_inverted_index_size_by_index_id(int64_t index_id, size_t* index_size) {
+    auto fs = _rowset_meta->fs();
+    if (!fs || _schema == nullptr) {
+        return Status::Error<INIT_FAILED>("get fs failed");
+    }
+    for (int seg_id = 0; seg_id < num_segments(); ++seg_id) {
+        auto seg_path = segment_file_path(seg_id);
+        int64_t file_size = 0;
+        const auto* index = _schema->get_inverted_index_with_index_id(index_id, "");
+        if (index == nullptr || index->index_type() != IndexType::INVERTED) {
+            continue;
+        }
+        std::string inverted_index_file_path = InvertedIndexDescriptor::get_index_file_name(
+                seg_path, index_id, index->get_index_suffix());
+        RETURN_IF_ERROR(fs->file_size(inverted_index_file_path, &file_size));
+        *index_size += file_size;
+    }
+    return Status::OK();
+}
+
 Status BetaRowset::get_segments_size(std::vector<size_t>* segments_size) {
     auto fs = _rowset_meta->fs();
     if (!fs || _schema == nullptr) {
diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h
index d404be13ea7..9d87eb6bf7d 100644
--- a/be/src/olap/rowset/beta_rowset.h
+++ b/be/src/olap/rowset/beta_rowset.h
@@ -94,6 +94,7 @@ public:
     Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment);
 
     Status get_segments_size(std::vector<size_t>* segments_size);
+    Status get_inverted_index_size_by_index_id(int64_t index_id, size_t* index_size);
 
     [[nodiscard]] virtual Status add_to_binlog() override;
 
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 78e7e938caa..bd54af19603 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -1221,6 +1221,18 @@ bool TabletSchema::has_inverted_index_with_index_id(int32_t index_id,
     return false;
 }
 
+const TabletIndex* TabletSchema::get_inverted_index_with_index_id(
+        int32_t index_id, const std::string& suffix_name) const {
+    for (size_t i = 0; i < _indexes.size(); i++) {
+        if (_indexes[i].index_type() == IndexType::INVERTED &&
+            _indexes[i].get_index_suffix() == suffix_name && _indexes[i].index_id() == index_id) {
+            return &(_indexes[i]);
+        }
+    }
+
+    return nullptr;
+}
+
 const TabletIndex* TabletSchema::get_inverted_index(int32_t col_unique_id,
                                                     const std::string& suffix_path) const {
     for (size_t i = 0; i < _indexes.size(); i++) {
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index 21970b5cbac..613db2dcbae 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -304,6 +304,8 @@ public:
     std::vector<const TabletIndex*> get_indexes_for_column(const TabletColumn& col) const;
     bool has_inverted_index(const TabletColumn& col) const;
     bool has_inverted_index_with_index_id(int32_t index_id, const std::string& suffix_path) const;
+    const TabletIndex* get_inverted_index_with_index_id(int32_t index_id,
+                                                        const std::string& suffix_name) const;
     const TabletIndex* get_inverted_index(const TabletColumn& col) const;
     const TabletIndex* get_inverted_index(int32_t col_unique_id,
                                           const std::string& suffix_path) const;
diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp
index e7555c6b869..96f27e5d165 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -65,12 +65,38 @@ Status IndexBuilder::update_inverted_index_info() {
         const auto& input_rs_tablet_schema = input_rowset->tablet_schema();
         output_rs_tablet_schema->copy_from(*input_rs_tablet_schema);
         if (_is_drop_op) {
-            // base on input rowset's tablet_schema to build
-            // output rowset's tablet_schema which only remove
-            // the indexes specified in this drop index request
-            for (auto t_inverted_index : _alter_inverted_indexes) {
+            size_t total_index_size = 0;
+            for (const auto& t_inverted_index : _alter_inverted_indexes) {
+                auto* beta_rowset = reinterpret_cast<BetaRowset*>(input_rowset.get());
+                size_t index_size = 0;
+                RETURN_IF_ERROR(beta_rowset->get_inverted_index_size_by_index_id(
+                        t_inverted_index.index_id, &index_size));
+                total_index_size += index_size;
                 output_rs_tablet_schema->remove_index(t_inverted_index.index_id);
             }
+
+            auto input_rowset_meta = input_rowset->rowset_meta();
+            auto update_disk_size = [&](size_t& disk_size, const std::string& size_type) {
+                if (disk_size >= total_index_size) {
+                    disk_size -= total_index_size;
+                } else {
+                    LOG(WARNING) << "rowset " << input_rowset_meta->rowset_id() << " " << size_type
+                                 << " size:" << disk_size
+                                 << " is less than index size:" << total_index_size;
+                }
+            };
+
+            size_t before_size = input_rowset_meta->total_disk_size();
+            update_disk_size(before_size, "total disk");
+            input_rowset_meta->set_total_disk_size(before_size);
+
+            before_size = input_rowset_meta->data_disk_size();
+            update_disk_size(before_size, "data disk");
+            input_rowset_meta->set_data_disk_size(before_size);
+
+            before_size = input_rowset_meta->index_disk_size();
+            update_disk_size(before_size, "index");
+            input_rowset_meta->set_index_disk_size(before_size);
         } else {
             // base on input rowset's tablet_schema to build
             // output rowset's tablet_schema which only add
diff --git a/regression-test/suites/inverted_index_p0/test_show_data.groovy b/regression-test/suites/inverted_index_p0/test_show_data.groovy
index 7f9b43498ee..e59624be801 100644
--- a/regression-test/suites/inverted_index_p0/test_show_data.groovy
+++ b/regression-test/suites/inverted_index_p0/test_show_data.groovy
@@ -103,7 +103,7 @@ suite("test_show_data", "p0") {
             if (result.size() > 0) {
                 logger.info(table_name + " show data, detail: " + result[0].toString())
                 def size = result[0][2].replace(" KB", "").toDouble()
-                if (size > origin_size) {
+                if (size != origin_size) {
                     return size
                 }
             }
@@ -169,6 +169,11 @@ suite("test_show_data", "p0") {
         def with_index_size = wait_for_show_data_finish(testTableWithoutIndex, 300000, no_index_size)
         assertTrue(with_index_size != "wait_timeout")
 
+        sql """ ALTER TABLE ${testTableWithoutIndex} DROP INDEX idx_request """
+        wait_for_latest_op_on_table_finish(testTableWithoutIndex, timeout)
+        def another_no_index_size = wait_for_show_data_finish(testTableWithoutIndex, 300000, with_index_size)
+        assertEquals(another_no_index_size, no_index_size)
+
         sql "DROP TABLE IF EXISTS ${testTableWithIndex}"
         create_httplogs_table_with_index.call(testTableWithIndex)
         load_httplogs_data.call(testTableWithIndex, 'test_httplogs_load_with_index', 'true', 'json', 'documents-1000.json')
@@ -267,7 +272,7 @@ suite("test_show_data_for_bkd", "p0") {
             if (result.size() > 0) {
                 logger.info(table_name + " show data, detail: " + result[0].toString())
                 def size = result[0][2].replace(" KB", "").toDouble()
-                if (size > origin_size) {
+                if (size != origin_size) {
                     return size
                 }
             }
@@ -333,6 +338,11 @@ suite("test_show_data_for_bkd", "p0") {
         def with_index_size = wait_for_show_data_finish(testTableWithoutBKDIndex, 300000, no_index_size)
         assertTrue(with_index_size != "wait_timeout")
 
+        sql """ ALTER TABLE ${testTableWithoutBKDIndex} DROP INDEX idx_status """
+        wait_for_latest_op_on_table_finish(testTableWithoutBKDIndex, timeout)
+        def another_no_index_size = wait_for_show_data_finish(testTableWithoutBKDIndex, 300000, with_index_size)
+        assertEquals(another_no_index_size, no_index_size)
+
         sql "DROP TABLE IF EXISTS ${testTableWithBKDIndex}"
         create_httplogs_table_with_bkd_index.call(testTableWithBKDIndex)
         load_httplogs_data.call(testTableWithBKDIndex, 'test_httplogs_load_with_bkd_index', 'true', 'json', 'documents-1000.json')


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to