This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 2fb8d6895ca [fix](compaction) fix time series compaction merge empty rowsets priority (#34677)
2fb8d6895ca is described below

commit 2fb8d6895ca16e7912ea91ebacd7cf8ec4d2d0fe
Author: Sun Chenyang <csun5...@gmail.com>
AuthorDate: Sat May 11 07:55:25 2024 +0800

    [fix](compaction) fix time series compaction merge empty rowsets priority (#34677)
---
 .../cumulative_compaction_time_series_policy.cpp   | 39 ++++++-----
 ...mulative_compaction_time_series_policy_test.cpp | 79 ++++++++++++++++++++++
 2 files changed, 98 insertions(+), 20 deletions(-)

diff --git a/be/src/olap/cumulative_compaction_time_series_policy.cpp b/be/src/olap/cumulative_compaction_time_series_policy.cpp
index 05c25f80428..b1a8cddf20f 100644
--- a/be/src/olap/cumulative_compaction_time_series_policy.cpp
+++ b/be/src/olap/cumulative_compaction_time_series_policy.cpp
@@ -74,13 +74,6 @@ uint32_t TimeSeriesCumulativeCompactionPolicy::calc_cumulative_compaction_score(
         return 0;
     }
 
-    // If there is a continuous set of empty rowsets, prioritize merging.
-    auto consecutive_empty_rowsets = tablet->pick_first_consecutive_empty_rowsets(
-            tablet->tablet_meta()->time_series_compaction_empty_rowsets_threshold());
-    if (!consecutive_empty_rowsets.empty()) {
-        return score;
-    }
-
     // Condition 1: the size of input files for compaction meets the requirement of parameter compaction_goal_size
     int64_t compaction_goal_size_mbytes =
             tablet->tablet_meta()->time_series_compaction_goal_size_mbytes();
@@ -127,6 +120,13 @@ uint32_t TimeSeriesCumulativeCompactionPolicy::calc_cumulative_compaction_score(
         tablet->set_last_cumu_compaction_success_time(now);
     }
 
+    // Condition 5: If there is a continuous set of empty rowsets, prioritize merging.
+    auto consecutive_empty_rowsets = tablet->pick_first_consecutive_empty_rowsets(
+            tablet->tablet_meta()->time_series_compaction_empty_rowsets_threshold());
+    if (!consecutive_empty_rowsets.empty()) {
+        return score;
+    }
+
     return 0;
 }
 
@@ -216,19 +216,6 @@ int TimeSeriesCumulativeCompactionPolicy::pick_input_rowsets(
         return 0;
     }
 
-    // If their are many empty rowsets, maybe should be compacted
-    auto consecutive_empty_rowsets = tablet->pick_first_consecutive_empty_rowsets(
-            tablet->tablet_meta()->time_series_compaction_empty_rowsets_threshold());
-    if (!consecutive_empty_rowsets.empty()) {
-        VLOG_NOTICE << "tablet is " << tablet->tablet_id()
-                    << ", there are too many consecutive empty rowsets, size is "
-                    << consecutive_empty_rowsets.size();
-        input_rowsets->clear();
-        input_rowsets->insert(input_rowsets->end(), consecutive_empty_rowsets.begin(),
-                              consecutive_empty_rowsets.end());
-        return 0;
-    }
-
     int64_t compaction_goal_size_mbytes =
             tablet->tablet_meta()->time_series_compaction_goal_size_mbytes();
 
@@ -339,6 +326,18 @@ int TimeSeriesCumulativeCompactionPolicy::pick_input_rowsets(
     }
 
     input_rowsets->clear();
+    // Condition 5: If their are many empty rowsets, maybe should be compacted
+    auto consecutive_empty_rowsets = tablet->pick_first_consecutive_empty_rowsets(
+            tablet->tablet_meta()->time_series_compaction_empty_rowsets_threshold());
+    if (!consecutive_empty_rowsets.empty()) {
+        VLOG_NOTICE << "tablet is " << tablet->tablet_id()
+                    << ", there are too many consecutive empty rowsets, size is "
+                    << consecutive_empty_rowsets.size();
+        input_rowsets->clear();
+        input_rowsets->insert(input_rowsets->end(), consecutive_empty_rowsets.begin(),
+                              consecutive_empty_rowsets.end());
+        return 0;
+    }
     *compaction_score = 0;
 
     return 0;
diff --git a/be/test/olap/cumulative_compaction_time_series_policy_test.cpp b/be/test/olap/cumulative_compaction_time_series_policy_test.cpp
index 27ead8ccc4e..7c93d4d64b9 100644
--- a/be/test/olap/cumulative_compaction_time_series_policy_test.cpp
+++ b/be/test/olap/cumulative_compaction_time_series_policy_test.cpp
@@ -212,6 +212,58 @@ public:
         rs_metas->push_back(ptr5);
     }
 
+    void init_all_rs_meta_empty_nonoverlapping(std::vector<RowsetMetaSharedPtr>* rs_metas) {
+        RowsetMetaSharedPtr ptr1(new RowsetMeta());
+        init_rs_meta(ptr1, 0, 1);
+        ptr1->set_total_disk_size(1 * 1024);
+        rs_metas->push_back(ptr1);
+
+        RowsetMetaSharedPtr ptr2(new RowsetMeta());
+        init_rs_meta(ptr2, 2, 3);
+        ptr2->set_total_disk_size(2 * 1024);
+        rs_metas->push_back(ptr2);
+
+        RowsetMetaSharedPtr ptr3(new RowsetMeta());
+        init_rs_meta(ptr3, 4, 4);
+        ptr3->set_num_segments(0);
+        rs_metas->push_back(ptr3);
+
+        RowsetMetaSharedPtr ptr4(new RowsetMeta());
+        init_rs_meta(ptr4, 5, 5);
+        ptr4->set_num_segments(0);
+        rs_metas->push_back(ptr4);
+
+        RowsetMetaSharedPtr ptr5(new RowsetMeta());
+        init_rs_meta(ptr5, 6, 6);
+        ptr5->set_num_segments(0);
+        rs_metas->push_back(ptr5);
+
+        RowsetMetaSharedPtr ptr6(new RowsetMeta());
+        init_rs_meta(ptr6, 7, 7);
+        ptr6->set_num_segments(0);
+        rs_metas->push_back(ptr6);
+
+        RowsetMetaSharedPtr ptr7(new RowsetMeta());
+        init_rs_meta(ptr7, 8, 8);
+        ptr7->set_num_segments(0);
+        rs_metas->push_back(ptr7);
+
+        RowsetMetaSharedPtr ptr8(new RowsetMeta());
+        init_rs_meta(ptr8, 9, 9);
+        ptr8->set_num_segments(0);
+        rs_metas->push_back(ptr8);
+
+        RowsetMetaSharedPtr ptr9(new RowsetMeta());
+        init_rs_meta(ptr9, 10, 10);
+        ptr9->set_num_segments(0);
+        rs_metas->push_back(ptr9);
+
+        RowsetMetaSharedPtr ptr10(new RowsetMeta());
+        init_rs_meta(ptr10, 11, 11);
+        ptr10->set_total_disk_size(2 * 1024);
+        rs_metas->push_back(ptr10);
+    }
+
     void init_rs_meta_pick_empty(std::vector<RowsetMetaSharedPtr>* rs_metas) {
         RowsetMetaSharedPtr ptr1(new RowsetMeta());
         init_rs_meta(ptr1, 0, 1);
@@ -570,6 +622,33 @@ TEST_F(TestTimeSeriesCumulativeCompactionPolicy, _pick_missing_version_cumulativ
     compaction.find_longest_consecutive_version(&rowsets3, nullptr);
     EXPECT_EQ(0, rowsets3.size());
 }
+
+TEST_F(TestTimeSeriesCumulativeCompactionPolicy, pick_empty_rowsets) {
+    std::vector<RowsetMetaSharedPtr> rs_metas;
+    init_all_rs_meta_empty_nonoverlapping(&rs_metas);
+
+    for (auto& rowset : rs_metas) {
+        static_cast<void>(_tablet_meta->add_rs_meta(rowset));
+    }
+
+    TabletSharedPtr _tablet(new Tablet(_tablet_meta, nullptr, CUMULATIVE_TIME_SERIES_POLICY));
+    static_cast<void>(_tablet->init());
+    _tablet->calculate_cumulative_point();
+
+    auto candidate_rowsets = _tablet->pick_candidate_rowsets_to_cumulative_compaction();
+
+    std::vector<RowsetSharedPtr> input_rowsets;
+    Version last_delete_version {-1, -1};
+    size_t compaction_score = 0;
+
+    _tablet->_cumulative_compaction_policy->pick_input_rowsets(
+            _tablet.get(), candidate_rowsets, 10, 5, &input_rowsets, &last_delete_version,
+            &compaction_score, config::enable_delete_when_cumu_compaction);
+
+    EXPECT_EQ(7, input_rowsets.size());
+    EXPECT_EQ(-1, last_delete_version.first);
+    EXPECT_EQ(-1, last_delete_version.second);
+}
 } // namespace doris
 
 // @brief Test Stub


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to