This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 2fb8d6895ca [fix](compaction) fix time series compaction merge empty rowsets priority (#34677) 2fb8d6895ca is described below commit 2fb8d6895ca16e7912ea91ebacd7cf8ec4d2d0fe Author: Sun Chenyang <csun5...@gmail.com> AuthorDate: Sat May 11 07:55:25 2024 +0800 [fix](compaction) fix time series compaction merge empty rowsets priority (#34677) --- .../cumulative_compaction_time_series_policy.cpp | 39 ++++++----- ...mulative_compaction_time_series_policy_test.cpp | 79 ++++++++++++++++++++++ 2 files changed, 98 insertions(+), 20 deletions(-) diff --git a/be/src/olap/cumulative_compaction_time_series_policy.cpp b/be/src/olap/cumulative_compaction_time_series_policy.cpp index 05c25f80428..b1a8cddf20f 100644 --- a/be/src/olap/cumulative_compaction_time_series_policy.cpp +++ b/be/src/olap/cumulative_compaction_time_series_policy.cpp @@ -74,13 +74,6 @@ uint32_t TimeSeriesCumulativeCompactionPolicy::calc_cumulative_compaction_score( return 0; } - // If there is a continuous set of empty rowsets, prioritize merging. - auto consecutive_empty_rowsets = tablet->pick_first_consecutive_empty_rowsets( - tablet->tablet_meta()->time_series_compaction_empty_rowsets_threshold()); - if (!consecutive_empty_rowsets.empty()) { - return score; - } - // Condition 1: the size of input files for compaction meets the requirement of parameter compaction_goal_size int64_t compaction_goal_size_mbytes = tablet->tablet_meta()->time_series_compaction_goal_size_mbytes(); @@ -127,6 +120,13 @@ uint32_t TimeSeriesCumulativeCompactionPolicy::calc_cumulative_compaction_score( tablet->set_last_cumu_compaction_success_time(now); } + // Condition 5: If there is a continuous set of empty rowsets, prioritize merging. + auto consecutive_empty_rowsets = tablet->pick_first_consecutive_empty_rowsets( + tablet->tablet_meta()->time_series_compaction_empty_rowsets_threshold()); + if (!consecutive_empty_rowsets.empty()) { + return score; + } + return 0; } @@ -216,19 +216,6 @@ int TimeSeriesCumulativeCompactionPolicy::pick_input_rowsets( return 0; } - // If their are many empty rowsets, maybe should be compacted - auto consecutive_empty_rowsets = tablet->pick_first_consecutive_empty_rowsets( - tablet->tablet_meta()->time_series_compaction_empty_rowsets_threshold()); - if (!consecutive_empty_rowsets.empty()) { - VLOG_NOTICE << "tablet is " << tablet->tablet_id() - << ", there are too many consecutive empty rowsets, size is " - << consecutive_empty_rowsets.size(); - input_rowsets->clear(); - input_rowsets->insert(input_rowsets->end(), consecutive_empty_rowsets.begin(), - consecutive_empty_rowsets.end()); - return 0; - } - int64_t compaction_goal_size_mbytes = tablet->tablet_meta()->time_series_compaction_goal_size_mbytes(); @@ -339,6 +326,18 @@ int TimeSeriesCumulativeCompactionPolicy::pick_input_rowsets( } input_rowsets->clear(); + // Condition 5: If their are many empty rowsets, maybe should be compacted + auto consecutive_empty_rowsets = tablet->pick_first_consecutive_empty_rowsets( + tablet->tablet_meta()->time_series_compaction_empty_rowsets_threshold()); + if (!consecutive_empty_rowsets.empty()) { + VLOG_NOTICE << "tablet is " << tablet->tablet_id() + << ", there are too many consecutive empty rowsets, size is " + << consecutive_empty_rowsets.size(); + input_rowsets->clear(); + input_rowsets->insert(input_rowsets->end(), consecutive_empty_rowsets.begin(), + consecutive_empty_rowsets.end()); + return 0; + } *compaction_score = 0; return 0; diff --git a/be/test/olap/cumulative_compaction_time_series_policy_test.cpp b/be/test/olap/cumulative_compaction_time_series_policy_test.cpp index 27ead8ccc4e..7c93d4d64b9 100644 --- a/be/test/olap/cumulative_compaction_time_series_policy_test.cpp +++ b/be/test/olap/cumulative_compaction_time_series_policy_test.cpp @@ -212,6 +212,58 @@ public: rs_metas->push_back(ptr5); } + void init_all_rs_meta_empty_nonoverlapping(std::vector<RowsetMetaSharedPtr>* rs_metas) { + RowsetMetaSharedPtr ptr1(new RowsetMeta()); + init_rs_meta(ptr1, 0, 1); + ptr1->set_total_disk_size(1 * 1024); + rs_metas->push_back(ptr1); + + RowsetMetaSharedPtr ptr2(new RowsetMeta()); + init_rs_meta(ptr2, 2, 3); + ptr2->set_total_disk_size(2 * 1024); + rs_metas->push_back(ptr2); + + RowsetMetaSharedPtr ptr3(new RowsetMeta()); + init_rs_meta(ptr3, 4, 4); + ptr3->set_num_segments(0); + rs_metas->push_back(ptr3); + + RowsetMetaSharedPtr ptr4(new RowsetMeta()); + init_rs_meta(ptr4, 5, 5); + ptr4->set_num_segments(0); + rs_metas->push_back(ptr4); + + RowsetMetaSharedPtr ptr5(new RowsetMeta()); + init_rs_meta(ptr5, 6, 6); + ptr5->set_num_segments(0); + rs_metas->push_back(ptr5); + + RowsetMetaSharedPtr ptr6(new RowsetMeta()); + init_rs_meta(ptr6, 7, 7); + ptr6->set_num_segments(0); + rs_metas->push_back(ptr6); + + RowsetMetaSharedPtr ptr7(new RowsetMeta()); + init_rs_meta(ptr7, 8, 8); + ptr7->set_num_segments(0); + rs_metas->push_back(ptr7); + + RowsetMetaSharedPtr ptr8(new RowsetMeta()); + init_rs_meta(ptr8, 9, 9); + ptr8->set_num_segments(0); + rs_metas->push_back(ptr8); + + RowsetMetaSharedPtr ptr9(new RowsetMeta()); + init_rs_meta(ptr9, 10, 10); + ptr9->set_num_segments(0); + rs_metas->push_back(ptr9); + + RowsetMetaSharedPtr ptr10(new RowsetMeta()); + init_rs_meta(ptr10, 11, 11); + ptr10->set_total_disk_size(2 * 1024); + rs_metas->push_back(ptr10); + } + void init_rs_meta_pick_empty(std::vector<RowsetMetaSharedPtr>* rs_metas) { RowsetMetaSharedPtr ptr1(new RowsetMeta()); init_rs_meta(ptr1, 0, 1); @@ -570,6 +622,33 @@ TEST_F(TestTimeSeriesCumulativeCompactionPolicy, _pick_missing_version_cumulativ compaction.find_longest_consecutive_version(&rowsets3, nullptr); EXPECT_EQ(0, rowsets3.size()); } + +TEST_F(TestTimeSeriesCumulativeCompactionPolicy, pick_empty_rowsets) { + std::vector<RowsetMetaSharedPtr> rs_metas; + init_all_rs_meta_empty_nonoverlapping(&rs_metas); + + for (auto& rowset : rs_metas) { + static_cast<void>(_tablet_meta->add_rs_meta(rowset)); + } + + TabletSharedPtr _tablet(new Tablet(_tablet_meta, nullptr, CUMULATIVE_TIME_SERIES_POLICY)); + static_cast<void>(_tablet->init()); + _tablet->calculate_cumulative_point(); + + auto candidate_rowsets = _tablet->pick_candidate_rowsets_to_cumulative_compaction(); + + std::vector<RowsetSharedPtr> input_rowsets; + Version last_delete_version {-1, -1}; + size_t compaction_score = 0; + + _tablet->_cumulative_compaction_policy->pick_input_rowsets( + _tablet.get(), candidate_rowsets, 10, 5, &input_rowsets, &last_delete_version, + &compaction_score, config::enable_delete_when_cumu_compaction); + + EXPECT_EQ(7, input_rowsets.size()); + EXPECT_EQ(-1, last_delete_version.first); + EXPECT_EQ(-1, last_delete_version.second); +} } // namespace doris // @brief Test Stub --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org