This is an automated email from the ASF dual-hosted git repository. zhaoc pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new f77cfcd [Compaction] Avoid unnecessary compaction (#2839) f77cfcd is described below commit f77cfcdb617c7ec470bad848e2e999f8e6fbcb8d Author: Mingyu Chen <morningman....@gmail.com> AuthorDate: Thu Feb 6 16:40:38 2020 +0800 [Compaction] Avoid unnecessary compaction (#2839) It is not necessary to perform compaction in the following cases: 1. A tablet has only 2 rowsets, with versions [0-1] and [2-x]. In this case, there is no need to perform base compaction because the [0-1] version is an empty version. Some tables are partitioned by day, and each partition loads only one batch of data per day, so a large number of tablets with rowsets [0-1][2-2] will appear. These tablets do not need base compaction. 2. The initial value of the `last successful execution time of compaction` is 0, so the first time the interval since the last successful compaction is computed, it is always large enough to meet the conditions to trigger cumulative compaction. --- be/src/olap/base_compaction.cpp | 11 ++++++++++ be/src/olap/cumulative_compaction.cpp | 41 +++++++++++++++++++++++------------ 2 files changed, 38 insertions(+), 14 deletions(-) diff --git a/be/src/olap/base_compaction.cpp b/be/src/olap/base_compaction.cpp index be36fb3..322eeb6 100644 --- a/be/src/olap/base_compaction.cpp +++ b/be/src/olap/base_compaction.cpp @@ -67,6 +67,12 @@ OLAPStatus BaseCompaction::pick_rowsets_to_compact() { RETURN_NOT_OK(check_version_continuity(_input_rowsets)); RETURN_NOT_OK(_check_rowset_overlapping(_input_rowsets)); + if (_input_rowsets.size() == 2 && _input_rowsets[0]->end_version() == 1) { + // the tablet is with rowset: [0-1], [2-y] + // and [0-1] has no data. in this situation, no need to do base compaction. + return OLAP_ERR_BE_NO_SUITABLE_VERSION; + } + // 1. 
cumulative rowset must reach base_compaction_num_cumulative_deltas threshold if (_input_rowsets.size() > config::base_compaction_num_cumulative_deltas) { LOG(INFO) << "satisfy the base compaction policy. tablet="<< _tablet->full_name() @@ -87,6 +93,11 @@ OLAPStatus BaseCompaction::pick_rowsets_to_compact() { } double base_cumulative_delta_ratio = config::base_cumulative_delta_ratio; + if (base_size == 0) { + // base_size == 0 means this may be a base version [0-1], which has no data. + // set to 1 to void devide by zero + base_size = 1; + } double cumulative_base_ratio = static_cast<double>(cumulative_total_size) / base_size; if (cumulative_base_ratio > base_cumulative_delta_ratio) { diff --git a/be/src/olap/cumulative_compaction.cpp b/be/src/olap/cumulative_compaction.cpp index b884736..87c6628 100755 --- a/be/src/olap/cumulative_compaction.cpp +++ b/be/src/olap/cumulative_compaction.cpp @@ -130,24 +130,37 @@ OLAPStatus CumulativeCompaction::pick_rowsets_to_compact() { // the cumulative point after waiting for a long time, to ensure that the base compaction can continue. // check both last success time of base and cumulative compaction - int64_t interval_threshold = config::base_compaction_interval_seconds_since_last_operation * 1000; int64_t now = UnixMillis(); - int64_t cumu_interval = now - _tablet->last_cumu_compaction_success_time(); - int64_t base_interval = now - _tablet->last_base_compaction_success_time(); - if (cumu_interval > interval_threshold && base_interval > interval_threshold) { - // before increasing cumulative point, we should make sure all rowsets are non-overlapping. - // if at least one rowset is overlapping, we should compact them first. - CHECK(candidate_rowsets.size() == transient_rowsets.size()) - << "tablet: " << _tablet->full_name() << ", "<< candidate_rowsets.size() << " vs. 
" << transient_rowsets.size(); - for (auto& rs : candidate_rowsets) { - if (rs->rowset_meta()->is_segments_overlapping()) { - _input_rowsets = candidate_rowsets; - return OLAP_SUCCESS; + int64_t last_cumu = _tablet->last_cumu_compaction_success_time(); + int64_t last_base = _tablet->last_base_compaction_success_time(); + if (last_cumu != 0 || last_base != 0) { + int64_t interval_threshold = config::base_compaction_interval_seconds_since_last_operation * 1000; + int64_t cumu_interval = now - last_cumu; + int64_t base_interval = now - last_base; + if (cumu_interval > interval_threshold && base_interval > interval_threshold) { + // before increasing cumulative point, we should make sure all rowsets are non-overlapping. + // if at least one rowset is overlapping, we should compact them first. + CHECK(candidate_rowsets.size() == transient_rowsets.size()) + << "tablet: " << _tablet->full_name() << ", "<< candidate_rowsets.size() << " vs. " << transient_rowsets.size(); + for (auto& rs : candidate_rowsets) { + if (rs->rowset_meta()->is_segments_overlapping()) { + _input_rowsets = candidate_rowsets; + return OLAP_SUCCESS; + } } + + // all candicate rowsets are non-overlapping, increase the cumulative point + _tablet->set_cumulative_layer_point(candidate_rowsets.back()->start_version() + 1); + } + } else { + // init the compaction success time for first time + if (last_cumu == 0) { + _tablet->set_last_cumu_compaction_success_time(now); } - // all candicate rowsets are non-overlapping, increase the cumulative point - _tablet->set_cumulative_layer_point(candidate_rowsets.back()->start_version() + 1); + if (last_base == 0) { + _tablet->set_last_base_compaction_success_time(now); + } } return OLAP_ERR_CUMULATIVE_NO_SUITABLE_VERSIONS; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org