dataroaring commented on code in PR #11542:
URL: https://github.com/apache/doris/pull/11542#discussion_r938399587


##########
be/src/olap/base_compaction.cpp:
##########
@@ -81,16 +84,61 @@ Status BaseCompaction::execute_compact_impl() {
     return Status::OK();
 }
 
+bool BaseCompaction::_filter_input_rowset() {
+    bool exceed = false;
+    // if dup-key skip-big-file is enabled and there is no delete predicate,
+    // we skip big files to save resources
+    if (!config::enable_dup_key_base_compaction_skip_big_file ||
+        _tablet->keys_type() != KeysType::DUP_KEYS || _tablet->delete_predicates().size() != 0) {
+        return false;

Review Comment:
   Could we check the versions of the delete predicates? e.g. if we don't have delete predicates before version 10, then we can still skip large rowsets before version 10.

##########
be/src/olap/base_compaction.cpp:
##########
@@ -81,16 +84,61 @@ Status BaseCompaction::execute_compact_impl() {
     return Status::OK();
 }
 
+bool BaseCompaction::_filter_input_rowset() {
+    bool exceed = false;
+    // if dup-key skip-big-file is enabled and there is no delete predicate,
+    // we skip big files to save resources
+    if (!config::enable_dup_key_base_compaction_skip_big_file ||
+        _tablet->keys_type() != KeysType::DUP_KEYS || _tablet->delete_predicates().size() != 0) {
+        return false;
+    }
+    int32_t total_rowsets = 0;
+    int64_t total_size = 0;
+    int64_t max_size = config::base_compaction_dup_key_max_file_size_mbytes;
+    // first, find a proper rowset to start from
+    auto rs_iter = _input_rowsets.begin();
+    while (rs_iter != _input_rowsets.end()) {
+        if ((*rs_iter)->rowset_meta()->total_disk_size() >= max_size) {
+            rs_iter = _input_rowsets.erase(rs_iter);
+        } else {
+            break;
+        }
+    }
+    // accumulate input rowsets until the size limit is exceeded
+    while (rs_iter != _input_rowsets.end()) {
+        ++total_rowsets;
+        total_size += (*rs_iter)->rowset_meta()->total_disk_size();
+        ++rs_iter;
+        if (total_rowsets <= 1) {
+            continue;
+        }
+        if (total_size > max_size) {
+            exceed = true;
+            break;
+        }
+    }
+    // remove the remaining rowsets
+    while (rs_iter != _input_rowsets.end()) {
+        rs_iter = _input_rowsets.erase(rs_iter);
+    }

Review Comment:
   Here, we limit the total size of the inputs to base_compaction_dup_key_max_file_size_mbytes? e.g. if there are four rowsets of 512MB each, we just compact the first two?
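
For illustration, here is a minimal standalone sketch of the version-aware skip suggested in the first comment. `RowsetStub`, `filter_big_rowsets`, and `min_delete_predicate_version` are hypothetical stand-ins rather than the actual Doris types or APIs; the point is only that a big rowset could still be skipped when its whole version range lies before the earliest delete predicate, instead of disabling the optimization whenever any delete predicate exists.

```cpp
// Standalone sketch (not the real Doris classes): model only the fields the
// filter needs -- version range and on-disk size -- to show skipping big
// rowsets whose versions all precede the first delete predicate.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct RowsetStub {
    int64_t start_version;
    int64_t end_version;
    int64_t disk_size_bytes;
};

// Hypothetical helper: drop big rowsets from the candidate list, but only
// those whose whole version range lies before the earliest delete predicate
// version, so delete predicates still see every row they could match.
std::vector<RowsetStub> filter_big_rowsets(std::vector<RowsetStub> input,
                                           int64_t min_delete_predicate_version,
                                           int64_t max_size_bytes) {
    input.erase(std::remove_if(input.begin(), input.end(),
                               [&](const RowsetStub& rs) {
                                   return rs.disk_size_bytes >= max_size_bytes &&
                                          rs.end_version < min_delete_predicate_version;
                               }),
                input.end());
    return input;
}

int main() {
    const int64_t mb = 1024 * 1024;
    std::vector<RowsetStub> rowsets = {
        {2, 5, 2048 * mb},   // big, ends before version 10 -> can be skipped
        {6, 9, 100 * mb},    // small -> kept
        {10, 12, 2048 * mb}, // big, overlaps delete predicates -> must be kept
    };
    // Assume the earliest delete predicate sits at version 10.
    auto kept = filter_big_rowsets(rowsets, /*min_delete_predicate_version=*/10,
                                   /*max_size_bytes=*/1024 * mb);
    for (const auto& rs : kept) {
        std::cout << "[" << rs.start_version << "-" << rs.end_version << "] "
                  << rs.disk_size_bytes / mb << "MB\n";
    }
    return 0;
}
```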
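
And here is a standalone trace of the accumulation loop the second comment asks about, operating on plain sizes instead of Rowset objects and assuming the sizes and max_size share one unit. With four 512MB rowsets and a 1024MB cap, the loop keeps the first three rowsets rather than two, because the rowset that first pushes the running total over the cap is still included before the loop breaks.

```cpp
// Standalone trace of the second while-loop in the diff above: accumulate
// rowsets until the running total exceeds max_size, always keep at least one
// rowset, and keep the rowset that crosses the limit.
#include <cstdint>
#include <iostream>
#include <vector>

std::vector<int64_t> pick_inputs(std::vector<int64_t> sizes, int64_t max_size) {
    size_t picked = 0;
    int64_t total_size = 0;
    while (picked < sizes.size()) {
        total_size += sizes[picked];
        ++picked;
        if (picked <= 1) continue;         // always keep at least one rowset
        if (total_size > max_size) break;  // stop once the cap is exceeded
    }
    sizes.resize(picked);                  // "remove the remaining rowsets"
    return sizes;
}

int main() {
    // The scenario from the comment: four 512MB rowsets, 1024MB cap.
    auto kept = pick_inputs({512, 512, 512, 512}, 1024);
    std::cout << "kept " << kept.size() << " rowsets\n";  // prints: kept 3 rowsets
    return 0;
}
```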
########## be/src/olap/base_compaction.cpp: ########## @@ -81,16 +84,61 @@ Status BaseCompaction::execute_compact_impl() { return Status::OK(); } +bool BaseCompaction::_filter_input_rowset() { + bool exceed = false; + // if enable dup key skip big file and no delete predicate + // we skip big files too save resources + if (!config::enable_dup_key_base_compaction_skip_big_file || + _tablet->keys_type() != KeysType::DUP_KEYS || _tablet->delete_predicates().size() != 0) { + return false; Review Comment: Could we check version of delete predicates? e.g. If we dont have delete predicates before version 10, then we can skip large rowset before version 10. ########## be/src/olap/base_compaction.cpp: ########## @@ -81,16 +84,61 @@ Status BaseCompaction::execute_compact_impl() { return Status::OK(); } +bool BaseCompaction::_filter_input_rowset() { + bool exceed = false; + // if enable dup key skip big file and no delete predicate + // we skip big files too save resources + if (!config::enable_dup_key_base_compaction_skip_big_file || + _tablet->keys_type() != KeysType::DUP_KEYS || _tablet->delete_predicates().size() != 0) { + return false; + } + int32_t total_rowsets = 0; + int64_t total_size = 0; + int64_t max_size = config::base_compaction_dup_key_max_file_size_mbytes; + // first find a proper rowset for start + auto rs_iter = _input_rowsets.begin(); + while (rs_iter != _input_rowsets.end()) { + if ((*rs_iter)->rowset_meta()->total_disk_size() >= max_size) { + rs_iter = _input_rowsets.erase(rs_iter); + ; + } else { + break; + } + } + // find input rowset + while (rs_iter != _input_rowsets.end()) { + ++total_rowsets; + total_size += (*rs_iter)->rowset_meta()->total_disk_size(); + ++rs_iter; + if (total_rowsets <= 1) { + continue; + } + if (total_size > max_size) { + exceed = true; + break; + } + } + // remove rowset left + while (rs_iter != _input_rowsets.end()) { + rs_iter = _input_rowsets.erase(rs_iter); + } Review Comment: Here, we limit total size of inputs to base_compaction_dup_key_max_file_size_mbytes? e.g. if there are four rowsets with 512MB size, we just compact the first 2? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org