This is an automated email from the ASF dual-hosted git repository. zhangchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 68aa0d0bf04 [fix](compaction) reduce memory cost for cloud compaction of mow table (#43502) 68aa0d0bf04 is described below commit 68aa0d0bf04b1a162cb513acd630552890e26422 Author: zhannngchen <48427519+zhannngc...@users.noreply.github.com> AuthorDate: Mon Nov 11 20:28:23 2024 +0800 [fix](compaction) reduce memory cost for cloud compaction of mow table (#43502) Related PR: #36865 Problem Summary: #36865 reduced the memory cost for compactions of MoW tables. But when the code was merged for cloud, that optimization was not applied to cloud compaction. We found several cases where compaction of MoW tables consumes lots of memory on cloud; this PR tries to fix this issue. Co-authored-by: Chen Zhang <zhangc...@selectdb.com> --- be/src/cloud/cloud_tablet.cpp | 78 ++++++++++++++++++++++++++----------------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 601e9486edf..5552b78a0c9 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -760,38 +760,54 @@ Status CloudTablet::calc_delete_bitmap_for_compaction( int64_t filtered_rows, int64_t initiator, DeleteBitmapPtr& output_rowset_delete_bitmap, bool allow_delete_in_cumu_compaction) { output_rowset_delete_bitmap = std::make_shared<DeleteBitmap>(tablet_id()); - std::set<RowLocation> missed_rows; - std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>> location_map; + std::unique_ptr<RowLocationSet> missed_rows; + if ((config::enable_missing_rows_correctness_check || + config::enable_mow_compaction_correctness_check_core) && + !allow_delete_in_cumu_compaction && + compaction_type == ReaderType::READER_CUMULATIVE_COMPACTION) { + missed_rows = std::make_unique<RowLocationSet>(); + LOG(INFO) << "RowLocation Set inited succ for tablet:" << tablet_id(); + } + + std::unique_ptr<std::map<RowsetSharedPtr, RowLocationPairList>> location_map; + if 
(config::enable_rowid_conversion_correctness_check) { + location_map = std::make_unique<std::map<RowsetSharedPtr, RowLocationPairList>>(); + LOG(INFO) << "Location Map inited succ for tablet:" << tablet_id(); + } // 1. calc delete bitmap for historical data RETURN_IF_ERROR(_engine.meta_mgr().sync_tablet_rowsets(this)); Version version = max_version(); + std::size_t missed_rows_size = 0; calc_compaction_output_rowset_delete_bitmap( - input_rowsets, rowid_conversion, 0, version.second + 1, &missed_rows, &location_map, - tablet_meta()->delete_bitmap(), output_rowset_delete_bitmap.get()); - std::size_t missed_rows_size = missed_rows.size(); - if (!allow_delete_in_cumu_compaction) { - if (compaction_type == ReaderType::READER_CUMULATIVE_COMPACTION && - tablet_state() == TABLET_RUNNING) { - if (merged_rows + filtered_rows >= 0 && - merged_rows + filtered_rows != missed_rows_size) { - std::string err_msg = fmt::format( - "cumulative compaction: the merged rows({}), the filtered rows({}) is not " - "equal to missed rows({}) in rowid conversion, tablet_id: {}, table_id:{}", - merged_rows, filtered_rows, missed_rows_size, tablet_id(), table_id()); - if (config::enable_mow_compaction_correctness_check_core) { - CHECK(false) << err_msg; - } else { - DCHECK(false) << err_msg; + input_rowsets, rowid_conversion, 0, version.second + 1, missed_rows.get(), + location_map.get(), tablet_meta()->delete_bitmap(), output_rowset_delete_bitmap.get()); + if (missed_rows) { + missed_rows_size = missed_rows->size(); + if (!allow_delete_in_cumu_compaction) { + if (compaction_type == ReaderType::READER_CUMULATIVE_COMPACTION && + tablet_state() == TABLET_RUNNING) { + if (merged_rows + filtered_rows >= 0 && + merged_rows + filtered_rows != missed_rows_size) { + std::string err_msg = fmt::format( + "cumulative compaction: the merged rows({}), the filtered rows({}) is " + "not equal to missed rows({}) in rowid conversion, tablet_id: {}, " + "table_id:{}", + merged_rows, filtered_rows, 
missed_rows_size, tablet_id(), table_id()); + if (config::enable_mow_compaction_correctness_check_core) { + CHECK(false) << err_msg; + } else { + DCHECK(false) << err_msg; + } + LOG(WARNING) << err_msg; } - LOG(WARNING) << err_msg; } } } - if (config::enable_rowid_conversion_correctness_check) { - RETURN_IF_ERROR(check_rowid_conversion(output_rowset, location_map)); + if (location_map) { + RETURN_IF_ERROR(check_rowid_conversion(output_rowset, *location_map)); + location_map->clear(); } - location_map.clear(); // 2. calc delete bitmap for incremental data RETURN_IF_ERROR(_engine.meta_mgr().get_delete_bitmap_update_lock( @@ -799,16 +815,16 @@ Status CloudTablet::calc_delete_bitmap_for_compaction( RETURN_IF_ERROR(_engine.meta_mgr().sync_tablet_rowsets(this)); calc_compaction_output_rowset_delete_bitmap( - input_rowsets, rowid_conversion, version.second, UINT64_MAX, &missed_rows, - &location_map, tablet_meta()->delete_bitmap(), output_rowset_delete_bitmap.get()); - if (config::enable_rowid_conversion_correctness_check) { - RETURN_IF_ERROR(check_rowid_conversion(output_rowset, location_map)); - } - if (compaction_type == ReaderType::READER_CUMULATIVE_COMPACTION) { - DCHECK_EQ(missed_rows.size(), missed_rows_size); - if (missed_rows.size() != missed_rows_size) { + input_rowsets, rowid_conversion, version.second, UINT64_MAX, missed_rows.get(), + location_map.get(), tablet_meta()->delete_bitmap(), output_rowset_delete_bitmap.get()); + if (location_map) { + RETURN_IF_ERROR(check_rowid_conversion(output_rowset, *location_map)); + } + if (missed_rows) { + DCHECK_EQ(missed_rows->size(), missed_rows_size); + if (missed_rows->size() != missed_rows_size) { LOG(WARNING) << "missed rows don't match, before: " << missed_rows_size - << " after: " << missed_rows.size(); + << " after: " << missed_rows->size(); } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: 
commits-h...@doris.apache.org