csun5285 commented on code in PR #41022: URL: https://github.com/apache/doris/pull/41022#discussion_r1824254104
##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -1125,4 +1128,124 @@ Status CloudMetaMgr::remove_old_version_delete_bitmap(
     return st;
 }
 
+void CloudMetaMgr::check_table_size_correctness(const RowsetMeta& rs_meta) {
+    if (!config::enable_table_size_correctness_check) {
+        return;
+    }
+    int64_t total_segment_size = get_segment_file_size(rs_meta);
+    int64_t total_inverted_index_size = get_inverted_index_file_szie(rs_meta);
+    if (rs_meta.data_disk_size() != total_segment_size ||
+        rs_meta.index_disk_size() != total_inverted_index_size ||
+        rs_meta.data_disk_size() + rs_meta.index_disk_size() != rs_meta.total_disk_size()) {
+        LOG(WARNING) << "[Cloud table table size check failed]:"
+                     << " tablet id: " << rs_meta.tablet_id()
+                     << ", rowset id:" << rs_meta.rowset_id()
+                     << ", rowset data disk size:" << rs_meta.data_disk_size()
+                     << ", rowset real data disk size:" << total_segment_size
+                     << ", rowset index disk size:" << rs_meta.index_disk_size()
+                     << ", rowset real index disk size:" << total_inverted_index_size
+                     << ", rowset total disk size:" << rs_meta.total_disk_size()
+                     << ", rowset segment path:"
+                     << StorageResource().remote_segment_path(rs_meta.tablet_id(),
+                                                              rs_meta.rowset_id().to_string(), 0);
+        DCHECK(false);
+    }
+}
+
+int64_t CloudMetaMgr::get_segment_file_size(const RowsetMeta& rs_meta) {
+    int64_t total_segment_size = 0;
+    const auto fs = const_cast<RowsetMeta&>(rs_meta).fs();
+    if (!fs) {
+        LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta.resource_id();
+    }
+    for (int64_t seg_id = 0; seg_id < rs_meta.num_segments(); seg_id++) {
+        std::string segment_path = StorageResource().remote_segment_path(
+                rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id);
+        int64_t segment_file_size = 0;
+        auto st = fs->file_size(segment_path, &segment_file_size);
+        if (!st.ok()) {
+            segment_file_size = 0;
+            if (st.is<FILE_NOT_EXIST>()) {
+                LOG(INFO) << "cloud table size correctness check get segment size 0 because "
+                             "file not exist! msg:"
+                          << st.msg() << ", segment path:" << segment_path;
+            } else {
+                LOG(WARNING) << "cloud table size correctness check get segment size failed! msg:"
+                             << st.msg() << ", segment path:" << segment_path;
+            }
+        }
+        total_segment_size += segment_file_size;
+    }
+    return total_segment_size;
+}
+
+int64_t CloudMetaMgr::get_inverted_index_file_szie(const RowsetMeta& rs_meta) {
+    int64_t total_inverted_index_size = 0;
+    const auto fs = const_cast<RowsetMeta&>(rs_meta).fs();
+    if (!fs) {
+        LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta.resource_id();
+    }
+    if (rs_meta.tablet_schema()->get_inverted_index_storage_format() ==

Review Comment:
   if (!rs_meta.tablet_schema()->has_inverted_index()) {
       return 0;
   }



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org