csun5285 commented on code in PR #41022: URL: https://github.com/apache/doris/pull/41022#discussion_r1824275743
##########
be/src/olap/tablet.cpp:
##########
@@ -2737,4 +2741,120 @@ void Tablet::clear_cache() {
     }
 }
 
+void Tablet::check_table_size_correctness() {
+    if (!config::enable_table_size_correctness_check) {
+        return;
+    }
+    const std::vector<RowsetMetaSharedPtr>& all_rs_metas = _tablet_meta->all_rs_metas();
+    for (const auto& rs_meta : all_rs_metas) {
+        int64_t total_segment_size = get_segment_file_size(rs_meta);
+        int64_t total_inverted_index_size = get_inverted_index_file_szie(rs_meta);
+        if (rs_meta->data_disk_size() != total_segment_size ||
+            rs_meta->index_disk_size() != total_inverted_index_size ||
+            rs_meta->data_disk_size() + rs_meta->index_disk_size() != rs_meta->total_disk_size()) {
+            LOG(WARNING) << "[Local table table size check failed]:"
+                         << " tablet id: " << rs_meta->tablet_id()
+                         << ", rowset id:" << rs_meta->rowset_id()
+                         << ", rowset data disk size:" << rs_meta->data_disk_size()
+                         << ", rowset real data disk size:" << total_segment_size
+                         << ", rowset index disk size:" << rs_meta->index_disk_size()
+                         << ", rowset real index disk size:" << total_inverted_index_size
+                         << ", rowset total disk size:" << rs_meta->total_disk_size()
+                         << ", rowset segment path:"
+                         << StorageResource().remote_segment_path(
+                                    rs_meta->tablet_id(), rs_meta->rowset_id().to_string(), 0);
+            DCHECK(false);
+        }
+    }
+}
+
+std::string Tablet::get_segment_path(const RowsetMetaSharedPtr& rs_meta, int64_t seg_id) {
+    std::string segment_path;
+    if (rs_meta->is_local()) {
+        segment_path = local_segment_path(_tablet_path, rs_meta->rowset_id().to_string(), seg_id);
+    } else {
+        segment_path = rs_meta->remote_storage_resource().value()->remote_segment_path(
+                rs_meta->tablet_id(), rs_meta->rowset_id().to_string(), seg_id);
+    }
+    return segment_path;
+}
+
+int64_t Tablet::get_segment_file_size(const RowsetMetaSharedPtr& rs_meta) {
+    const auto& fs = rs_meta->fs();
+    if (!fs) {
+        LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta->resource_id();
+    }
+    int64_t total_segment_size = 0;
+    for (int64_t seg_id = 0; seg_id < rs_meta->num_segments(); seg_id++) {
+        std::string segment_path = get_segment_path(rs_meta, seg_id);
+        int64_t segment_file_size = 0;
+        auto st = fs->file_size(segment_path, &segment_file_size);
+        if (!st.ok()) {
+            segment_file_size = 0;
+            LOG(WARNING) << "table size correctness check get segment size failed! msg:"
+                         << st.to_string() << ", segment path:" << segment_path;
+        }
+        total_segment_size += segment_file_size;
+    }
+    return total_segment_size;
+}
+
+int64_t Tablet::get_inverted_index_file_szie(const RowsetMetaSharedPtr& rs_meta) {
+    const auto& fs = rs_meta->fs();
+    if (!fs) {
+        LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta->resource_id();
+    }
+    int64_t total_inverted_index_size = 0;
+
+    if (rs_meta->tablet_schema()->get_inverted_index_storage_format() ==
+        InvertedIndexStorageFormatPB::V1) {
+        auto indices = rs_meta->tablet_schema()->indexes();
+        for (auto& index : indices) {
+            // only get file_size for inverted index
+            if (index.index_type() != IndexType::INVERTED) {
+                continue;
+            }
+            for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) {
+                std::string segment_path = get_segment_path(rs_meta, seg_id);
+                int64_t file_size = 0;
+
+                std::string inverted_index_file_path =
+                        InvertedIndexDescriptor::get_index_file_path_v1(
+                                InvertedIndexDescriptor::get_index_file_path_prefix(segment_path),
+                                index.index_id(), index.get_index_suffix());
+                auto st = fs->file_size(inverted_index_file_path, &file_size);
+                if (!st.ok()) {
+                    file_size = 0;
+                    LOG(WARNING) << " tablet id: " << get_tablet_info().tablet_id
+                                 << ", rowset id:" << rs_meta->rowset_id()
+                                 << ", table size correctness check get inverted index v1 "
+                                    "size failed! msg:"
+                                 << st.to_string()
+                                 << ", inverted index path:" << inverted_index_file_path;
+                }
+                total_inverted_index_size += file_size;
+            }
+        }
+    } else {

Review Comment:
   if (rs_meta->tablet_schema()->has_inverted_index())

##########
be/src/runtime/load_stream_writer.cpp:
##########
@@ -201,7 +201,7 @@ Status LoadStreamWriter::add_segment(uint32_t segid, const SegmentStatistics& st
     }
     DBUG_EXECUTE_IF("LoadStreamWriter.add_segment.size_not_match", { segment_file_size++; });
-    if (segment_file_size + inverted_file_size != stat.data_size) {
+    if (segment_file_size != stat.data_size) {

Review Comment:
   if (segment_file_size != stat.data_size || inverted_file_size != stat.index_size)

##########
be/src/cloud/cloud_meta_mgr.cpp:
##########
@@ -1125,4 +1128,124 @@ Status CloudMetaMgr::remove_old_version_delete_bitmap(
     return st;
 }
 
+void CloudMetaMgr::check_table_size_correctness(const RowsetMeta& rs_meta) {
+    if (!config::enable_table_size_correctness_check) {
+        return;
+    }
+    int64_t total_segment_size = get_segment_file_size(rs_meta);
+    int64_t total_inverted_index_size = get_inverted_index_file_szie(rs_meta);
+    if (rs_meta.data_disk_size() != total_segment_size ||
+        rs_meta.index_disk_size() != total_inverted_index_size ||
+        rs_meta.data_disk_size() + rs_meta.index_disk_size() != rs_meta.total_disk_size()) {
+        LOG(WARNING) << "[Cloud table table size check failed]:"
+                     << " tablet id: " << rs_meta.tablet_id()
+                     << ", rowset id:" << rs_meta.rowset_id()
+                     << ", rowset data disk size:" << rs_meta.data_disk_size()
+                     << ", rowset real data disk size:" << total_segment_size
+                     << ", rowset index disk size:" << rs_meta.index_disk_size()
+                     << ", rowset real index disk size:" << total_inverted_index_size
+                     << ", rowset total disk size:" << rs_meta.total_disk_size()
+                     << ", rowset segment path:"
+                     << StorageResource().remote_segment_path(rs_meta.tablet_id(),
+                                                              rs_meta.rowset_id().to_string(), 0);
+        DCHECK(false);
+    }
+}
+
+int64_t CloudMetaMgr::get_segment_file_size(const RowsetMeta& rs_meta) {
+    int64_t total_segment_size = 0;
+    const auto fs = const_cast<RowsetMeta&>(rs_meta).fs();
+    if (!fs) {
+        LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta.resource_id();
+    }
+    for (int64_t seg_id = 0; seg_id < rs_meta.num_segments(); seg_id++) {
+        std::string segment_path = StorageResource().remote_segment_path(
+                rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id);
+        int64_t segment_file_size = 0;
+        auto st = fs->file_size(segment_path, &segment_file_size);
+        if (!st.ok()) {
+            segment_file_size = 0;
+            if (st.is<FILE_NOT_EXIST>()) {
+                LOG(INFO) << "cloud table size correctness check get segment size 0 because "
+                             "file not exist! msg:"
+                          << st.msg() << ", segment path:" << segment_path;
+            } else {
+                LOG(WARNING) << "cloud table size correctness check get segment size failed! msg:"
+                             << st.msg() << ", segment path:" << segment_path;
+            }
+        }
+        total_segment_size += segment_file_size;
+    }
+    return total_segment_size;
+}
+
+int64_t CloudMetaMgr::get_inverted_index_file_szie(const RowsetMeta& rs_meta) {
+    int64_t total_inverted_index_size = 0;
+    const auto fs = const_cast<RowsetMeta&>(rs_meta).fs();
+    if (!fs) {
+        LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta.resource_id();
+    }
+    if (rs_meta.tablet_schema()->get_inverted_index_storage_format() ==
+        InvertedIndexStorageFormatPB::V1) {
+        auto indices = rs_meta.tablet_schema()->indexes();
+        for (auto& index : indices) {
+            // only get file_size for inverted index
+            if (index.index_type() != IndexType::INVERTED) {
+                continue;
+            }
+            for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) {
+                std::string segment_path = StorageResource().remote_segment_path(
+                        rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id);
+                int64_t file_size = 0;
+
+                std::string inverted_index_file_path =
+                        InvertedIndexDescriptor::get_index_file_path_v1(
+                                InvertedIndexDescriptor::get_index_file_path_prefix(segment_path),
+                                index.index_id(), index.get_index_suffix());
+                auto st = fs->file_size(inverted_index_file_path, &file_size);
+                if (!st.ok()) {
+                    file_size = 0;
+                    if (st.is<FILE_NOT_EXIST>()) {
+                        LOG(INFO) << "cloud table size correctness check get inverted index v1 "
+                                     "0 because file not exist! msg:"
+                                  << st.msg()
+                                  << ", inverted index path:" << inverted_index_file_path;
+                    } else {
+                        LOG(WARNING)
+                                << "cloud table size correctness check get inverted index v1 "
+                                   "size failed! msg:"
+                                << st.msg() << ", inverted index path:" << inverted_index_file_path;
+                    }
+                }
+                total_inverted_index_size += file_size;
+            }
+        }
+    } else {

Review Comment:
   if (rs_meta.tablet_schema()->has_inverted_index())

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org