gavinchou commented on code in PR #41782: URL: https://github.com/apache/doris/pull/41782#discussion_r1821316947
########## cloud/src/meta-service/meta_service_tablet_stats.cpp: ########## @@ -156,4 +165,239 @@ void internal_get_tablet_stats(MetaServiceCode& code, std::string& msg, Transact merge_tablet_stats(stats, detached_stats); } +MetaServiceResponseStatus parse_fix_tablet_stats_param( + std::shared_ptr<ResourceManager> resource_mgr, const std::string& table_id_str, + const std::string& cloud_unique_id_str, int64_t& table_id, std::string& instance_id) { + MetaServiceCode code = MetaServiceCode::OK; + std::string msg; + MetaServiceResponseStatus st; + st.set_code(MetaServiceCode::OK); + + // parse params + try { + table_id = std::stoll(table_id_str); + } catch (...) { + st.set_code(MetaServiceCode::INVALID_ARGUMENT); + st.set_msg("Invalid table_id, table_id: " + table_id_str); + return st; + } + + instance_id = get_instance_id(resource_mgr, cloud_unique_id_str); + if (instance_id.empty()) { + code = MetaServiceCode::INVALID_ARGUMENT; + msg = "empty instance_id"; + LOG(INFO) << msg << ", cloud_unique_id=" << cloud_unique_id_str; + st.set_code(code); + st.set_msg(msg); + return st; + } + return st; +} + +MetaServiceResponseStatus fix_tablet_stats_internal( + std::shared_ptr<TxnKv> txn_kv, std::pair<std::string, std::string>& key_pair, + std::vector<std::shared_ptr<TabletStatsPB>>& tablet_stat_shared_ptr_vec_batch, + const std::string& instance_id, size_t batch_size) { + std::unique_ptr<Transaction> txn; + MetaServiceResponseStatus st; + st.set_code(MetaServiceCode::OK); + MetaServiceCode code = MetaServiceCode::OK; + std::unique_ptr<RangeGetIterator> it; + std::vector<std::shared_ptr<TabletStatsPB>> tmp_tablet_stat_vec; + + TxnErrorCode err = txn_kv->create_txn(&txn); + if (err != TxnErrorCode::TXN_OK) { + st.set_code(cast_as<ErrCategory::CREATE>(err)); + st.set_msg("failed to create txn"); + return st; + } + + // read tablet stats + err = txn->get(key_pair.first, key_pair.second, &it, true); + if (err != TxnErrorCode::TXN_OK) { + 
st.set_code(cast_as<ErrCategory::READ>(err)); + st.set_msg(fmt::format("failed to get tablet stats, err={} ", err)); + return st; + } + + size_t tablet_cnt = 0; + while (it->has_next() && tablet_cnt < batch_size) { + auto [k, v] = it->next(); + key_pair.first = k; + auto k1 = k; + k1.remove_prefix(1); + std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; + decode_key(&k1, &out); + + // 0x01 "stats" ${instance_id} "tablet" ${table_id} ${index_id} ${partition_id} ${tablet_id} -> TabletStatsPB + if (out.size() == 7) { + tablet_cnt++; + TabletStatsPB tablet_stat; + tablet_stat.ParseFromArray(v.data(), v.size()); + tmp_tablet_stat_vec.emplace_back(std::make_shared<TabletStatsPB>(tablet_stat)); + } + } + if (it->has_next()) { + key_pair.first = it->next().first; + } + + for (const auto& tablet_stat_ptr : tmp_tablet_stat_vec) { + GetRowsetResponse resp; + std::string msg; + // get rowsets in tablet and accumulate disk size + internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id, Review Comment: there may be thousands of rowsets (KVs) per call, which may cause the txn to exceed the 5-second limit ########## cloud/src/meta-service/meta_service_tablet_stats.cpp: ########## @@ -156,4 +165,239 @@ void internal_get_tablet_stats(MetaServiceCode& code, std::string& msg, Transact merge_tablet_stats(stats, detached_stats); } +MetaServiceResponseStatus parse_fix_tablet_stats_param( + std::shared_ptr<ResourceManager> resource_mgr, const std::string& table_id_str, + const std::string& cloud_unique_id_str, int64_t& table_id, std::string& instance_id) { + MetaServiceCode code = MetaServiceCode::OK; + std::string msg; + MetaServiceResponseStatus st; + st.set_code(MetaServiceCode::OK); + + // parse params + try { + table_id = std::stoll(table_id_str); + } catch (...) 
{ + st.set_code(MetaServiceCode::INVALID_ARGUMENT); + st.set_msg("Invalid table_id, table_id: " + table_id_str); + return st; + } + + instance_id = get_instance_id(resource_mgr, cloud_unique_id_str); + if (instance_id.empty()) { + code = MetaServiceCode::INVALID_ARGUMENT; + msg = "empty instance_id"; + LOG(INFO) << msg << ", cloud_unique_id=" << cloud_unique_id_str; + st.set_code(code); + st.set_msg(msg); + return st; + } + return st; +} + +MetaServiceResponseStatus fix_tablet_stats_internal( + std::shared_ptr<TxnKv> txn_kv, std::pair<std::string, std::string>& key_pair, + std::vector<std::shared_ptr<TabletStatsPB>>& tablet_stat_shared_ptr_vec_batch, + const std::string& instance_id, size_t batch_size) { + std::unique_ptr<Transaction> txn; + MetaServiceResponseStatus st; + st.set_code(MetaServiceCode::OK); + MetaServiceCode code = MetaServiceCode::OK; + std::unique_ptr<RangeGetIterator> it; + std::vector<std::shared_ptr<TabletStatsPB>> tmp_tablet_stat_vec; + + TxnErrorCode err = txn_kv->create_txn(&txn); + if (err != TxnErrorCode::TXN_OK) { + st.set_code(cast_as<ErrCategory::CREATE>(err)); + st.set_msg("failed to create txn"); + return st; + } + + // read tablet stats + err = txn->get(key_pair.first, key_pair.second, &it, true); + if (err != TxnErrorCode::TXN_OK) { + st.set_code(cast_as<ErrCategory::READ>(err)); + st.set_msg(fmt::format("failed to get tablet stats, err={} ", err)); + return st; + } + + size_t tablet_cnt = 0; + while (it->has_next() && tablet_cnt < batch_size) { + auto [k, v] = it->next(); + key_pair.first = k; + auto k1 = k; + k1.remove_prefix(1); + std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; + decode_key(&k1, &out); + + // 0x01 "stats" ${instance_id} "tablet" ${table_id} ${index_id} ${partition_id} ${tablet_id} -> TabletStatsPB + if (out.size() == 7) { + tablet_cnt++; + TabletStatsPB tablet_stat; + tablet_stat.ParseFromArray(v.data(), v.size()); + 
tmp_tablet_stat_vec.emplace_back(std::make_shared<TabletStatsPB>(tablet_stat)); + } + } + if (it->has_next()) { + key_pair.first = it->next().first; + } + + for (const auto& tablet_stat_ptr : tmp_tablet_stat_vec) { + GetRowsetResponse resp; + std::string msg; + // get rowsets in tablet and accumulate disk size + internal_get_rowset(txn.get(), 0, std::numeric_limits<int64_t>::max() - 1, instance_id, Review Comment: there may be thousands of rowsets (KVs) per call, which may cause the txn to exceed the 5-second limit -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org