w41ter commented on code in PR #39372: URL: https://github.com/apache/doris/pull/39372#discussion_r1717823383
########## cloud/src/recycler/recycler.cpp: ########## @@ -2640,4 +2641,165 @@ bool InstanceRecycler::check_recycle_tasks() { return found; } +int InstanceRecycler::repair_tablet_index() { + const std::string task_name = "repair_tablet_index"; + int num_partition_scanned = 0; + int num_tablet_scanned = 0; + int num_tablet_repaired = 0; + + std::string begin_partition_ver_key = partition_version_key({instance_id_, 0, 0, 0}); + std::string end_partition_ver_key = + partition_version_key({instance_id_, INT64_MAX, INT64_MAX, INT64_MAX}); + + LOG_INFO("begin to repaire tablet index").tag("instance_id", instance_id_); + + int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); + register_recycle_task(task_name, start_time); + + std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { + unregister_recycle_task(task_name); + int64_t cost = + duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; + LOG_INFO("end to repaire tablet index, cost={}s", cost) + .tag("instance_id", instance_id_) + .tag("num_partition_scanned", num_partition_scanned) + .tag("num_tablet_scanned", num_tablet_scanned) + .tag("num_tablet_repaired", num_tablet_repaired); + }); + + auto handle_partition_ver_kv = [&num_partition_scanned, &num_tablet_scanned, + &num_tablet_repaired, + this](std::string_view key, std::string_view val) -> int { + ++num_partition_scanned; + + std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; + auto key1 = key; + // PartitionVersionKeyInfo 0:instance_id 1:db_id 2:tbl_id 3:partition_id + key1.remove_prefix(1); // Remove key space + if (decode_key(&key1, &out) != 0) { + LOG_ERROR("failed to decode key").tag("instance_id", instance_id_).tag("key", hex(key)); + return -1; + } + + int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); + int64_t table_id = std::get<int64_t>(std::get<0>(out[4])); + int64_t partition_id = std::get<int64_t>(std::get<0>(out[5])); + VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id + << " table_id=" << table_id << " partition_id=" << partition_id; + + DCHECK(db_id > 0); + DCHECK(table_id > 0); + DCHECK(partition_id > 0); + + std::string begin = meta_tablet_key({instance_id_, table_id, 0, 0, 0}); + std::string end = + meta_tablet_key({instance_id_, table_id, INT64_MAX, INT64_MAX, INT64_MAX}); Review Comment: In the current implementation, this range will be scanned many times (per partition), which is not necessary. You could scan this range, parse and save the `partition => tablet` mapping, before scan partition version keys. During `scan_and_recycle` the only thing that needs to be done is to query the tablet id from the mapping and save db id to `TabletIndexPB`. ########## cloud/src/meta-service/meta_service.cpp: ########## @@ -618,11 +618,13 @@ void MetaServiceImpl::create_tablets(::google::protobuf::RpcController* controll msg = fmt::format("failed to get vault id, vault name={}", name); return; } + DCHECK(request->has_db_id()); Review Comment: ```suggestion ``` ########## cloud/src/meta-service/meta_service.cpp: ########## @@ -406,7 +406,7 @@ void MetaServiceImpl::batch_get_version(::google::protobuf::RpcController* contr void internal_create_tablet(MetaServiceCode& code, std::string& msg, const doris::TabletMetaCloudPB& meta, std::shared_ptr<TxnKv> txn_kv, const std::string& instance_id, - std::set<std::pair<int64_t, int32_t>>& saved_schema) { + std::set<std::pair<int64_t, int32_t>>& saved_schema, int64_t db_id) { Review Comment: MS may serve an older version of FE, so db_id has not been set. It would be more appropriate to use `std::optional<int64_t>` here and check `request.has_db_id()` when calling. ########## fe/fe-core/src/main/java/org/apache/doris/cloud/datasource/CloudInternalCatalog.java: ########## @@ -180,6 +180,7 @@ protected Partition createPartitionWithIndices(long dbId, OlapTable tbl, long pa if (!storageVaultIdSet && ((CloudEnv) Env.getCurrentEnv()).getEnableStorageVault()) { requestBuilder.setStorageVaultName(storageVaultName); } + requestBuilder.setDbId(dbId); Review Comment: Consider checking the db id in method `Cloud.CreateTabletsResponse sendCreateTabletsRpc(Cloud.CreateTabletsRequest.Builder requestBuilder)`, throws `Exception` if this field is not set. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org