SWJTU-ZhangLei commented on code in PR #39372: URL: https://github.com/apache/doris/pull/39372#discussion_r1718030465
########## cloud/src/recycler/recycler.cpp: ########## @@ -2640,4 +2641,165 @@ bool InstanceRecycler::check_recycle_tasks() { return found; } +int InstanceRecycler::repair_tablet_index() { + const std::string task_name = "repair_tablet_index"; + int num_partition_scanned = 0; + int num_tablet_scanned = 0; + int num_tablet_repaired = 0; + + std::string begin_partition_ver_key = partition_version_key({instance_id_, 0, 0, 0}); + std::string end_partition_ver_key = + partition_version_key({instance_id_, INT64_MAX, INT64_MAX, INT64_MAX}); + + LOG_INFO("begin to repaire tablet index").tag("instance_id", instance_id_); + + int64_t start_time = duration_cast<seconds>(steady_clock::now().time_since_epoch()).count(); + register_recycle_task(task_name, start_time); + + std::unique_ptr<int, std::function<void(int*)>> defer_log_statistics((int*)0x01, [&](int*) { + unregister_recycle_task(task_name); + int64_t cost = + duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - start_time; + LOG_INFO("end to repaire tablet index, cost={}s", cost) + .tag("instance_id", instance_id_) + .tag("num_partition_scanned", num_partition_scanned) + .tag("num_tablet_scanned", num_tablet_scanned) + .tag("num_tablet_repaired", num_tablet_repaired); + }); + + auto handle_partition_ver_kv = [&num_partition_scanned, &num_tablet_scanned, + &num_tablet_repaired, + this](std::string_view key, std::string_view val) -> int { + ++num_partition_scanned; + + std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> out; + auto key1 = key; + // PartitionVersionKeyInfo 0:instance_id 1:db_id 2:tbl_id 3:partition_id + key1.remove_prefix(1); // Remove key space + if (decode_key(&key1, &out) != 0) { + LOG_ERROR("failed to decode key").tag("instance_id", instance_id_).tag("key", hex(key)); + return -1; + } + + int64_t db_id = std::get<int64_t>(std::get<0>(out[3])); + int64_t table_id = std::get<int64_t>(std::get<0>(out[4])); + int64_t partition_id = 
std::get<int64_t>(std::get<0>(out[5])); + VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id + << " table_id=" << table_id << " partition_id=" << partition_id; + + DCHECK(db_id > 0); + DCHECK(table_id > 0); + DCHECK(partition_id > 0); + + std::string begin = meta_tablet_key({instance_id_, table_id, 0, 0, 0}); + std::string end = + meta_tablet_key({instance_id_, table_id, INT64_MAX, INT64_MAX, INT64_MAX}); Review Comment: > In the current implementation, this range will be scanned many times (per partition), which is not necessary. You could scan this range, parse and save the `partition => tablet` mapping, before scanning the partition version keys. During `scan_and_recycle` the only thing that needs to be done is to query the tablet id from the mapping and save the db id to `TabletIndexPB`. Will optimize this later. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org