SWJTU-ZhangLei commented on code in PR #39372:
URL: https://github.com/apache/doris/pull/39372#discussion_r1718030465


##########
cloud/src/recycler/recycler.cpp:
##########
@@ -2640,4 +2641,165 @@ bool InstanceRecycler::check_recycle_tasks() {
     return found;
 }
 
+int InstanceRecycler::repair_tablet_index() {
+    const std::string task_name = "repair_tablet_index";
+    int num_partition_scanned = 0;
+    int num_tablet_scanned = 0;
+    int num_tablet_repaired = 0;
+
+    std::string begin_partition_ver_key = partition_version_key({instance_id_, 
0, 0, 0});
+    std::string end_partition_ver_key =
+            partition_version_key({instance_id_, INT64_MAX, INT64_MAX, 
INT64_MAX});
+
+    LOG_INFO("begin to repaire tablet index").tag("instance_id", instance_id_);
+
+    int64_t start_time = 
duration_cast<seconds>(steady_clock::now().time_since_epoch()).count();
+    register_recycle_task(task_name, start_time);
+
+    std::unique_ptr<int, std::function<void(int*)>> 
defer_log_statistics((int*)0x01, [&](int*) {
+        unregister_recycle_task(task_name);
+        int64_t cost =
+                
duration_cast<seconds>(steady_clock::now().time_since_epoch()).count() - 
start_time;
+        LOG_INFO("end to repaire tablet index, cost={}s", cost)
+                .tag("instance_id", instance_id_)
+                .tag("num_partition_scanned", num_partition_scanned)
+                .tag("num_tablet_scanned", num_tablet_scanned)
+                .tag("num_tablet_repaired", num_tablet_repaired);
+    });
+
+    auto handle_partition_ver_kv = [&num_partition_scanned, 
&num_tablet_scanned,
+                                    &num_tablet_repaired,
+                                    this](std::string_view key, 
std::string_view val) -> int {
+        ++num_partition_scanned;
+
+        std::vector<std::tuple<std::variant<int64_t, std::string>, int, int>> 
out;
+        auto key1 = key;
+        // PartitionVersionKeyInfo 0:instance_id  1:db_id  2:tbl_id  
3:partition_id
+        key1.remove_prefix(1); // Remove key space
+        if (decode_key(&key1, &out) != 0) {
+            LOG_ERROR("failed to decode key").tag("instance_id", 
instance_id_).tag("key", hex(key));
+            return -1;
+        }
+
+        int64_t db_id = std::get<int64_t>(std::get<0>(out[3]));
+        int64_t table_id = std::get<int64_t>(std::get<0>(out[4]));
+        int64_t partition_id = std::get<int64_t>(std::get<0>(out[5]));
+        VLOG_DEBUG << "instance_id=" << instance_id_ << " db_id=" << db_id
+                   << " table_id=" << table_id << " partition_id=" << 
partition_id;
+
+        DCHECK(db_id > 0);
+        DCHECK(table_id > 0);
+        DCHECK(partition_id > 0);
+
+        std::string begin = meta_tablet_key({instance_id_, table_id, 0, 0, 0});
+        std::string end =
+                meta_tablet_key({instance_id_, table_id, INT64_MAX, INT64_MAX, 
INT64_MAX});

Review Comment:
   > In the current implementation, this range will be scanned many times (per 
partition), which is not necessary. You could scan this range, parse and save 
the `partition => tablet` mapping, before scanning the partition version keys. 
During `scan_and_recycle`, the only thing that needs to be done is to query the 
tablet id from the mapping and save the db id to `TabletIndexPB`.
   
   I will optimize this later.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to