This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new f9ec237476b [branch-3.0] [feat](checker) Add meta key consistency checking for checker #54002 (#54919)
f9ec237476b is described below

commit f9ec237476beb8c7579a9e36bcb684932def15ee
Author: Uniqueyou <[email protected]>
AuthorDate: Tue Aug 19 14:11:36 2025 +0800

    [branch-3.0] [feat](checker) Add meta key consistency checking for checker #54002 (#54919)
    
    pick: https://github.com/apache/doris/pull/54002
---
 cloud/src/common/config.h           |   1 +
 cloud/src/recycler/meta_checker.cpp | 786 ++++++++++++++++++++++++++----------
 cloud/src/recycler/meta_checker.h   | 109 +++++
 3 files changed, 686 insertions(+), 210 deletions(-)

diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h
index c52f4100f3c..6c3dd03c1da 100644
--- a/cloud/src/common/config.h
+++ b/cloud/src/common/config.h
@@ -118,6 +118,7 @@ CONF_mInt64(recycle_task_threshold_seconds, "10800"); // 3h
 // **just for TEST**
 CONF_Bool(force_immediate_recycle, "false");
 
+CONF_mBool(enable_checker_for_meta_key_check, "false");
 CONF_String(test_s3_ak, "");
 CONF_String(test_s3_sk, "");
 CONF_String(test_s3_endpoint, "");
diff --git a/cloud/src/recycler/meta_checker.cpp b/cloud/src/recycler/meta_checker.cpp
index a299c2839df..78d65fe0eea 100644
--- a/cloud/src/recycler/meta_checker.cpp
+++ b/cloud/src/recycler/meta_checker.cpp
@@ -19,14 +19,21 @@
 
 #include <curl/curl.h>
 #include <gen_cpp/cloud.pb.h>
+#include <gen_cpp/olap_file.pb.h>
 #include <glog/logging.h>
 #include <mysql/mysql.h>
 
 #include <chrono>
+#include <cstdint>
+#include <functional>
 #include <set>
+#include <tuple>
 
+#include "common/config.h"
 #include "common/logging.h"
 #include "common/util.h"
+#include "meta-service/meta_service_schema.h"
+#include "meta-store/blob_message.h"
 #include "meta-store/keys.h"
 #include "meta-store/txn_kv.h"
 
@@ -34,45 +41,15 @@ namespace doris::cloud {
 
 MetaChecker::MetaChecker(std::shared_ptr<TxnKv> txn_kv) : 
txn_kv_(std::move(txn_kv)) {}
 
-struct TabletInfo {
-    int64_t db_id;
-    int64_t table_id;
-    int64_t partition_id;
-    int64_t index_id;
-    int64_t tablet_id;
-    int64_t schema_version;
-
-    std::string debug_string() const {
-        return "db id: " + std::to_string(db_id) + " table id: " + 
std::to_string(table_id) +
-               " partition id: " + std::to_string(partition_id) +
-               " index id: " + std::to_string(index_id) +
-               " tablet id: " + std::to_string(tablet_id) +
-               " schema version: " + std::to_string(schema_version);
-    }
-};
-
-struct PartitionInfo {
-    int64_t db_id;
-    int64_t table_id;
-    int64_t partition_id;
-    int64_t tablet_id;
-    int64_t visible_version;
-};
-
-bool MetaChecker::check_fe_meta_by_fdb(MYSQL* conn) {
+bool MetaChecker::scan_and_handle_kv(
+        std::string& start_key, const std::string& end_key,
+        std::function<int(std::string_view, std::string_view)> handle_kv) {
     std::unique_ptr<Transaction> txn;
     TxnErrorCode err = txn_kv_->create_txn(&txn);
     if (err != TxnErrorCode::TXN_OK) {
         LOG(WARNING) << "failed to init txn";
         return false;
     }
-
-    std::string start_key;
-    std::string end_key;
-    meta_tablet_idx_key({instance_id_, 0}, &start_key);
-    meta_tablet_idx_key({instance_id_, std::numeric_limits<int64_t>::max()}, 
&end_key);
-    std::vector<TabletIndexPB> tablet_indexes;
-
     std::unique_ptr<RangeGetIterator> it;
     do {
         err = txn->get(start_key, end_key, &it);
@@ -83,43 +60,186 @@ bool MetaChecker::check_fe_meta_by_fdb(MYSQL* conn) {
 
         while (it->has_next()) {
             auto [k, v] = it->next();
-            TabletIndexPB tablet_idx;
-            if (!tablet_idx.ParseFromArray(v.data(), v.size())) [[unlikely]] {
-                LOG(WARNING) << "malformed tablet index value";
-                return false;
-            }
 
-            tablet_indexes.push_back(std::move(tablet_idx));
-            if (!it->has_next()) start_key = k;
+            handle_kv(k, v);
+            if (!it->has_next()) {
+                start_key = k;
+            }
         }
         start_key.push_back('\x00');
     } while (it->more());
+    return true;
+}
 
+bool MetaChecker::do_meta_tablet_key_check(MYSQL* conn) {
+    std::vector<doris::TabletMetaCloudPB> tablets_meta;
     bool check_res = true;
+
+    // scan and collect tablet_meta
+    std::string start_key;
+    std::string end_key;
+    meta_tablet_key({instance_id_, 0, 0, 0, 0}, &start_key);
+    meta_tablet_key({instance_id_, INT64_MAX, 0, 0, 0}, &end_key);
+    scan_and_handle_kv(start_key, end_key,
+                       [&tablets_meta](std::string_view key, std::string_view 
value) -> int {
+                           doris::TabletMetaCloudPB tablet_meta;
+                           if (!tablet_meta.ParseFromArray(value.data(), 
value.size())) {
+                               LOG(WARNING) << "malformed tablet meta value";
+                               return -1;
+                           }
+                           tablets_meta.push_back(std::move(tablet_meta));
+                           return 0;
+                       });
+
+    for (const auto& tablet_meta : tablets_meta) {
+        std::unique_ptr<Transaction> txn;
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to init txn";
+            continue;
+        }
+
+        // get tablet_index to search tablet belongs which db
+        std::string tablet_index_key;
+        std::string tablet_index_val;
+        meta_tablet_idx_key({instance_id_, tablet_meta.tablet_id()}, 
&tablet_index_key);
+        err = txn->get(tablet_index_key, &tablet_index_val);
+        if (err != TxnErrorCode::TXN_OK) {
+            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+                LOG(WARNING) << "tablet_idx not found, tablet id: " << 
tablet_meta.tablet_id();
+                continue;
+            } else {
+                LOG(WARNING) << "failed to get tablet_idx, err: " << err
+                             << " tablet id: " << tablet_meta.tablet_id();
+                continue;
+            }
+        }
+
+        TabletIndexPB tablet_index_meta;
+        tablet_index_meta.ParseFromString(tablet_index_val);
+
+        if (!db_meta_.contains(tablet_index_meta.db_id())) {
+            LOG(WARNING) << "tablet_idx.db_id not found in fe meta, db_id = "
+                         << tablet_index_meta.db_id()
+                         << "tablet index meta: " << 
tablet_index_meta.DebugString();
+            continue;
+        }
+        std::string db_name = db_meta_.at(tablet_index_meta.db_id());
+        if (db_name == "__internal_schema" || db_name == "information_schema" 
||
+            db_name == "mysql") {
+            continue;
+        }
+
+        if (mysql_select_db(conn, db_name.c_str())) {
+            LOG(WARNING) << "mysql select db error, db_name: " << db_name
+                         << " error: " << mysql_error(conn);
+            continue;
+        }
+        MYSQL_RES* result;
+        std::string sql_stmt = "show tablet " + 
std::to_string(tablet_meta.tablet_id());
+        mysql_query(conn, sql_stmt.c_str());
+
+        result = mysql_store_result(conn);
+        if (result) {
+            MYSQL_ROW row = mysql_fetch_row(result);
+            auto [db_id, table_id, partition_id, index_id] =
+                    std::make_tuple(atoll(row[4]), atoll(row[5]), 
atoll(row[6]), atoll(row[7]));
+            if (tablet_meta.table_id() != table_id) {
+                LOG(WARNING) << "check failed, fdb meta: " << 
tablet_meta.ShortDebugString()
+                             << " fe table_id: " << atoll(row[5]);
+                check_res = false;
+            }
+            if (tablet_meta.partition_id() != partition_id) {
+                LOG(WARNING) << "check failed, fdb meta: " << 
tablet_meta.ShortDebugString()
+                             << " fe partition_id: " << atoll(row[6]);
+                check_res = false;
+            }
+            if (tablet_meta.index_id() != index_id) {
+                LOG(WARNING) << "check failed, fdb meta: " << 
tablet_meta.ShortDebugString()
+                             << " fe index_id: " << atoll(row[7]);
+                check_res = false;
+            }
+            mysql_free_result(result);
+        } else {
+            LOG(WARNING) << "check failed, fdb meta: " << 
tablet_meta.ShortDebugString()
+                         << " fe tablet not found";
+            check_res = false;
+        }
+        stat_info_.check_fe_tablet_num++;
+    }
+
+    return check_res;
+}
+
+bool MetaChecker::do_meta_tablet_key_index_check(MYSQL* conn) {
+    std::vector<TabletIndexPB> tablet_indexes;
+    bool check_res = true;
+
+    // scan and collect tablet_idx
+    std::string start_key;
+    std::string end_key;
+    meta_tablet_idx_key({instance_id_, 0}, &start_key);
+    meta_tablet_idx_key({instance_id_, INT64_MAX}, &end_key);
+    scan_and_handle_kv(start_key, end_key,
+                       [&tablet_indexes](std::string_view key, 
std::string_view value) -> int {
+                           TabletIndexPB tablet_idx;
+                           if (!tablet_idx.ParseFromArray(value.data(), 
value.size())) {
+                               LOG(WARNING) << "malformed tablet index value";
+                               return -1;
+                           }
+                           tablet_indexes.push_back(std::move(tablet_idx));
+                           return 0;
+                       });
+
     for (const TabletIndexPB& tablet_idx : tablet_indexes) {
+        if (!db_meta_.contains(tablet_idx.db_id())) {
+            LOG(WARNING) << "tablet_idx.db_id not found in fe meta, db_id = " 
<< tablet_idx.db_id();
+            continue;
+        }
         std::string sql_stmt = "show tablet " + 
std::to_string(tablet_idx.tablet_id());
         MYSQL_RES* result;
+        std::string db_name = db_meta_.at(tablet_idx.db_id());
+        if (db_name == "__internal_schema" || db_name == "information_schema" 
||
+            db_name == "mysql") {
+            continue;
+        }
+        if (mysql_select_db(conn, db_name.c_str())) {
+            LOG(WARNING) << "mysql select db error, db_name: " << db_name
+                         << " error: " << mysql_error(conn);
+            continue;
+        }
         mysql_query(conn, sql_stmt.c_str());
         result = mysql_store_result(conn);
         if (result) {
             MYSQL_ROW row = mysql_fetch_row(result);
-            if (tablet_idx.table_id() != atoll(row[5])) {
+            auto [db_id, table_id, partition_id, index_id] =
+                    std::make_tuple(atoll(row[4]), atoll(row[5]), 
atoll(row[6]), atoll(row[7]));
+            if (tablet_idx.db_id() != db_id) {
+                LOG(WARNING) << "check failed, fdb meta: " << 
tablet_idx.ShortDebugString()
+                             << " fe db_id: " << atoll(row[4]);
+                check_res = false;
+            }
+            if (tablet_idx.table_id() != table_id) {
                 LOG(WARNING) << "check failed, fdb meta: " << 
tablet_idx.ShortDebugString()
                              << " fe table_id: " << atoll(row[5]);
                 check_res = false;
             }
-            if (tablet_idx.partition_id() != atoll(row[6])) {
+            if (tablet_idx.partition_id() != partition_id) {
                 LOG(WARNING) << "check failed, fdb meta: " << 
tablet_idx.ShortDebugString()
                              << " fe partition_id: " << atoll(row[6]);
                 check_res = false;
             }
-            if (tablet_idx.index_id() != atoll(row[7])) {
+            if (tablet_idx.index_id() != index_id) {
                 LOG(WARNING) << "check failed, fdb meta: " << 
tablet_idx.ShortDebugString()
                              << " fe index_id: " << atoll(row[7]);
                 check_res = false;
             }
+            mysql_free_result(result);
+        } else {
+            LOG(WARNING) << "check failed, fdb meta: " << 
tablet_idx.ShortDebugString()
+                         << " fe tablet not found";
+            check_res = false;
         }
-        mysql_free_result(result);
         stat_info_.check_fe_tablet_num++;
     }
     LOG(INFO) << "check_fe_tablet_num: " << stat_info_.check_fe_tablet_num;
@@ -127,131 +247,167 @@ bool MetaChecker::check_fe_meta_by_fdb(MYSQL* conn) {
     return check_res;
 }
 
-bool MetaChecker::check_fdb_by_fe_meta(MYSQL* conn) {
-    // get db info from FE
-    std::string sql_stmt = "show databases";
-    MYSQL_RES* result;
-    mysql_query(conn, sql_stmt.c_str());
-    result = mysql_store_result(conn);
-    std::map<std::string, std::vector<std::string>*> db_to_tables;
-    if (result) {
-        int num_row = mysql_num_rows(result);
-        for (int i = 0; i < num_row; ++i) {
-            MYSQL_ROW row = mysql_fetch_row(result);
-            if (strcmp(row[0], "__internal_schema") == 0 ||
-                strcmp(row[0], "information_schema") == 0) {
+bool MetaChecker::do_meta_schema_key_check(MYSQL* conn) {
+    std::vector<doris::TabletMetaCloudPB> tablets_meta;
+    bool check_res = true;
+
+    // scan and collect tablet_meta
+    std::string start_key;
+    std::string end_key;
+    meta_tablet_key({instance_id_, 0, 0, 0, 0}, &start_key);
+    meta_tablet_key({instance_id_, INT64_MAX, 0, 0, 0}, &end_key);
+    scan_and_handle_kv(start_key, end_key,
+                       [&tablets_meta](std::string_view key, std::string_view 
value) -> int {
+                           doris::TabletMetaCloudPB tablet_meta;
+                           if (!tablet_meta.ParseFromArray(value.data(), 
value.size())) {
+                               LOG(WARNING) << "malformed tablet meta value";
+                               return -1;
+                           }
+                           tablets_meta.push_back(std::move(tablet_meta));
+                           return 0;
+                       });
+
+    for (const auto& tablet_meta : tablets_meta) {
+        std::unique_ptr<Transaction> txn;
+        TxnErrorCode err = txn_kv_->create_txn(&txn);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << "failed to init txn";
+            continue;
+        }
+
+        // get tablet_index to search tablet belongs which db
+        std::string tablet_index_key;
+        std::string tablet_index_val;
+        meta_tablet_idx_key({instance_id_, tablet_meta.tablet_id()}, 
&tablet_index_key);
+        err = txn->get(tablet_index_key, &tablet_index_val);
+        if (err != TxnErrorCode::TXN_OK) {
+            if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+                LOG(WARNING) << "tablet_idx not found, tablet id: " << 
tablet_meta.tablet_id();
+                continue;
+            } else {
+                LOG(WARNING) << "failed to get tablet_idx, err: " << err
+                             << " tablet id: " << tablet_meta.tablet_id();
                 continue;
             }
-            db_to_tables.insert({row[0], new std::vector<std::string>()});
         }
-    }
-    mysql_free_result(result);
 
-    // get tables info from FE
-    for (const auto& elem : db_to_tables) {
-        std::string sql_stmt = "show tables from " + elem.first;
+        TabletIndexPB tablet_index_meta;
+        tablet_index_meta.ParseFromString(tablet_index_val);
+
+        if (!db_meta_.contains(tablet_index_meta.db_id())) {
+            LOG(WARNING) << "tablet_idx.db_id not found in fe meta, db_id = "
+                         << tablet_index_meta.db_id()
+                         << "tablet index meta: " << 
tablet_index_meta.DebugString();
+            continue;
+        }
+        std::string db_name = db_meta_.at(tablet_index_meta.db_id());
+        if (db_name == "__internal_schema" || db_name == "information_schema" 
||
+            db_name == "mysql") {
+            continue;
+        }
+
+        if (mysql_select_db(conn, db_name.c_str())) {
+            LOG(WARNING) << "mysql select db error, db_name: " << db_name
+                         << " error: " << mysql_error(conn);
+            continue;
+        }
+        std::string tablet_schema_key;
+        std::string tablet_schema_val;
+        meta_schema_key({instance_id_, tablet_index_meta.index_id(), 
tablet_meta.schema_version()},
+                        &tablet_schema_key);
+        ValueBuf val_buf;
+        err = cloud::blob_get(txn.get(), tablet_schema_key, &val_buf);
+        if (err != TxnErrorCode::TXN_OK) {
+            LOG(WARNING) << fmt::format(
+                    "failed to get schema, err={}",
+                    err == TxnErrorCode::TXN_KEY_NOT_FOUND ? "not found" : 
"internal error");
+            continue;
+        }
+        doris::TabletSchemaCloudPB tablet_schema;
+        if (!parse_schema_value(val_buf, &tablet_schema)) {
+            LOG(WARNING) << fmt::format("malformed schema value, key={}", 
tablet_schema_key);
+            continue;
+        }
+
+        MYSQL_RES* result;
+        std::string sql_stmt =
+                fmt::format("SHOW PROC '/dbs/{}/{}/index_schema/{}'", 
tablet_index_meta.db_id(),
+                            tablet_meta.table_id(), tablet_meta.index_id());
         mysql_query(conn, sql_stmt.c_str());
+
         result = mysql_store_result(conn);
         if (result) {
-            int num_row = mysql_num_rows(result);
-            for (int i = 0; i < num_row; ++i) {
-                MYSQL_ROW row = mysql_fetch_row(result);
-                elem.second->push_back(row[0]);
-            }
-        }
-        mysql_free_result(result);
-    }
-
-    // get tablet info from FE
-    std::vector<TabletInfo> tablets;
-    for (const auto& elem : db_to_tables) {
-        for (const std::string& table : *elem.second) {
-            std::string sql_stmt = "show tablets from " + elem.first + "." + 
table;
-            mysql_query(conn, sql_stmt.c_str());
-            result = mysql_store_result(conn);
-            if (result) {
-                int num_row = mysql_num_rows(result);
-                for (int i = 0; i < num_row; ++i) {
-                    MYSQL_ROW row = mysql_fetch_row(result);
-                    TabletInfo tablet_info = {0};
-                    tablet_info.tablet_id = atoll(row[0]);
-                    VLOG_DEBUG << "get tablet info log"
-                               << ", db name" << elem.first << ", table name" 
<< table
-                               << ",tablet id" << tablet_info.tablet_id;
-                    tablet_info.schema_version = atoll(row[4]);
-                    tablets.push_back(std::move(tablet_info));
-                }
+            MYSQL_ROW row = mysql_fetch_row(result);
+            int64_t schema_version = atoll(row[2]);
+            if (tablet_schema.schema_version() != schema_version) {
+                LOG(WARNING) << "check failed, fdb meta: " << 
tablet_schema.ShortDebugString()
+                             << " fe schema_version: " << schema_version;
+                check_res = false;
             }
             mysql_free_result(result);
+        } else {
+            LOG(WARNING) << "check failed, fdb meta: " << 
tablet_meta.ShortDebugString()
+                         << " fe tablet not found";
+            check_res = false;
         }
+        stat_info_.check_fe_tablet_num++;
     }
 
-    // get tablet info from FE
-    // get Partition info from FE
-    std::map<int64_t, PartitionInfo> partitions;
-    for (auto& tablet_info : tablets) {
-        std::string sql_stmt = "show tablet " + 
std::to_string(tablet_info.tablet_id);
-        mysql_query(conn, sql_stmt.c_str());
-        result = mysql_store_result(conn);
-        if (result) {
-            int num_row = mysql_num_rows(result);
-            for (int i = 0; i < num_row; ++i) {
-                MYSQL_ROW row = mysql_fetch_row(result);
-                tablet_info.db_id = atoll(row[4]);
-                tablet_info.table_id = atoll(row[5]);
-                tablet_info.partition_id = atoll(row[6]);
-                tablet_info.index_id = atoll(row[7]);
+    return check_res;
+}
 
-                PartitionInfo partition_info = {0};
-                partition_info.db_id = atoll(row[4]);
-                partition_info.table_id = atoll(row[5]);
-                partition_info.partition_id = atoll(row[6]);
-                partition_info.tablet_id = tablet_info.tablet_id;
-                VLOG_DEBUG << "get partition info log"
-                           << ", db id" << partition_info.db_id << ", table id"
-                           << partition_info.table_id << ", partition id"
-                           << partition_info.partition_id << ", tablet id"
-                           << partition_info.tablet_id;
+template <>
+bool MetaChecker::handle_check_fe_meta_by_fdb<CHECK_META>(MYSQL* conn) {
+    bool check_res = true;
+    // check MetaTabletIdxKey
+    if (!do_meta_tablet_key_index_check(conn)) {
+        check_res = false;
+        LOG(WARNING) << "do_meta_tablet_key_index_check failed";
+    } else {
+        LOG(INFO) << "do_meta_tablet_key_index_check success";
+    }
 
-                partitions.insert({partition_info.partition_id, 
std::move(partition_info)});
-            }
-        }
-        mysql_free_result(result);
+    // check MetaTabletKey
+    if (!do_meta_tablet_key_check(conn)) {
+        check_res = false;
+        LOG(WARNING) << "do_meta_tablet_key_check failed";
+    } else {
+        LOG(INFO) << "do_meta_tablet_key_check success";
     }
 
-    // get partition version from FE
-    for (const auto& elem : db_to_tables) {
-        for (const std::string& table : *elem.second) {
-            std::string sql_stmt = "show partitions from " + elem.first + "." 
+ table;
-            mysql_query(conn, sql_stmt.c_str());
-            result = mysql_store_result(conn);
-            if (result) {
-                int num_row = mysql_num_rows(result);
-                for (int i = 0; i < num_row; ++i) {
-                    MYSQL_ROW row = mysql_fetch_row(result);
-                    int64_t partition_id = atoll(row[0]);
-                    int64_t visible_version = atoll(row[2]);
-                    partitions[partition_id].visible_version = visible_version;
-                    VLOG_DEBUG << "get partition version log"
-                               << ", db name" << elem.first << ", table name" 
<< table
-                               << ", raw partition id" << row[0] << ", first 
partition id"
-                               << partition_id << ", db id" << 
partitions[partition_id].db_id
-                               << ", table id" << 
partitions[partition_id].table_id
-                               << ", second partition id" << 
partitions[partition_id].partition_id
-                               << ", tablet id" << 
partitions[partition_id].tablet_id;
-                }
-            }
-            mysql_free_result(result);
-        }
+    // check MetaSchemaKey
+    if (!do_meta_schema_key_check(conn)) {
+        check_res = false;
+        LOG(WARNING) << "do_meta_schema_key_check failed";
+    } else {
+        LOG(INFO) << "do_meta_schema_key_check success";
     }
+    return check_res;
+}
 
+bool MetaChecker::check_fe_meta_by_fdb(MYSQL* conn) {
+    bool success = true;
+    if (config::enable_checker_for_meta_key_check) {
+        success = handle_check_fe_meta_by_fdb<CHECK_META>(conn);
+    }
+
+    // TODO(wyxxxcat) add check for version key
+    // if (config::enable_checker_for_version_key_check) {
+    //     success = handle_check_fe_meta_by_fdb<CHECK_VERSION>(conn);
+    // }
+    return success;
+}
+
+bool MetaChecker::do_meta_tablet_index_key_inverted_check(MYSQL* conn,
+                                                          const 
std::vector<TabletInfo>& tablets) {
+    bool check_res = true;
     // check tablet idx
     for (const auto& tablet_info : tablets) {
         std::unique_ptr<Transaction> txn;
         TxnErrorCode err = txn_kv_->create_txn(&txn);
         if (err != TxnErrorCode::TXN_OK) {
             LOG(WARNING) << "failed to init txn";
-            return false;
+            continue;
         }
 
         std::string key, val;
@@ -260,62 +416,71 @@ bool MetaChecker::check_fdb_by_fe_meta(MYSQL* conn) {
         if (err != TxnErrorCode::TXN_OK) {
             if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
                 LOG(WARNING) << "tablet not found, tablet id: " << 
tablet_info.tablet_id;
-                return false;
+                check_res = false;
+                continue;
             } else {
                 LOG(WARNING) << "failed to get tablet_idx, err: " << err
                              << " tablet id: " << tablet_info.tablet_id;
-                return false;
+                check_res = false;
+                continue;
             }
         }
 
         TabletIndexPB tablet_idx;
         if (!tablet_idx.ParseFromString(val)) [[unlikely]] {
             LOG(WARNING) << "malformed tablet index value";
-            return false;
+            continue;
         }
 
-        /*
         if (tablet_info.db_id != tablet_idx.db_id()) [[unlikely]] {
             LOG(WARNING) << "tablet idx check failed, fe db id: " << 
tablet_info.db_id
                          << " tablet idx db id: " << tablet_idx.db_id();
-            return false;
+            check_res = false;
+            continue;
         }
-        */
 
         if (tablet_info.table_id != tablet_idx.table_id()) [[unlikely]] {
             LOG(WARNING) << "tablet idx check failed, fe table id: " << 
tablet_info.table_id
                          << " tablet idx table id: " << tablet_idx.table_id();
-            return false;
+            check_res = false;
+            continue;
         }
 
         if (tablet_info.partition_id != tablet_idx.partition_id()) 
[[unlikely]] {
             LOG(WARNING) << "tablet idx check failed, fe part id: " << 
tablet_info.partition_id
                          << " tablet idx part id: " << 
tablet_idx.partition_id();
-            return false;
+            check_res = false;
+            continue;
         }
 
         if (tablet_info.index_id != tablet_idx.index_id()) [[unlikely]] {
             LOG(WARNING) << "tablet idx check failed, fe index id: " << 
tablet_info.index_id
                          << " tablet idx index id: " << tablet_idx.index_id();
-            return false;
+            check_res = false;
+            continue;
         }
 
         if (tablet_info.tablet_id != tablet_idx.tablet_id()) [[unlikely]] {
             LOG(WARNING) << "tablet idx check failed, fe tablet id: " << 
tablet_info.tablet_id
                          << " tablet idx tablet id: " << 
tablet_idx.tablet_id();
-            return false;
+            check_res = false;
+            continue;
         }
-
         stat_info_.check_fdb_tablet_idx_num++;
     }
+    return check_res;
+}
 
+bool MetaChecker::do_meta_tablet_key_inverted_check(MYSQL* conn, 
std::vector<TabletInfo>& tablets,
+                                                    std::map<int64_t, 
PartitionInfo>& partitions) {
+    bool check_res = true;
     // check tablet meta
     for (const auto& tablet_info : tablets) {
         std::unique_ptr<Transaction> txn;
         TxnErrorCode err = txn_kv_->create_txn(&txn);
         if (err != TxnErrorCode::TXN_OK) {
             LOG(WARNING) << "failed to init txn";
-            return false;
+            continue;
         }
 
         MetaTabletKeyInfo key_info1 {instance_id_, tablet_info.table_id, 
tablet_info.index_id,
@@ -325,16 +490,76 @@ bool MetaChecker::check_fdb_by_fe_meta(MYSQL* conn) {
         err = txn->get(key, &val);
         if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
             LOG(WARNING) << "tablet meta not found: " << tablet_info.tablet_id;
-            return false;
+            check_res = false;
+            continue;
         } else if (err != TxnErrorCode::TXN_OK) [[unlikely]] {
             LOG(WARNING) << "failed to get tablet, err: " << err;
-            return false;
+            check_res = false;
+            continue;
         }
         stat_info_.check_fdb_tablet_meta_num++;
     }
 
-    // check tablet schema
-    /*
+    // TODO(wyxxxcat):
+    // separate from this function to check partition version function
+    // for (const auto& elem : partitions) {
+    //     std::unique_ptr<Transaction> txn;
+    //     TxnErrorCode err = txn_kv_->create_txn(&txn);
+    //     if (err != TxnErrorCode::TXN_OK) {
+    //         LOG(WARNING) << "failed to init txn";
+    //         continue;
+    //     }
+    //     if (elem.second.visible_version == 0 || elem.second.visible_version 
== 1) {
+    //         continue;
+    //     }
+
+    //     int64_t db_id = elem.second.db_id;
+    //     int64_t table_id = elem.second.table_id;
+    //     int64_t partition_id = elem.second.partition_id;
+    //     int64_t tablet_id = elem.second.tablet_id;
+    //     std::string ver_key = partition_version_key({instance_id_, db_id, 
table_id, partition_id});
+    //     std::string ver_val;
+    //     err = txn->get(ver_key, &ver_val);
+    //     if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
+    //         LOG_WARNING("version key not found.")
+    //                 .tag("db id", db_id)
+    //                 .tag("table id", table_id)
+    //                 .tag("partition id", partition_id)
+    //                 .tag("tablet id", tablet_id);
+    //         check_res = false;
+    //         continue;
+    //     } else if (err != TxnErrorCode::TXN_OK) {
+    //         LOG_WARNING("failed to get version.")
+    //                 .tag("db id", db_id)
+    //                 .tag("table id", table_id)
+    //                 .tag("partition id", partition_id)
+    //                 .tag("tablet id", tablet_id);
+    //         check_res = false;
+    //         continue;
+    //     }
+
+    //     VersionPB version_pb;
+    //     if (!version_pb.ParseFromString(ver_val)) {
+    //         LOG(WARNING) << "malformed version value";
+    //         check_res = false;
+    //         continue;
+    //     }
+
+    //     if (version_pb.version() != elem.second.visible_version) {
+    //         LOG(WARNING) << "partition version check failed, FE partition 
version"
+    //                      << elem.second.visible_version << " ms version: " 
<< version_pb.version();
+    //         check_res = false;
+    //         continue;
+    //     }
+    //     stat_info_.check_fdb_partition_version_num++;
+    // }
+    return check_res;
+}
+
+bool MetaChecker::do_meta_schema_key_inverted_check(MYSQL* conn, 
std::vector<TabletInfo>& tablets,
+                                                    std::map<int64_t, 
PartitionInfo>& partitions) {
+    bool check_res = true;
+
     for (const auto& tablet_info : tablets) {
         std::unique_ptr<Transaction> txn;
         TxnErrorCode err = txn_kv_->create_txn(&txn);
@@ -350,68 +575,87 @@ bool MetaChecker::check_fdb_by_fe_meta(MYSQL* conn) {
         err = cloud::blob_get(txn.get(), schema_key, &val_buf);
         if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
             LOG(WARNING) << "tablet schema not found: " << 
tablet_info.debug_string();
-            return false;
+            check_res = false;
+            continue;
         } else if (err != TxnErrorCode::TXN_OK) [[unlikely]] {
-            LOG(WARNING) <<"failed to get tablet schema, err: " << err;
-            return false;
+            LOG(WARNING) << "failed to get tablet schema, err: " << err;
+            check_res = false;
+            continue;
         }
+        stat_info_.check_fdb_tablet_schema_num++;
     }
-    */
+    return check_res;
+}
 
-    // check partition
-    for (const auto& elem : partitions) {
-        std::unique_ptr<Transaction> txn;
-        TxnErrorCode err = txn_kv_->create_txn(&txn);
-        if (err != TxnErrorCode::TXN_OK) {
-            LOG(WARNING) << "failed to init txn";
-            return false;
-        }
-        if (elem.second.visible_version == 0 || elem.second.visible_version == 
1) {
-            continue;
-        }
+// CHECK_META pass of the FE -> KV direction.
+// Loads tablet/partition info from FE, then runs the three inverted key checks
+// (MetaTabletIdxKey, MetaTabletKey, MetaSchemaKey). Every check is executed even
+// if an earlier one failed; the result is true only when all of them passed.
+template <>
+bool MetaChecker::handle_check_fdb_by_fe_meta<CHECK_META>(MYSQL* conn) {
+    std::vector<TabletInfo> tablets;
+    std::map<int64_t, PartitionInfo> partitions;
 
-        int64_t db_id = elem.second.db_id;
-        int64_t table_id = elem.second.table_id;
-        int64_t partition_id = elem.second.partition_id;
-        int64_t tablet_id = elem.second.tablet_id;
-        std::string ver_key = partition_version_key({instance_id_, db_id, 
table_id, partition_id});
-        std::string ver_val;
-        err = txn->get(ver_key, &ver_val);
-        if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) {
-            LOG_WARNING("version key not found.")
-                    .tag("db id", db_id)
-                    .tag("table id", table_id)
-                    .tag("partition id", partition_id)
-                    .tag("tablet id", tablet_id);
-            return false;
-        } else if (err != TxnErrorCode::TXN_OK) {
-            LOG_WARNING("failed to get version.")
-                    .tag("db id", db_id)
-                    .tag("table id", table_id)
-                    .tag("partition id", partition_id)
-                    .tag("tablet id", tablet_id);
-            return false;
-        }
+    // Fills `tablets` and `partitions` from FE; it returns nothing, so a failed
+    // FE query simply leaves the containers (partially) empty.
+    init_tablet_info_from_fe_meta(conn, tablets, partitions);
 
-        VersionPB version_pb;
-        if (!version_pb.ParseFromString(ver_val)) {
-            LOG(WARNING) << "malformed version value";
-            return false;
-        }
+    bool check_res = true;
+    // check MetaTabletIdxKey
+    if (!do_meta_tablet_index_key_inverted_check(conn, tablets)) {
+        check_res = false;
+        LOG(WARNING) << "do_meta_tablet_index_key_inverted_check failed";
+    } else {
+        LOG(INFO) << "do_meta_tablet_index_key_inverted_check success";
+    }
 
-        if (version_pb.version() != elem.second.visible_version) {
-            LOG(WARNING) << "partition version check failed, FE partition 
version"
-                         << elem.second.visible_version << " ms version: " << 
version_pb.version();
-            return false;
-        }
-        stat_info_.check_fdb_partition_version_num++;
+    // check MetaTabletKey
+    if (!do_meta_tablet_key_inverted_check(conn, tablets, partitions)) {
+        check_res = false;
+        LOG(WARNING) << "do_meta_tablet_key_inverted_check failed";
+    } else {
+        LOG(INFO) << "do_meta_tablet_key_inverted_check success";
+    }
+
+    // check MetaSchemaKey
+    if (!do_meta_schema_key_inverted_check(conn, tablets, partitions)) {
+        check_res = false;
+        LOG(WARNING) << "do_meta_schema_key_inverted_check failed";
+    } else {
+        LOG(INFO) << "do_meta_schema_key_inverted_check success";
     }
 
+    return check_res;
+}
+
+// Entry point of the FE -> KV direction of the meta check.
+// Runs the CHECK_META pass only when config::enable_checker_for_meta_key_check
+// is set; with the flag off nothing is checked and true is returned.
+// The stat_info_ counters are logged before returning in either case.
+bool MetaChecker::check_fdb_by_fe_meta(MYSQL* conn) {
+    bool success = true;
+    if (config::enable_checker_for_meta_key_check) {
+        success = handle_check_fdb_by_fe_meta<CHECK_META>(conn);
+    }
+
+    // TODO(wyxxxcat) add check for version key
+    // NOTE(review): when this is enabled, combine the result with `success`
+    // (e.g. `success = ... && success`) instead of overwriting it, otherwise a
+    // passing version check would hide a failed meta check.
+    // if (config::enable_checker_for_version_key_check) {
+    //     success = handle_check_fdb_by_fe_meta<CHECK_VERSION>(conn);
+    // }
+
     LOG(INFO) << "check_fdb_table_idx_num: " << 
stat_info_.check_fdb_tablet_idx_num
               << " check_fdb_table_meta_num: " << 
stat_info_.check_fdb_tablet_meta_num
+              << " check_fdb_tablet_schema_num: " << 
stat_info_.check_fdb_tablet_schema_num
               << " check_fdb_partition_version_num: " << 
stat_info_.check_fdb_partition_version_num;
+    return success;
+}
 
-    return true;
+// Rebuilds db_meta_ (db_id -> db_name) from the FE statement "SHOW PROC '/dbs/'".
+// Column 0 is parsed as the database id and column 1 is stored as its name.
+// db_meta_ is always cleared first; if the statement fails or returns no usable
+// result set, a warning is logged and db_meta_ is left empty.
+void MetaChecker::init_db_meta(MYSQL* conn) {
+    // init db_meta_ -> map<db_id, db_name>
+    db_meta_.clear();
+    const std::string sql_stmt = "SHOW PROC '/dbs/'";
+    if (mysql_query(conn, sql_stmt.c_str()) != 0) {
+        LOG(WARNING) << "failed to execute sql: " << sql_stmt << ", err: " << mysql_error(conn);
+        return;
+    }
+    MYSQL_RES* result = mysql_store_result(conn);
+    if (result == nullptr) {
+        LOG(WARNING) << "no result set for sql: " << sql_stmt << ", err: " << mysql_error(conn);
+        return;
+    }
+    // Both the id and the name column are read below.
+    if (mysql_num_fields(result) >= 2) {
+        // mysql_fetch_row() returns nullptr once the buffered rows are exhausted,
+        // so no separate (and potentially narrowing) row count is needed.
+        while (MYSQL_ROW row = mysql_fetch_row(result)) {
+            if (row[0] == nullptr || row[1] == nullptr) {
+                continue; // SQL NULL column: atoll()/std::string cannot take a null pointer
+            }
+            db_meta_.emplace(atoll(row[0]), row[1]);
+        }
+    } else {
+        LOG(WARNING) << "unexpected column count for sql: " << sql_stmt;
+    }
+    mysql_free_result(result);
 }
 
 void MetaChecker::do_check(const std::string& host, const std::string& port,
@@ -442,6 +686,7 @@ void MetaChecker::do_check(const std::string& host, const 
std::string& port,
     LOG(INFO) << "check_fe_meta_by_fdb begin";
     bool ret = false;
     do {
+        init_db_meta(&conn);
         ret = check_fe_meta_by_fdb(&conn);
         if (!ret) {
             std::this_thread::sleep_for(seconds(10));
@@ -456,6 +701,8 @@ void MetaChecker::do_check(const std::string& host, const 
std::string& port,
     now = 
duration_cast<seconds>(system_clock::now().time_since_epoch()).count();
     LOG(INFO) << "check_fe_meta_by_fdb finish, cost(second): " << now - start;
 
+    LOG(INFO) << "check_fdb_by_fe_meta begin";
+    init_db_meta(&conn);
     ret = check_fdb_by_fe_meta(&conn);
     if (!ret) {
         LOG(WARNING) << "check_fdb_by_fe_meta failed, there may be data loss";
@@ -470,4 +717,123 @@ void MetaChecker::do_check(const std::string& host, const 
std::string& port,
     LOG(INFO) << "meta check finish";
 }
 
+// Collects tablet and partition metadata from FE over the MySQL connection.
+//
+// Statements issued against `conn`, in order:
+//   1. "show databases"                  -> database names (system databases skipped)
+//   2. "show tables from <db>"           -> table names per database
+//   3. "show tablets from <db>.<tbl>"    -> tablet id and schema version, appended to `tablets`
+//   4. "show tablet <id>"                -> db/table/partition/index ids of each tablet; also
+//                                           registers the owning partition in `partitions`
+//   5. "show partitions from <db>.<tbl>" -> visible version per partition
+//
+// A statement that fails, returns no result set, or returns fewer columns than
+// are read is skipped, so the outputs may be incomplete. SQL NULL columns are
+// parsed as 0.
+// NOTE(review): the column indexes are kept from the original code and assume
+// the current FE output layout -- confirm whenever the FE statements change.
+void MetaChecker::init_tablet_info_from_fe_meta(MYSQL* conn, std::vector<TabletInfo>& tablets,
+                                                std::map<int64_t, PartitionInfo>& partitions) {
+    // Runs `sql` and calls `on_row` for every row of the buffered result set.
+    // `min_fields` is the number of leading columns `on_row` is going to index.
+    auto for_each_row = [conn](const std::string& sql, unsigned int min_fields, auto&& on_row) {
+        if (mysql_query(conn, sql.c_str()) != 0) {
+            LOG(WARNING) << "failed to execute sql: " << sql << ", err: " << mysql_error(conn);
+            return;
+        }
+        MYSQL_RES* result = mysql_store_result(conn);
+        if (result == nullptr) {
+            return;
+        }
+        if (mysql_num_fields(result) >= min_fields) {
+            // mysql_fetch_row() returns nullptr once all buffered rows are consumed.
+            while (MYSQL_ROW row = mysql_fetch_row(result)) {
+                on_row(row);
+            }
+        } else {
+            LOG(WARNING) << "unexpected column count for sql: " << sql;
+        }
+        mysql_free_result(result);
+    };
+    // atoll() on a null pointer is undefined; treat a SQL NULL column as 0.
+    auto to_int64 = [](const char* field) -> int64_t {
+        return field != nullptr ? atoll(field) : 0;
+    };
+
+    // init tablet info, partition info
+    std::map<std::string, std::vector<std::string>> db_to_tables;
+    for_each_row("show databases", 1, [&](MYSQL_ROW row) {
+        if (row[0] == nullptr) {
+            return;
+        }
+        // Skip system databases. Each strcmp() must be compared against 0: a bare
+        // strcmp() is truthy for every *different* name, which made the original
+        // condition skip all databases except "mysql".
+        if (strcmp(row[0], "__internal_schema") == 0 ||
+            strcmp(row[0], "information_schema") == 0 || strcmp(row[0], "mysql") == 0) {
+            return;
+        }
+        db_to_tables.insert({row[0], std::vector<std::string>()});
+    });
+
+    // get tables info from FE
+    for (auto& elem : db_to_tables) {
+        std::vector<std::string>& tables = elem.second;
+        for_each_row("show tables from " + elem.first, 1, [&](MYSQL_ROW row) {
+            if (row[0] != nullptr) {
+                tables.emplace_back(row[0]);
+            }
+        });
+    }
+
+    // get tablet info from FE
+    for (const auto& elem : db_to_tables) {
+        const std::string& db_name = elem.first;
+        for (const std::string& table : elem.second) {
+            for_each_row("show tablets from " + db_name + "." + table, 5, [&](MYSQL_ROW row) {
+                // Value-initialize so the ids filled in by the next pass are 0,
+                // not indeterminate, if that pass finds nothing for this tablet.
+                TabletInfo tablet_info {};
+                tablet_info.tablet_id = to_int64(row[0]);
+                VLOG_DEBUG << "get tablet info log"
+                           << ", db name" << db_name << ", table name" << table
+                           << ",tablet id" << tablet_info.tablet_id;
+                tablet_info.schema_version = to_int64(row[4]);
+                tablets.push_back(tablet_info);
+            });
+        }
+    }
+
+    // get the ids of each tablet from FE and register its partition
+    for (auto& tablet_info : tablets) {
+        for_each_row("show tablet " + std::to_string(tablet_info.tablet_id), 8, [&](MYSQL_ROW row) {
+            tablet_info.db_id = to_int64(row[4]);
+            tablet_info.table_id = to_int64(row[5]);
+            tablet_info.partition_id = to_int64(row[6]);
+            tablet_info.index_id = to_int64(row[7]);
+
+            // visible_version stays 0 until the partition pass below fills it in.
+            PartitionInfo partition_info {};
+            partition_info.db_id = tablet_info.db_id;
+            partition_info.table_id = tablet_info.table_id;
+            partition_info.partition_id = tablet_info.partition_id;
+            partition_info.tablet_id = tablet_info.tablet_id;
+            VLOG_DEBUG << "get partition info log"
+                       << ", db id" << partition_info.db_id << ", table id"
+                       << partition_info.table_id << ", partition id"
+                       << partition_info.partition_id << ", tablet id"
+                       << partition_info.tablet_id;
+
+            // insert() keeps the first tablet seen for a partition.
+            partitions.insert({partition_info.partition_id, partition_info});
+        });
+    }
+
+    // get partition version from FE
+    for (const auto& elem : db_to_tables) {
+        const std::string& db_name = elem.first;
+        for (const std::string& table : elem.second) {
+            for_each_row("show partitions from " + db_name + "." + table, 3, [&](MYSQL_ROW row) {
+                if (row[0] == nullptr) {
+                    return; // a partition row without an id cannot be matched
+                }
+                const int64_t partition_id = atoll(row[0]);
+                // operator[] also creates a zeroed entry for a partition that owns
+                // no tablet seen above, as the original code did.
+                PartitionInfo& partition = partitions[partition_id];
+                partition.visible_version = to_int64(row[2]);
+                VLOG_DEBUG << "get partition version log"
+                           << ", db name" << db_name << ", table name" << table
+                           << ", raw partition id" << row[0] << ", first partition id"
+                           << partition_id << ", db id" << partition.db_id
+                           << ", table id" << partition.table_id
+                           << ", second partition id" << partition.partition_id
+                           << ", tablet id" << partition.tablet_id;
+            });
+        }
+    }
+}
+
 } // namespace doris::cloud
diff --git a/cloud/src/recycler/meta_checker.h 
b/cloud/src/recycler/meta_checker.h
index 90479c71673..4f16cdab7c7 100644
--- a/cloud/src/recycler/meta_checker.h
+++ b/cloud/src/recycler/meta_checker.h
@@ -27,18 +27,57 @@
 #include <unordered_map>
 #include <unordered_set>
 
+#include "common/logging.h"
 #include "recycler/white_black_list.h"
 
 namespace doris::cloud {
 class TxnKv;
 
+// Counters accumulated by MetaChecker while a check runs; all start at 0.
 struct StatInfo {
+    // fe: presumably the number of FE-side items verified by
+    // check_fe_meta_by_fdb -- TODO confirm against the .cpp
     int64_t check_fe_tablet_num = 0;
+    int64_t check_fe_partition_num = 0;
+    int64_t check_fe_tablet_schema_num = 0;
+    // fdb: number of KV-side keys verified by check_fdb_by_fe_meta, which logs
+    // these four counters when it finishes
     int64_t check_fdb_tablet_idx_num = 0;
     int64_t check_fdb_tablet_meta_num = 0;
+    int64_t check_fdb_tablet_schema_num = 0;
     int64_t check_fdb_partition_version_num = 0;
 };
 
+// Kinds of consistency checks, used as the template argument of
+// MetaChecker::handle_check_fe_meta_by_fdb / handle_check_fdb_by_fe_meta.
+// Enumerator values are spelled out; they equal the implicit 0..4 numbering.
+enum CHECK_TYPE {
+    CHECK_TXN = 0,
+    CHECK_VERSION = 1,
+    CHECK_META = 2,
+    CHECK_STATS = 3,
+    CHECK_JOB = 4,
+};
+
+// Identifiers of one tablet as collected from FE by
+// MetaChecker::init_tablet_info_from_fe_meta.
+struct TabletInfo {
+    // Every field defaults to 0 so that a default-constructed instance (for
+    // example one whose per-tablet FE lookup returned nothing) never exposes
+    // indeterminate values through debug_string() or a key lookup.
+    int64_t db_id = 0;
+    int64_t table_id = 0;
+    int64_t partition_id = 0;
+    int64_t index_id = 0;
+    int64_t tablet_id = 0;
+    int64_t schema_version = 0;
+
+    // Returns all fields as one human-readable line for log messages.
+    std::string debug_string() const {
+        return "db id: " + std::to_string(db_id) + " table id: " + std::to_string(table_id) +
+               " partition id: " + std::to_string(partition_id) +
+               " index id: " + std::to_string(index_id) +
+               " tablet id: " + std::to_string(tablet_id) +
+               " schema version: " + std::to_string(schema_version);
+    }
+};
+
+// Identifiers and visible version of one partition as collected from FE by
+// MetaChecker::init_tablet_info_from_fe_meta. `tablet_id` is the tablet through
+// which the partition was discovered.
+struct PartitionInfo {
+    // Every field defaults to 0: visible_version in particular is filled in by
+    // a later FE query and must not be indeterminate if that query misses the
+    // partition.
+    int64_t db_id = 0;
+    int64_t table_id = 0;
+    int64_t partition_id = 0;
+    int64_t tablet_id = 0;
+    int64_t visible_version = 0;
+};
+
 class MetaChecker {
 public:
     explicit MetaChecker(std::shared_ptr<TxnKv> txn_kv);
@@ -47,10 +86,80 @@ public:
     bool check_fe_meta_by_fdb(MYSQL* conn);
     bool check_fdb_by_fe_meta(MYSQL* conn);
 
+    template <CHECK_TYPE>
+    bool handle_check_fe_meta_by_fdb(MYSQL* conn);
+
+    template <CHECK_TYPE>
+    bool handle_check_fdb_by_fe_meta(MYSQL* conn);
+
+private:
+    void init_tablet_info_from_fe_meta(MYSQL* conn, std::vector<TabletInfo>& 
tablets,
+                                       std::map<int64_t, PartitionInfo>& 
partitions);
+
+    bool scan_and_handle_kv(std::string& start_key, const std::string& end_key,
+                            std::function<int(std::string_view, 
std::string_view)>);
+
+    bool do_meta_tablet_key_index_check(MYSQL* conn);
+
+    bool do_meta_tablet_key_check(MYSQL* conn);
+
+    bool do_meta_schema_key_check(MYSQL* conn);
+
+    bool do_meta_tablet_index_key_inverted_check(MYSQL* conn,
+                                                 const 
std::vector<TabletInfo>& tablets);
+
+    bool do_meta_tablet_key_inverted_check(MYSQL* conn, 
std::vector<TabletInfo>& tablets,
+                                           std::map<int64_t, PartitionInfo>& 
partitions);
+
+    bool do_meta_schema_key_inverted_check(MYSQL* conn, 
std::vector<TabletInfo>& tablets,
+                                           std::map<int64_t, PartitionInfo>& 
partitions);
+
+    void init_db_meta(MYSQL* conn);
+
 private:
     std::shared_ptr<TxnKv> txn_kv_;
     StatInfo stat_info_;
     std::string instance_id_;
+    // db_id -> db_name
+    std::unordered_map<int64_t, std::string> db_meta_;
 };
 
+// Explicit specializations of the per-CHECK_TYPE handlers.
+// Only CHECK_META is declared as usable; every other kind is defined as
+// deleted, so calling it is a compile-time error rather than a link error.
+
+// not implemented yet (deleted: any use fails to compile)
+template <>
+bool MetaChecker::handle_check_fe_meta_by_fdb<CHECK_STATS>(MYSQL* conn) = 
delete;
+
+// not implemented yet (deleted: any use fails to compile)
+template <>
+bool MetaChecker::handle_check_fe_meta_by_fdb<CHECK_TXN>(MYSQL* conn) = delete;
+
+// not implemented yet (deleted: any use fails to compile)
+template <>
+bool MetaChecker::handle_check_fe_meta_by_fdb<CHECK_VERSION>(MYSQL* conn) = 
delete;
+
+// not implemented yet (deleted: any use fails to compile)
+template <>
+bool MetaChecker::handle_check_fe_meta_by_fdb<CHECK_JOB>(MYSQL* conn) = delete;
+
+// Declaration only; the definition is provided out of line.
+template <>
+bool MetaChecker::handle_check_fe_meta_by_fdb<CHECK_META>(MYSQL* conn);
+
+// not implemented yet (deleted: any use fails to compile)
+template <>
+bool MetaChecker::handle_check_fdb_by_fe_meta<CHECK_STATS>(MYSQL* conn) = 
delete;
+
+// not implemented yet (deleted: any use fails to compile)
+template <>
+bool MetaChecker::handle_check_fdb_by_fe_meta<CHECK_TXN>(MYSQL* conn) = delete;
+
+// not implemented yet (deleted: any use fails to compile)
+template <>
+bool MetaChecker::handle_check_fdb_by_fe_meta<CHECK_VERSION>(MYSQL* conn) = 
delete;
+
+// not implemented yet (deleted: any use fails to compile)
+template <>
+bool MetaChecker::handle_check_fdb_by_fe_meta<CHECK_JOB>(MYSQL* conn) = delete;
+
+// Declaration only; defined in meta_checker.cpp.
+template <>
+bool MetaChecker::handle_check_fdb_by_fe_meta<CHECK_META>(MYSQL* conn);
+
 } // namespace doris::cloud


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to