This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 23e6372b25a [Fix](partition) Skip rowset partition id eq 0 smaller than config wh… (#29363)
23e6372b25a is described below

commit 23e6372b25aa6c24cc69581615af6f600dcf6420
Author: deardeng <565620...@qq.com>
AuthorDate: Fri Jan 5 19:39:47 2024 +0800

    [Fix](partition) Skip rowset partition id eq 0 smaller than config wh… (#29363)
    
    
    
    Co-authored-by: Yongqiang YANG <98214048+dataroar...@users.noreply.github.com>
---
 be/src/common/config.cpp                         |  3 ++
 be/src/common/config.h                           |  3 ++
 be/src/olap/data_dir.cpp                         | 36 +++++++++++++++++++++---
 be/src/olap/rowset/rowset_meta_manager.cpp       | 16 ++++++++---
 be/src/olap/rowset/rowset_meta_manager.h         |  4 +--
 be/src/olap/tablet_manager.cpp                   |  4 +++
 be/src/olap/tablet_meta.cpp                      | 11 ++++++++
 be/src/olap/txn_manager.cpp                      | 17 +++++------
 be/test/olap/rowset/rowset_meta_manager_test.cpp |  2 +-
 9 files changed, 77 insertions(+), 19 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index a762d0de392..4cd55d96806 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1120,6 +1120,9 @@ DEFINE_mInt32(s3_writer_buffer_allocation_timeout_second, 
"60");
 
 DEFINE_mBool(enable_column_type_check, "true");
 
+// Max number of rowsets with partition id 0 tolerated when loading a data dir, default 0
+DEFINE_Int32(ignore_invalid_partition_id_rowset_num, "0");
+
 // clang-format off
 #ifdef BE_TEST
 // test s3
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 34699267002..eed14921baa 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1175,6 +1175,9 @@ 
DECLARE_mInt32(s3_writer_buffer_allocation_timeout_second);
 
 DECLARE_mBool(enable_column_type_check);
 
+// Max number of rowsets with partition id 0 tolerated when loading a data dir, default 0
+DECLARE_Int32(ignore_invalid_partition_id_rowset_num);
+
 #ifdef BE_TEST
 // test s3
 DECLARE_String(test_s3_resource);
diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp
index 47434bd8226..5517391541f 100644
--- a/be/src/olap/data_dir.cpp
+++ b/be/src/olap/data_dir.cpp
@@ -383,6 +383,12 @@ Status DataDir::load() {
         if (rowset_meta->is_local()) {
             rowset_meta->set_fs(local_fs);
         }
+
+        if (rowset_meta->partition_id() == 0) {
+            LOG(WARNING) << "rs tablet=" << rowset_meta->tablet_id() << " 
rowset_id=" << rowset_id
+                         << " load from meta but partition id eq 0";
+        }
+
         dir_rowset_metas.push_back(rowset_meta);
         return true;
     };
@@ -470,6 +476,19 @@ Status DataDir::load() {
     };
     TabletMetaManager::traverse_pending_publish(_meta, 
load_pending_publish_info_func);
 
+    int64_t rowset_partition_id_eq_0_num = 0;
+    for (auto rowset_meta : dir_rowset_metas) {
+        if (rowset_meta->partition_id() == 0) {
+            ++rowset_partition_id_eq_0_num;
+        }
+    }
+    if (rowset_partition_id_eq_0_num > 
config::ignore_invalid_partition_id_rowset_num) {
+        LOG(FATAL) << fmt::format(
+                "roswet partition id eq 0 bigger than config {}, be exit, plz 
check be.INFO",
+                config::ignore_invalid_partition_id_rowset_num);
+        exit(-1);
+    }
+
     // traverse rowset
     // 1. add committed rowset to txn map
     // 2. add visible rowset to tablet
@@ -486,6 +505,13 @@ Status DataDir::load() {
             continue;
         }
 
+        if (rowset_meta->partition_id() == 0) {
+            LOG(WARNING) << "skip tablet_id=" << tablet->tablet_id()
+                         << " rowset: " << rowset_meta->rowset_id()
+                         << " txn: " << rowset_meta->txn_id();
+            continue;
+        }
+
         RowsetSharedPtr rowset;
         Status create_status = tablet->create_rowset(rowset_meta, &rowset);
         if (!create_status) {
@@ -499,8 +525,9 @@ Status DataDir::load() {
             rowset_meta->tablet_uid() == tablet->tablet_uid()) {
             if (!rowset_meta->tablet_schema()) {
                 rowset_meta->set_tablet_schema(tablet->tablet_schema());
-                RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(), 
rowset_meta->rowset_id(),
-                                        rowset_meta->get_rowset_pb());
+                RETURN_IF_ERROR(RowsetMetaManager::save(_meta, 
rowset_meta->tablet_uid(),
+                                                        
rowset_meta->rowset_id(),
+                                                        
rowset_meta->get_rowset_pb(), false));
             }
             Status commit_txn_status = _txn_manager->commit_txn(
                     _meta, rowset_meta->partition_id(), rowset_meta->txn_id(),
@@ -527,8 +554,9 @@ Status DataDir::load() {
                    rowset_meta->tablet_uid() == tablet->tablet_uid()) {
             if (!rowset_meta->tablet_schema()) {
                 rowset_meta->set_tablet_schema(tablet->tablet_schema());
-                RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(), 
rowset_meta->rowset_id(),
-                                        rowset_meta->get_rowset_pb());
+                RETURN_IF_ERROR(RowsetMetaManager::save(_meta, 
rowset_meta->tablet_uid(),
+                                                        
rowset_meta->rowset_id(),
+                                                        
rowset_meta->get_rowset_pb(), false));
             }
             Status publish_status = tablet->add_rowset(rowset);
             if (!publish_status && 
!publish_status.is<PUSH_VERSION_ALREADY_EXIST>()) {
diff --git a/be/src/olap/rowset/rowset_meta_manager.cpp 
b/be/src/olap/rowset/rowset_meta_manager.cpp
index 23682338ff9..f5dc8101ea0 100644
--- a/be/src/olap/rowset/rowset_meta_manager.cpp
+++ b/be/src/olap/rowset/rowset_meta_manager.cpp
@@ -34,6 +34,7 @@
 #include "olap/olap_define.h"
 #include "olap/olap_meta.h"
 #include "olap/utils.h"
+#include "util/debug_points.h"
 
 namespace doris {
 namespace {
@@ -98,15 +99,22 @@ Status RowsetMetaManager::save(OlapMeta* meta, TabletUid 
tablet_uid, const Rowse
         // return Status::InternalError("invaid partition id {} tablet {}",
         //  rowset_meta_pb.partition_id(), rowset_meta_pb.tablet_id());
     }
+    DBUG_EXECUTE_IF("RowsetMetaManager::save::zero_partition_id", {
+        long partition_id = rowset_meta_pb.partition_id();
+        auto& rs_pb = 
const_cast<std::decay_t<decltype(rowset_meta_pb)>&>(rowset_meta_pb);
+        rs_pb.set_partition_id(0);
+        LOG(WARNING) << "set debug point 
RowsetMetaManager::save::zero_partition_id old="
+                     << partition_id << " new=" << 
rowset_meta_pb.DebugString();
+    });
     if (enable_binlog) {
         return _save_with_binlog(meta, tablet_uid, rowset_id, rowset_meta_pb);
     } else {
-        return save(meta, tablet_uid, rowset_id, rowset_meta_pb);
+        return _save(meta, tablet_uid, rowset_id, rowset_meta_pb);
     }
 }
 
-Status RowsetMetaManager::save(OlapMeta* meta, TabletUid tablet_uid, const 
RowsetId& rowset_id,
-                               const RowsetMetaPB& rowset_meta_pb) {
+Status RowsetMetaManager::_save(OlapMeta* meta, TabletUid tablet_uid, const 
RowsetId& rowset_id,
+                                const RowsetMetaPB& rowset_meta_pb) {
     std::string key =
             fmt::format("{}{}_{}", ROWSET_PREFIX, tablet_uid.to_string(), 
rowset_id.to_string());
     std::string value;
@@ -523,7 +531,7 @@ Status RowsetMetaManager::load_json_rowset_meta(OlapMeta* 
meta,
     }
     RowsetId rowset_id = rowset_meta.rowset_id();
     TabletUid tablet_uid = rowset_meta.tablet_uid();
-    Status status = save(meta, tablet_uid, rowset_id, 
rowset_meta.get_rowset_pb());
+    Status status = save(meta, tablet_uid, rowset_id, 
rowset_meta.get_rowset_pb(), false);
     return status;
 }
 
diff --git a/be/src/olap/rowset/rowset_meta_manager.h 
b/be/src/olap/rowset/rowset_meta_manager.h
index 0c04cb686c5..ddf33aa055a 100644
--- a/be/src/olap/rowset/rowset_meta_manager.h
+++ b/be/src/olap/rowset/rowset_meta_manager.h
@@ -51,8 +51,6 @@ public:
     // TODO(Drogon): refactor save && _save_with_binlog to one, adapt to ut 
temperately
     static Status save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& 
rowset_id,
                        const RowsetMetaPB& rowset_meta_pb, bool enable_binlog);
-    static Status save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& 
rowset_id,
-                       const RowsetMetaPB& rowset_meta_pb);
 
     static std::vector<std::string> get_binlog_filenames(OlapMeta* meta, 
TabletUid tablet_uid,
                                                          std::string_view 
binlog_version,
@@ -79,6 +77,8 @@ public:
     static Status load_json_rowset_meta(OlapMeta* meta, const std::string& 
rowset_meta_path);
 
 private:
+    static Status _save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& 
rowset_id,
+                        const RowsetMetaPB& rowset_meta_pb);
     static Status _save_with_binlog(OlapMeta* meta, TabletUid tablet_uid, 
const RowsetId& rowset_id,
                                     const RowsetMetaPB& rowset_meta_pb);
     static Status _get_rowset_binlog_metas(OlapMeta* meta, const TabletUid 
tablet_uid,
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index 7aa5c52534f..f1e8e6185fc 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -844,6 +844,10 @@ Status TabletManager::load_tablet_from_meta(DataDir* 
data_dir, TTabletId tablet_
         tablet_meta->set_tablet_state(TABLET_RUNNING);
     }
 
+    if (tablet_meta->partition_id() == 0) {
+        LOG(WARNING) << "tablet=" << tablet_id << " load from meta but 
partition id eq 0";
+    }
+
     TabletSharedPtr tablet = Tablet::create_tablet_from_meta(tablet_meta, 
data_dir);
     if (tablet == nullptr) {
         return Status::Error<TABLE_CREATE_FROM_HEADER_ERROR>(
diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp
index daec22246e0..9d6ea16d6ff 100644
--- a/be/src/olap/tablet_meta.cpp
+++ b/be/src/olap/tablet_meta.cpp
@@ -38,6 +38,7 @@
 #include "olap/olap_define.h"
 #include "olap/tablet_meta_manager.h"
 #include "olap/utils.h"
+#include "util/debug_points.h"
 #include "util/string_util.h"
 #include "util/time.h"
 #include "util/uid_util.h"
@@ -471,6 +472,16 @@ Status TabletMeta::_save_meta(DataDir* data_dir) {
 Status TabletMeta::serialize(string* meta_binary) {
     TabletMetaPB tablet_meta_pb;
     to_meta_pb(&tablet_meta_pb);
+    if (tablet_meta_pb.partition_id() <= 0) {
+        LOG(WARNING) << "invalid partition id " << 
tablet_meta_pb.partition_id() << " tablet "
+                     << tablet_meta_pb.tablet_id();
+    }
+    DBUG_EXECUTE_IF("TabletMeta::serialize::zero_partition_id", {
+        long partition_id = tablet_meta_pb.partition_id();
+        tablet_meta_pb.set_partition_id(0);
+        LOG(WARNING) << "set debug point 
TabletMeta::serialize::zero_partition_id old="
+                     << partition_id << " new=" << 
tablet_meta_pb.DebugString();
+    });
     bool serialize_success = tablet_meta_pb.SerializeToString(meta_binary);
     if (!serialize_success) {
         LOG(FATAL) << "failed to serialize meta " << full_name();
diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp
index da465b3fbd0..441cf93d4e4 100644
--- a/be/src/olap/txn_manager.cpp
+++ b/be/src/olap/txn_manager.cpp
@@ -290,6 +290,13 @@ Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId 
partition_id,
     do {
         // get tx
         std::shared_lock rdlock(_get_txn_map_lock(transaction_id));
+        auto rs_pb = rowset_ptr->rowset_meta()->get_rowset_pb();
+        // TODO(dx): remove log after fix partition id eq 0 bug
+        if (!rs_pb.has_partition_id() || rs_pb.partition_id() == 0) {
+            rowset_ptr->rowset_meta()->set_partition_id(partition_id);
+            LOG(WARNING) << "cant get partition id from rs pb, get from func 
arg partition_id="
+                         << partition_id;
+        }
         txn_tablet_map_t& txn_tablet_map = _get_txn_tablet_map(transaction_id);
         auto it = txn_tablet_map.find(key);
         if (it == txn_tablet_map.end()) {
@@ -335,15 +342,9 @@ Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId 
partition_id,
     // save meta need access disk, it maybe very slow, so that it is not in 
global txn lock
     // it is under a single txn lock
     if (!is_recovery) {
-        auto rs_pb = rowset_ptr->rowset_meta()->get_rowset_pb();
-        // TODO(dx): remove log after fix partition id eq 0 bug
-        if (!rs_pb.has_partition_id() || rs_pb.partition_id() == 0) {
-            rs_pb.set_partition_id(partition_id);
-            LOG(WARNING) << "cant get partition id from rs pb, get from func 
arg partition_id="
-                         << partition_id;
-        }
         Status save_status =
-                RowsetMetaManager::save(meta, tablet_uid, 
rowset_ptr->rowset_id(), rs_pb);
+                RowsetMetaManager::save(meta, tablet_uid, 
rowset_ptr->rowset_id(),
+                                        
rowset_ptr->rowset_meta()->get_rowset_pb(), false);
         DBUG_EXECUTE_IF("TxnManager.RowsetMetaManager.save_wait", {
             if (auto wait = dp->param<int>("duration", 0); wait > 0) {
                 
LOG_WARNING("TxnManager.RowsetMetaManager.save_wait").tag("wait ms", wait);
diff --git a/be/test/olap/rowset/rowset_meta_manager_test.cpp 
b/be/test/olap/rowset/rowset_meta_manager_test.cpp
index a747d1fa2ca..5875ba424b1 100644
--- a/be/test/olap/rowset/rowset_meta_manager_test.cpp
+++ b/be/test/olap/rowset/rowset_meta_manager_test.cpp
@@ -103,7 +103,7 @@ TEST_F(RowsetMetaManagerTest, TestSaveAndGetAndRemove) {
     EXPECT_EQ(rowset_meta.rowset_id(), rowset_id);
     RowsetMetaPB rowset_meta_pb;
     rowset_meta.to_rowset_pb(&rowset_meta_pb);
-    Status status = RowsetMetaManager::save(_meta, _tablet_uid, rowset_id, 
rowset_meta_pb);
+    Status status = RowsetMetaManager::save(_meta, _tablet_uid, rowset_id, 
rowset_meta_pb, false);
     EXPECT_TRUE(status == Status::OK());
     EXPECT_TRUE(RowsetMetaManager::check_rowset_meta(_meta, _tablet_uid, 
rowset_id));
     std::string json_rowset_meta_read;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to