This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new bcdf55350bb [fix](cloud-mow) make cloud_txn_delete_bitmap_cache's  
expired time more reasonable (#40333)
bcdf55350bb is described below

commit bcdf55350bbfc581e82925b70e99ed4b57836b9d
Author: huanghaibin <284824...@qq.com>
AuthorDate: Sat Sep 14 11:25:11 2024 +0800

    [fix](cloud-mow) make cloud_txn_delete_bitmap_cache's  expired time more 
reasonable (#40333)
    
    Currently, cloud_txn_delete_bitmap_cache's expiration time depends on
    txn_timeout_s. However, if calculating the delete bitmap takes longer
    than txn_timeout_s, updating the publish status in
    cloud_txn_delete_bitmap_cache will fail, because the cache entry may
    already have been removed by the cleaning thread. In routine load,
    txn_timeout_s is derived from the max_batch_interval session variable,
    so if someone uses a small interval to submit routine load tasks, the
    cloud_txn_delete_bitmap_cache entry may be removed while the delete
    bitmap calculation is still in progress. We therefore need to set a
    minimum expiration time on cloud_txn_delete_bitmap_cache entries to
    avoid this scenario.
---
 be/src/cloud/cloud_tablet.cpp                  | 10 +++----
 be/src/cloud/cloud_txn_delete_bitmap_cache.cpp | 38 ++++++++++++++++----------
 be/src/cloud/cloud_txn_delete_bitmap_cache.h   |  8 +++---
 be/src/cloud/config.cpp                        |  4 +++
 be/src/cloud/config.h                          |  4 +++
 5 files changed, 40 insertions(+), 24 deletions(-)

diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index 7433b781c65..8d132020a77 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -658,8 +658,8 @@ Status CloudTablet::save_delete_bitmap(const TabletTxnInfo* 
txn_info, int64_t tx
     RowsetSharedPtr rowset = txn_info->rowset;
     int64_t cur_version = rowset->start_version();
     // update delete bitmap info, in order to avoid recalculation when trying 
again
-    _engine.txn_delete_bitmap_cache().update_tablet_txn_info(
-            txn_id, tablet_id(), delete_bitmap, cur_rowset_ids, 
PublishStatus::PREPARE);
+    RETURN_IF_ERROR(_engine.txn_delete_bitmap_cache().update_tablet_txn_info(
+            txn_id, tablet_id(), delete_bitmap, cur_rowset_ids, 
PublishStatus::PREPARE));
 
     if (txn_info->partial_update_info && 
txn_info->partial_update_info->is_partial_update &&
         rowset_writer->num_rows() > 0) {
@@ -684,9 +684,9 @@ Status CloudTablet::save_delete_bitmap(const TabletTxnInfo* 
txn_info, int64_t tx
     // store the delete bitmap with sentinel marks in txn_delete_bitmap_cache 
because if the txn is retried for some reason,
     // it will use the delete bitmap from txn_delete_bitmap_cache when 
re-calculating the delete bitmap, during which it will do
     // delete bitmap correctness check. If we store the new_delete_bitmap, the 
delete bitmap correctness check will fail
-    _engine.txn_delete_bitmap_cache().update_tablet_txn_info(txn_id, 
tablet_id(), delete_bitmap,
-                                                             cur_rowset_ids, 
PublishStatus::SUCCEED,
-                                                             
txn_info->publish_info);
+    RETURN_IF_ERROR(_engine.txn_delete_bitmap_cache().update_tablet_txn_info(
+            txn_id, tablet_id(), delete_bitmap, cur_rowset_ids, 
PublishStatus::SUCCEED,
+            txn_info->publish_info));
 
     return Status::OK();
 }
diff --git a/be/src/cloud/cloud_txn_delete_bitmap_cache.cpp 
b/be/src/cloud/cloud_txn_delete_bitmap_cache.cpp
index c6a3b54edc3..4ea2699bdd9 100644
--- a/be/src/cloud/cloud_txn_delete_bitmap_cache.cpp
+++ b/be/src/cloud/cloud_txn_delete_bitmap_cache.cpp
@@ -23,6 +23,7 @@
 #include <memory>
 #include <shared_mutex>
 
+#include "cloud/config.h"
 #include "common/status.h"
 #include "cpp/sync_point.h"
 #include "olap/olap_common.h"
@@ -119,12 +120,11 @@ void CloudTxnDeleteBitmapCache::set_tablet_txn_info(
         TTransactionId transaction_id, int64_t tablet_id, DeleteBitmapPtr 
delete_bitmap,
         const RowsetIdUnorderedSet& rowset_ids, RowsetSharedPtr rowset, 
int64_t txn_expiration,
         std::shared_ptr<PartialUpdateInfo> partial_update_info) {
-    if (txn_expiration <= 0) {
-        txn_expiration = duration_cast<std::chrono::seconds>(
-                                 
std::chrono::system_clock::now().time_since_epoch())
-                                 .count() +
-                         120;
-    }
+    int64_t txn_expiration_min =
+            
duration_cast<std::chrono::seconds>(std::chrono::system_clock::now().time_since_epoch())
+                    .count() +
+            config::tablet_txn_info_min_expired_seconds;
+    txn_expiration = std::max(txn_expiration_min, txn_expiration);
     {
         std::unique_lock<std::shared_mutex> wlock(_rwlock);
         TxnKey txn_key(transaction_id, tablet_id);
@@ -153,16 +153,21 @@ void CloudTxnDeleteBitmapCache::set_tablet_txn_info(
             .tag("delete_bitmap_size", charge);
 }
 
-void CloudTxnDeleteBitmapCache::update_tablet_txn_info(TTransactionId 
transaction_id,
-                                                       int64_t tablet_id,
-                                                       DeleteBitmapPtr 
delete_bitmap,
-                                                       const 
RowsetIdUnorderedSet& rowset_ids,
-                                                       PublishStatus 
publish_status,
-                                                       TxnPublishInfo 
publish_info) {
+Status CloudTxnDeleteBitmapCache::update_tablet_txn_info(TTransactionId 
transaction_id,
+                                                         int64_t tablet_id,
+                                                         DeleteBitmapPtr 
delete_bitmap,
+                                                         const 
RowsetIdUnorderedSet& rowset_ids,
+                                                         PublishStatus 
publish_status,
+                                                         TxnPublishInfo 
publish_info) {
     {
         std::unique_lock<std::shared_mutex> wlock(_rwlock);
         TxnKey txn_key(transaction_id, tablet_id);
-        CHECK(_txn_map.contains(txn_key));
+        if (!_txn_map.contains(txn_key)) {
+            return Status::Error<ErrorCode::NOT_FOUND, false>(
+                    "not found txn info, tablet_id={}, transaction_id={}, may 
be expired and be "
+                    "removed",
+                    tablet_id, transaction_id);
+        }
         TxnVal& txn_val = _txn_map[txn_key];
         *(txn_val.publish_status) = publish_status;
         if (publish_status == PublishStatus::SUCCEED) {
@@ -184,7 +189,9 @@ void 
CloudTxnDeleteBitmapCache::update_tablet_txn_info(TTransactionId transactio
     LOG_INFO("update txn related delete bitmap")
             .tag("txn_id", transaction_id)
             .tag("tablt_id", tablet_id)
-            .tag("delete_bitmap_size", charge);
+            .tag("delete_bitmap_size", charge)
+            .tag("publish_status", static_cast<int>(publish_status));
+    return Status::OK();
 }
 
 void CloudTxnDeleteBitmapCache::remove_expired_tablet_txn_info() {
@@ -238,7 +245,8 @@ void 
CloudTxnDeleteBitmapCache::remove_unused_tablet_txn_info(TTransactionId tra
 void CloudTxnDeleteBitmapCache::_clean_thread_callback() {
     do {
         remove_expired_tablet_txn_info();
-    } while (!_stop_latch.wait_for(std::chrono::seconds(300)));
+    } while (!_stop_latch.wait_for(
+            
std::chrono::seconds(config::remove_expired_tablet_txn_info_interval_seconds)));
 }
 
 } // namespace doris
\ No newline at end of file
diff --git a/be/src/cloud/cloud_txn_delete_bitmap_cache.h 
b/be/src/cloud/cloud_txn_delete_bitmap_cache.h
index 75577ae2e3f..db5f8867263 100644
--- a/be/src/cloud/cloud_txn_delete_bitmap_cache.h
+++ b/be/src/cloud/cloud_txn_delete_bitmap_cache.h
@@ -50,10 +50,10 @@ public:
                              RowsetSharedPtr rowset, int64_t txn_expirationm,
                              std::shared_ptr<PartialUpdateInfo> 
partial_update_info);
 
-    void update_tablet_txn_info(TTransactionId transaction_id, int64_t 
tablet_id,
-                                DeleteBitmapPtr delete_bitmap,
-                                const RowsetIdUnorderedSet& rowset_ids,
-                                PublishStatus publish_status, TxnPublishInfo 
publish_info = {});
+    Status update_tablet_txn_info(TTransactionId transaction_id, int64_t 
tablet_id,
+                                  DeleteBitmapPtr delete_bitmap,
+                                  const RowsetIdUnorderedSet& rowset_ids,
+                                  PublishStatus publish_status, TxnPublishInfo 
publish_info = {});
 
     void remove_expired_tablet_txn_info();
 
diff --git a/be/src/cloud/config.cpp b/be/src/cloud/config.cpp
index 0f59b51059b..44f9fa42cae 100644
--- a/be/src/cloud/config.cpp
+++ b/be/src/cloud/config.cpp
@@ -68,6 +68,10 @@ DEFINE_mBool(enable_new_tablet_do_compaction, "false");
 
 DEFINE_Bool(enable_cloud_txn_lazy_commit, "false");
 
+DEFINE_mInt32(remove_expired_tablet_txn_info_interval_seconds, "300");
+
+DEFINE_mInt32(tablet_txn_info_min_expired_seconds, "120");
+
 void set_cloud_unique_id(std::string instance_id) {
     if (cloud_unique_id.empty() && !instance_id.empty()) {
         static_cast<void>(set_config("cloud_unique_id", "1:" + instance_id + 
":compute", true));
diff --git a/be/src/cloud/config.h b/be/src/cloud/config.h
index 57f6348df70..ba20bccbcc7 100644
--- a/be/src/cloud/config.h
+++ b/be/src/cloud/config.h
@@ -103,4 +103,8 @@ DECLARE_mInt32(sync_load_for_tablets_thread);
 // enable large txn lazy commit in meta-service `commit_txn`
 DECLARE_mBool(enable_cloud_txn_lazy_commit);
 
+DECLARE_mInt32(remove_expired_tablet_txn_info_interval_seconds);
+
+DECLARE_mInt32(tablet_txn_info_min_expired_seconds);
+
 } // namespace doris::config


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to