This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new f1e6de3f442 [enhancement](schema-change) Schema change wait lock for a 
while when there are inverted index being built or cooldown running on base 
tablet   (#47958)
f1e6de3f442 is described below

commit f1e6de3f4425e841a5e11249493eb5165c84c77f
Author: Siyang Tang <tangsiy...@selectdb.com>
AuthorDate: Mon Mar 3 22:00:59 2025 +0800

    [enhancement](schema-change) Schema change wait lock for a while when there 
are inverted index being built or cooldown running on base tablet   (#47958)
    
    
    When there are inverted index being built on base tablet, schema change
    will failed directly. After this PR, schema change lock will be a timed
    mutex so that it could wait on inverted index builder or cooldown task
    releasing schema change lock.
---
 be/src/cloud/cloud_schema_change_job.cpp | 19 +++++++++++++------
 be/src/olap/base_tablet.h                |  6 ++++--
 be/src/olap/schema_change.cpp            | 14 +++++++++-----
 be/src/olap/tablet.cpp                   |  5 ++++-
 be/src/olap/task/index_builder.cpp       | 17 ++++++++++++-----
 5 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/be/src/cloud/cloud_schema_change_job.cpp 
b/be/src/cloud/cloud_schema_change_job.cpp
index 17d2ae2bdac..ce202dbf2cd 100644
--- a/be/src/cloud/cloud_schema_change_job.cpp
+++ b/be/src/cloud/cloud_schema_change_job.cpp
@@ -21,12 +21,14 @@
 
 #include <chrono>
 #include <memory>
+#include <mutex>
 #include <random>
 #include <thread>
 
 #include "cloud/cloud_meta_mgr.h"
 #include "cloud/cloud_tablet_mgr.h"
 #include "common/status.h"
+#include "gutil/integral_types.h"
 #include "olap/delete_handler.h"
 #include "olap/olap_define.h"
 #include "olap/rowset/beta_rowset.h"
@@ -74,13 +76,18 @@ Status CloudSchemaChangeJob::process_alter_tablet(const 
TAlterTabletReqV2& reque
 
     _base_tablet = 
DORIS_TRY(_cloud_storage_engine.tablet_mgr().get_tablet(request.base_tablet_id));
 
-    std::unique_lock<std::mutex> 
schema_change_lock(_base_tablet->get_schema_change_lock(),
-                                                    std::try_to_lock);
-    if (!schema_change_lock.owns_lock()) {
-        LOG(WARNING) << "Failed to obtain schema change lock. base_tablet="
+    static constexpr long TRY_LOCK_TIMEOUT = 30;
+    std::unique_lock 
schema_change_lock(_base_tablet->get_schema_change_lock(), std::defer_lock);
+    bool owns_lock = 
schema_change_lock.try_lock_for(std::chrono::seconds(TRY_LOCK_TIMEOUT));
+
+    if (!owns_lock) {
+        LOG(WARNING) << "Failed to obtain schema change lock, there might be 
inverted index being "
+                        "built on base_tablet="
                      << request.base_tablet_id;
-        return Status::Error<TRY_LOCK_FAILED>("Failed to obtain schema change 
lock. base_tablet={}",
-                                              request.base_tablet_id);
+        return Status::Error<TRY_LOCK_FAILED>(
+                "Failed to obtain schema change lock, there might be inverted 
index being "
+                "built on base_tablet=",
+                request.base_tablet_id);
     }
     // MUST sync rowsets before capturing rowset readers and building 
DeleteHandler
     RETURN_IF_ERROR(_base_tablet->sync_rowsets(request.alter_version));
diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h
index 1ba60d5e48b..7407ea40284 100644
--- a/be/src/olap/base_tablet.h
+++ b/be/src/olap/base_tablet.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <memory>
+#include <mutex>
 #include <shared_mutex>
 #include <string>
 
@@ -67,7 +68,8 @@ public:
     KeysType keys_type() const { return 
_tablet_meta->tablet_schema()->keys_type(); }
     size_t num_key_columns() const { return 
_tablet_meta->tablet_schema()->num_key_columns(); }
     int64_t ttl_seconds() const { return _tablet_meta->ttl_seconds(); }
-    std::mutex& get_schema_change_lock() { return _schema_change_lock; }
+    // currently used by schema change, inverted index building, and cooldown
+    std::timed_mutex& get_schema_change_lock() { return _schema_change_lock; }
     bool enable_unique_key_merge_on_write() const {
 #ifdef BE_TEST
         if (_tablet_meta == nullptr) {
@@ -349,7 +351,7 @@ protected:
     std::shared_ptr<MetricEntity> _metric_entity;
 
 protected:
-    std::mutex _schema_change_lock;
+    std::timed_mutex _schema_change_lock;
 
 public:
     IntCounter* query_scan_bytes = nullptr;
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 91978df30fd..281491faa82 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -802,11 +802,15 @@ Status SchemaChangeJob::process_alter_tablet(const 
TAlterTabletReqV2& request) {
               << ", alter_version=" << request.alter_version;
 
     // Lock schema_change_lock util schema change info is stored in tablet 
header
-    std::unique_lock<std::mutex> 
schema_change_lock(_base_tablet->get_schema_change_lock(),
-                                                    std::try_to_lock);
-    if (!schema_change_lock.owns_lock()) {
-        return Status::Error<TRY_LOCK_FAILED>("failed to obtain schema change 
lock. base_tablet={}",
-                                              request.base_tablet_id);
+    static constexpr long TRY_LOCK_TIMEOUT = 30;
+    std::unique_lock 
schema_change_lock(_base_tablet->get_schema_change_lock(), std::defer_lock);
+    bool owns_lock = 
schema_change_lock.try_lock_for(std::chrono::seconds(TRY_LOCK_TIMEOUT));
+
+    if (!owns_lock) {
+        return Status::Error<TRY_LOCK_FAILED>(
+                "Failed to obtain schema change lock, there might be inverted 
index being "
+                "built or cooldown runnning on base_tablet={}",
+                request.base_tablet_id);
     }
 
     Status res = _do_process_alter_tablet(request);
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index f48da2a9a43..6b6bda1a78f 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2011,7 +2011,10 @@ Status Tablet::create_rowset(const RowsetMetaSharedPtr& 
rowset_meta, RowsetShare
 Status Tablet::cooldown(RowsetSharedPtr rowset) {
     std::unique_lock schema_change_lock(_schema_change_lock, std::try_to_lock);
     if (!schema_change_lock.owns_lock()) {
-        return Status::Error<TRY_LOCK_FAILED>("try schema_change_lock failed");
+        return Status::Error<TRY_LOCK_FAILED>(
+                "try schema_change_lock failed, schema change running or 
inverted index built on "
+                "this tablet={}",
+                tablet_id());
     }
     // Check executing serially with compaction task.
     std::unique_lock base_compaction_lock(_base_compaction_lock, 
std::try_to_lock);
diff --git a/be/src/olap/task/index_builder.cpp 
b/be/src/olap/task/index_builder.cpp
index 792812200bb..817486679b2 100644
--- a/be/src/olap/task/index_builder.cpp
+++ b/be/src/olap/task/index_builder.cpp
@@ -17,6 +17,8 @@
 
 #include "olap/task/index_builder.h"
 
+#include <mutex>
+
 #include "common/status.h"
 #include "gutil/integral_types.h"
 #include "olap/olap_define.h"
@@ -721,11 +723,16 @@ Status IndexBuilder::do_build_inverted_index() {
         return Status::OK();
     }
 
-    std::unique_lock<std::mutex> 
schema_change_lock(_tablet->get_schema_change_lock(),
-                                                    std::try_to_lock);
-    if (!schema_change_lock.owns_lock()) {
-        return Status::ObtainLockFailed("try schema_change_lock failed. 
tablet={} ",
-                                        _tablet->tablet_id());
+    static constexpr long TRY_LOCK_TIMEOUT = 30;
+    std::unique_lock schema_change_lock(_tablet->get_schema_change_lock(), 
std::defer_lock);
+    bool owns_lock = 
schema_change_lock.try_lock_for(std::chrono::seconds(TRY_LOCK_TIMEOUT));
+
+    if (!owns_lock) {
+        return Status::ObtainLockFailed(
+                "try schema_change_lock failed. There might be schema change 
or cooldown running "
+                "on "
+                "tablet={} ",
+                _tablet->tablet_id());
     }
     // Check executing serially with compaction task.
     std::unique_lock<std::mutex> 
base_compaction_lock(_tablet->get_base_compaction_lock(),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to