This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new f1e6de3f442 [enhancement](schema-change) Schema change wait lock for a while when there are inverted index being built or cooldown running on base tablet (#47958) f1e6de3f442 is described below commit f1e6de3f4425e841a5e11249493eb5165c84c77f Author: Siyang Tang <tangsiy...@selectdb.com> AuthorDate: Mon Mar 3 22:00:59 2025 +0800 [enhancement](schema-change) Schema change wait lock for a while when there are inverted index being built or cooldown running on base tablet (#47958) When there are inverted index being built on base tablet, schema change will failed directly. After this PR, schema change lock will be a timed mutex so that it could wait on inverted index builder or cooldown task releasing schema change lock. --- be/src/cloud/cloud_schema_change_job.cpp | 19 +++++++++++++------ be/src/olap/base_tablet.h | 6 ++++-- be/src/olap/schema_change.cpp | 14 +++++++++----- be/src/olap/tablet.cpp | 5 ++++- be/src/olap/task/index_builder.cpp | 17 ++++++++++++----- 5 files changed, 42 insertions(+), 19 deletions(-) diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index 17d2ae2bdac..ce202dbf2cd 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -21,12 +21,14 @@ #include <chrono> #include <memory> +#include <mutex> #include <random> #include <thread> #include "cloud/cloud_meta_mgr.h" #include "cloud/cloud_tablet_mgr.h" #include "common/status.h" +#include "gutil/integral_types.h" #include "olap/delete_handler.h" #include "olap/olap_define.h" #include "olap/rowset/beta_rowset.h" @@ -74,13 +76,18 @@ Status CloudSchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& reque _base_tablet = DORIS_TRY(_cloud_storage_engine.tablet_mgr().get_tablet(request.base_tablet_id)); - std::unique_lock<std::mutex> schema_change_lock(_base_tablet->get_schema_change_lock(), - std::try_to_lock); - if (!schema_change_lock.owns_lock()) { - LOG(WARNING) << "Failed to obtain schema change lock. base_tablet=" + static constexpr long TRY_LOCK_TIMEOUT = 30; + std::unique_lock schema_change_lock(_base_tablet->get_schema_change_lock(), std::defer_lock); + bool owns_lock = schema_change_lock.try_lock_for(std::chrono::seconds(TRY_LOCK_TIMEOUT)); + + if (!owns_lock) { + LOG(WARNING) << "Failed to obtain schema change lock, there might be inverted index being " + "built on base_tablet=" << request.base_tablet_id; - return Status::Error<TRY_LOCK_FAILED>("Failed to obtain schema change lock. base_tablet={}", - request.base_tablet_id); + return Status::Error<TRY_LOCK_FAILED>( + "Failed to obtain schema change lock, there might be inverted index being " + "built on base_tablet=", + request.base_tablet_id); } // MUST sync rowsets before capturing rowset readers and building DeleteHandler RETURN_IF_ERROR(_base_tablet->sync_rowsets(request.alter_version)); diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index 1ba60d5e48b..7407ea40284 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -18,6 +18,7 @@ #pragma once #include <memory> +#include <mutex> #include <shared_mutex> #include <string> @@ -67,7 +68,8 @@ public: KeysType keys_type() const { return _tablet_meta->tablet_schema()->keys_type(); } size_t num_key_columns() const { return _tablet_meta->tablet_schema()->num_key_columns(); } int64_t ttl_seconds() const { return _tablet_meta->ttl_seconds(); } - std::mutex& get_schema_change_lock() { return _schema_change_lock; } + // currently used by schema change, inverted index building, and cooldown + std::timed_mutex& get_schema_change_lock() { return _schema_change_lock; } bool enable_unique_key_merge_on_write() const { #ifdef BE_TEST if (_tablet_meta == nullptr) { @@ -349,7 +351,7 @@ protected: std::shared_ptr<MetricEntity> _metric_entity; protected: - std::mutex _schema_change_lock; + std::timed_mutex _schema_change_lock; public: IntCounter* query_scan_bytes = nullptr; diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp index 91978df30fd..281491faa82 100644 --- a/be/src/olap/schema_change.cpp +++ b/be/src/olap/schema_change.cpp @@ -802,11 +802,15 @@ Status SchemaChangeJob::process_alter_tablet(const TAlterTabletReqV2& request) { << ", alter_version=" << request.alter_version; // Lock schema_change_lock util schema change info is stored in tablet header - std::unique_lock<std::mutex> schema_change_lock(_base_tablet->get_schema_change_lock(), - std::try_to_lock); - if (!schema_change_lock.owns_lock()) { - return Status::Error<TRY_LOCK_FAILED>("failed to obtain schema change lock. base_tablet={}", - request.base_tablet_id); + static constexpr long TRY_LOCK_TIMEOUT = 30; + std::unique_lock schema_change_lock(_base_tablet->get_schema_change_lock(), std::defer_lock); + bool owns_lock = schema_change_lock.try_lock_for(std::chrono::seconds(TRY_LOCK_TIMEOUT)); + + if (!owns_lock) { + return Status::Error<TRY_LOCK_FAILED>( + "Failed to obtain schema change lock, there might be inverted index being " + "built or cooldown runnning on base_tablet={}", + request.base_tablet_id); } Status res = _do_process_alter_tablet(request); diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index f48da2a9a43..6b6bda1a78f 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -2011,7 +2011,10 @@ Status Tablet::create_rowset(const RowsetMetaSharedPtr& rowset_meta, RowsetShare Status Tablet::cooldown(RowsetSharedPtr rowset) { std::unique_lock schema_change_lock(_schema_change_lock, std::try_to_lock); if (!schema_change_lock.owns_lock()) { - return Status::Error<TRY_LOCK_FAILED>("try schema_change_lock failed"); + return Status::Error<TRY_LOCK_FAILED>( + "try schema_change_lock failed, schema change running or inverted index built on " + "this tablet={}", + tablet_id()); } // Check executing serially with compaction task. std::unique_lock base_compaction_lock(_base_compaction_lock, std::try_to_lock); diff --git a/be/src/olap/task/index_builder.cpp b/be/src/olap/task/index_builder.cpp index 792812200bb..817486679b2 100644 --- a/be/src/olap/task/index_builder.cpp +++ b/be/src/olap/task/index_builder.cpp @@ -17,6 +17,8 @@ #include "olap/task/index_builder.h" +#include <mutex> + #include "common/status.h" #include "gutil/integral_types.h" #include "olap/olap_define.h" @@ -721,11 +723,16 @@ Status IndexBuilder::do_build_inverted_index() { return Status::OK(); } - std::unique_lock<std::mutex> schema_change_lock(_tablet->get_schema_change_lock(), - std::try_to_lock); - if (!schema_change_lock.owns_lock()) { - return Status::ObtainLockFailed("try schema_change_lock failed. tablet={} ", - _tablet->tablet_id()); + static constexpr long TRY_LOCK_TIMEOUT = 30; + std::unique_lock schema_change_lock(_tablet->get_schema_change_lock(), std::defer_lock); + bool owns_lock = schema_change_lock.try_lock_for(std::chrono::seconds(TRY_LOCK_TIMEOUT)); + + if (!owns_lock) { + return Status::ObtainLockFailed( + "try schema_change_lock failed. There might be schema change or cooldown running " + "on " + "tablet={} ", + _tablet->tablet_id()); } // Check executing serially with compaction task. std::unique_lock<std::mutex> base_compaction_lock(_tablet->get_base_compaction_lock(), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org