This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git

commit cd272d40abdffa0ee2506526f589d268bcd4738b
Author: yujun <yu.jun.re...@gmail.com>
AuthorDate: Tue Jul 11 09:46:43 2023 +0800

    [fix](function) fix create tablet timeout (#21682)
    
    To get tablet's local size and remote size both need lock the tablet's meta.
    Sometimes the meta lock was locked by other process for a long time (eg, 
publish version).
    It will cause function 'get root path info' hold the tablet manager's shard 
lock for a long time too.
    Then it will cause creating tablet could not get the shard lock and finally 
timeout.
    Let getting tablet's local size/size go out of the scope of tablet manager 
shard lock.
---
 be/src/http/action/pad_rowset_action.cpp           |  3 ++
 be/src/olap/compaction.cpp                         |  1 +
 be/src/olap/schema_change.cpp                      |  5 +++
 be/src/olap/tablet.cpp                             |  6 ++++
 be/src/olap/tablet.h                               |  2 ++
 be/src/olap/tablet_manager.cpp                     | 38 +++++++++-------------
 be/src/olap/tablet_manager.h                       |  3 +-
 be/src/olap/task/engine_clone_task.cpp             |  2 ++
 be/src/util/trace.h                                | 29 +++++++++++++++++
 .../apache/doris/datasource/InternalCatalog.java   |  2 +-
 10 files changed, 67 insertions(+), 24 deletions(-)

diff --git a/be/src/http/action/pad_rowset_action.cpp 
b/be/src/http/action/pad_rowset_action.cpp
index df2721f50b..1b16e3f98d 100644
--- a/be/src/http/action/pad_rowset_action.cpp
+++ b/be/src/http/action/pad_rowset_action.cpp
@@ -25,6 +25,8 @@
 #include "olap/rowset/beta_rowset_writer.h"
 #include "olap/rowset/rowset.h"
 #include "olap/storage_engine.h"
+#include "util/time.h"
+#include "util/trace.h"
 
 namespace doris {
 
@@ -95,6 +97,7 @@ Status PadRowsetAction::_pad_rowset(TabletSharedPtr tablet, 
const Version& versi
     std::vector<RowsetSharedPtr> to_delete;
     {
         std::unique_lock wlock(tablet->get_header_lock());
+        SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
         tablet->modify_rowsets(to_add, to_delete);
         tablet->save_meta();
     }
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index b9e9754030..8fd87a0e5f 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -467,6 +467,7 @@ Status Compaction::modify_rowsets(const Merger::Statistics* 
stats) {
         {
             std::lock_guard<std::mutex> 
wrlock_(_tablet->get_rowset_update_lock());
             std::lock_guard<std::shared_mutex> 
wrlock(_tablet->get_header_lock());
+            SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
 
             // Convert the delete bitmap of the input rowsets to output rowset 
for
             // incremental data.
diff --git a/be/src/olap/schema_change.cpp b/be/src/olap/schema_change.cpp
index 2ceff6f7c6..dd81df84d3 100644
--- a/be/src/olap/schema_change.cpp
+++ b/be/src/olap/schema_change.cpp
@@ -32,6 +32,7 @@
 #include "olap/wrapper_field.h"
 #include "runtime/memory/mem_tracker.h"
 #include "util/defer_op.h"
+#include "util/trace.h"
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/aggregate_functions/aggregate_function_reader.h"
 #include "vec/aggregate_functions/aggregate_function_simple_factory.h"
@@ -1846,6 +1847,7 @@ Status 
SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2&
         std::lock_guard<std::mutex> 
base_tablet_lock(base_tablet->get_push_lock());
         std::lock_guard<std::mutex> 
new_tablet_lock(new_tablet->get_push_lock());
         std::lock_guard<std::shared_mutex> 
base_tablet_wlock(base_tablet->get_header_lock());
+        SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
         std::lock_guard<std::shared_mutex> 
new_tablet_wlock(new_tablet->get_header_lock());
 
         do {
@@ -2064,6 +2066,7 @@ Status 
SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2&
             // step 3
             std::lock_guard<std::mutex> 
rwlock(new_tablet->get_rowset_update_lock());
             std::lock_guard<std::shared_mutex> 
new_wlock(new_tablet->get_header_lock());
+            SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
             int64_t new_max_version = new_tablet->max_version().second;
             rowsets.clear();
             if (max_version < new_max_version) {
@@ -2094,6 +2097,7 @@ Status 
SchemaChangeHandler::_do_process_alter_tablet_v2(const TAlterTabletReqV2&
         } else {
             // set state to ready
             std::lock_guard<std::shared_mutex> 
new_wlock(new_tablet->get_header_lock());
+            SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
             res = new_tablet->set_tablet_state(TabletState::TABLET_RUNNING);
             if (!res) {
                 break;
@@ -2171,6 +2175,7 @@ Status 
SchemaChangeHandler::_convert_historical_rowsets(const SchemaChangeParams
         {
             // save tablet meta here because rowset meta is not saved during 
add rowset
             std::lock_guard<std::shared_mutex> 
new_wlock(sc_params.new_tablet->get_header_lock());
+            SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
             sc_params.new_tablet->save_meta();
         }
         if (res) {
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index ce1b14f8a1..968b6ad9d4 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -68,6 +68,7 @@
 
 namespace doris {
 using namespace ErrorCode;
+using namespace std::chrono_literals;
 
 using std::pair;
 using std::nothrow;
@@ -75,6 +76,8 @@ using std::sort;
 using std::string;
 using std::vector;
 
+const std::chrono::seconds TRACE_TABLET_LOCK_THRESHOLD = 10s;
+
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(flush_bytes, MetricUnit::BYTES);
 DEFINE_COUNTER_METRIC_PROTOTYPE_2ARG(flush_finish_count, 
MetricUnit::OPERATIONS);
 
@@ -300,6 +303,7 @@ Status Tablet::revise_tablet_meta(const 
std::vector<RowsetMetaSharedPtr>& rowset
 Status Tablet::add_rowset(RowsetSharedPtr rowset) {
     DCHECK(rowset != nullptr);
     std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
+    SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
     // If the rowset already exist, just return directly.  The rowset_id is an 
unique-id,
     // we can use it to check this situation.
     if (_contains_rowset(rowset->rowset_id())) {
@@ -535,6 +539,7 @@ void Tablet::_delete_stale_rowset_by_version(const Version& 
version) {
 void Tablet::delete_expired_stale_rowset() {
     int64_t now = UnixSeconds();
     std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
+    SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
     // Compute the end time to delete rowsets, when a expired rowset 
createtime less then this time, it will be deleted.
     double expired_stale_sweep_endtime =
             ::difftime(now, config::tablet_rowset_stale_sweep_time_sec);
@@ -972,6 +977,7 @@ void 
Tablet::_max_continuous_version_from_beginning_unlocked(Version* version, V
 
 void Tablet::calculate_cumulative_point() {
     std::lock_guard<std::shared_mutex> wrlock(_meta_lock);
+    SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
     int64_t ret_cumulative_point;
     _cumulative_compaction_policy->calculate_cumulative_point(
             this, _tablet_meta->all_rs_metas(), _cumulative_point, 
&ret_cumulative_point);
diff --git a/be/src/olap/tablet.h b/be/src/olap/tablet.h
index d34597323a..15e17223c3 100644
--- a/be/src/olap/tablet.h
+++ b/be/src/olap/tablet.h
@@ -62,6 +62,8 @@ using TabletSharedPtr = std::shared_ptr<Tablet>;
 
 enum TabletStorageType { STORAGE_TYPE_LOCAL, STORAGE_TYPE_REMOTE, 
STORAGE_TYPE_REMOTE_AND_LOCAL };
 
+extern const std::chrono::seconds TRACE_TABLET_LOCK_THRESHOLD;
+
 class Tablet : public BaseTablet {
 public:
     static TabletSharedPtr create_tablet_from_meta(TabletMetaSharedPtr 
tablet_meta,
diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp
index fa7c25e9c4..5f2bd1b5c0 100644
--- a/be/src/olap/tablet_manager.cpp
+++ b/be/src/olap/tablet_manager.cpp
@@ -473,6 +473,7 @@ Status TabletManager::_drop_tablet_unlocked(TTabletId 
tablet_id, TReplicaId repl
     if (!keep_files) {
         // drop tablet will update tablet meta, should lock
         std::lock_guard<std::shared_mutex> 
wrlock(to_drop_tablet->get_header_lock());
+        SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
         LOG(INFO) << "set tablet to shutdown state and remove it from memory. "
                   << "tablet_id=" << tablet_id << ", tablet_path=" << 
to_drop_tablet->tablet_path();
         // NOTE: has to update tablet here, but must not update tablet meta 
directly.
@@ -583,18 +584,14 @@ TabletSharedPtr TabletManager::get_tablet(TTabletId 
tablet_id, TabletUid tablet_
     return nullptr;
 }
 
-std::vector<TabletSharedPtr> TabletManager::get_all_tablet() {
+std::vector<TabletSharedPtr> 
TabletManager::get_all_tablet(std::function<bool(Tablet*)>&& filter) {
     std::vector<TabletSharedPtr> res;
     for (const auto& tablets_shard : _tablets_shards) {
         std::shared_lock rdlock(tablets_shard.lock);
-        for (const auto& tablet_map : tablets_shard.tablet_map) {
-            // these are tablets which is not deleted
-            TabletSharedPtr tablet = tablet_map.second;
-            if (!tablet->is_used()) {
-                LOG(WARNING) << "tablet cannot be used. tablet=" << 
tablet->tablet_id();
-                continue;
+        for (auto& [id, tablet] : tablets_shard.tablet_map) {
+            if (filter(tablet.get())) {
+                res.emplace_back(tablet);
             }
-            res.emplace_back(tablet);
         }
     }
     return res;
@@ -1096,20 +1093,17 @@ void 
TabletManager::update_root_path_info(std::map<string, DataDirInfo>* path_ma
                                           size_t* tablet_count) {
     DCHECK(tablet_count);
     *tablet_count = 0;
-    for (const auto& tablets_shard : _tablets_shards) {
-        std::shared_lock rdlock(tablets_shard.lock);
-        for (const auto& item : tablets_shard.tablet_map) {
-            TabletSharedPtr tablet = item.second;
-            ++(*tablet_count);
-            auto iter = path_map->find(tablet->data_dir()->path());
-            if (iter == path_map->end()) {
-                continue;
-            }
-            if (iter->second.is_used) {
-                iter->second.local_used_capacity += 
tablet->tablet_local_size();
-                iter->second.remote_used_capacity += 
tablet->tablet_remote_size();
-            }
-        }
+    auto filter = [path_map, tablet_count](Tablet* t) -> bool {
+        ++(*tablet_count);
+        auto iter = path_map->find(t->data_dir()->path());
+        return iter != path_map->end() && iter->second.is_used;
+    };
+
+    auto tablets = get_all_tablet(filter);
+    for (const auto& tablet : tablets) {
+        auto& data_dir_info = (*path_map)[tablet->data_dir()->path()];
+        data_dir_info.local_used_capacity += tablet->tablet_local_size();
+        data_dir_info.remote_used_capacity += tablet->tablet_remote_size();
     }
 }
 
diff --git a/be/src/olap/tablet_manager.h b/be/src/olap/tablet_manager.h
index 667013d268..378ff499ac 100644
--- a/be/src/olap/tablet_manager.h
+++ b/be/src/olap/tablet_manager.h
@@ -78,7 +78,8 @@ public:
     TabletSharedPtr get_tablet(TTabletId tablet_id, TabletUid tablet_uid,
                                bool include_deleted = false, std::string* err 
= nullptr);
 
-    std::vector<TabletSharedPtr> get_all_tablet();
+    std::vector<TabletSharedPtr> get_all_tablet(std::function<bool(Tablet*)>&& 
filter =
+                                                        [](Tablet* t) { return 
t->is_used(); });
 
     uint64_t get_rowset_nums();
     uint64_t get_segment_nums();
diff --git a/be/src/olap/task/engine_clone_task.cpp 
b/be/src/olap/task/engine_clone_task.cpp
index 5c8c2f8070..9352c7a0ce 100644
--- a/be/src/olap/task/engine_clone_task.cpp
+++ b/be/src/olap/task/engine_clone_task.cpp
@@ -34,6 +34,7 @@
 #include "runtime/thread_context.h"
 #include "util/defer_op.h"
 #include "util/thrift_rpc_helper.h"
+#include "util/trace.h"
 
 using std::set;
 using std::stringstream;
@@ -466,6 +467,7 @@ Status EngineCloneTask::_finish_clone(Tablet* tablet, const 
std::string& clone_d
     std::lock_guard<std::mutex> push_lock(tablet->get_push_lock());
     std::lock_guard<std::mutex> rwlock(tablet->get_rowset_update_lock());
     std::lock_guard<std::shared_mutex> wrlock(tablet->get_header_lock());
+    SCOPED_SIMPLE_TRACE_IF_TIMEOUT(TRACE_TABLET_LOCK_THRESHOLD);
     // check clone dir existed
     if (!FileUtils::check_exist(clone_dir)) {
         return Status::InternalError("clone dir not existed. clone_dir={}", 
clone_dir);
diff --git a/be/src/util/trace.h b/be/src/util/trace.h
index 40c6ba265f..7fe0c9e0c2 100644
--- a/be/src/util/trace.h
+++ b/be/src/util/trace.h
@@ -18,18 +18,21 @@
 
 #include <rapidjson/writer.h>
 
+#include <chrono>
 #include <iosfwd>
 #include <memory>
 #include <string>
 #include <utility>
 #include <vector>
 
+#include "common/logging.h"
 #include "gutil/macros.h"
 #include "gutil/ref_counted.h"
 #include "gutil/strings/stringpiece.h"
 #include "gutil/strings/substitute.h"
 #include "gutil/threading/thread_collision_warner.h"
 #include "gutil/walltime.h"
+#include "util/scoped_cleanup.h"
 #include "util/spinlock.h"
 #include "util/trace_metrics.h"
 
@@ -112,6 +115,32 @@ class Trace;
         }                                          \
     }()
 
+// If this scope times out, make a simple trace.
+// It will log the cost time only.
+// Timeout is chrono duration struct, eg: 5ms, 100 * 1s.
+#define SCOPED_SIMPLE_TRACE_IF_TIMEOUT(timeout) \
+    SCOPED_SIMPLE_TRACE_TO_STREAM_IF_TIMEOUT(timeout, LOG(WARNING))
+
+// If this scope times out, then put simple trace to the stream.
+// Timeout is chrono duration struct, eg: 5ms, 100 * 1s.
+// For example:
+//
+//    std::string tag = "[foo]";
+//    SCOPED_SIMPLE_TRACE_TO_STREAM_IF_TIMEOUT(5s, LOG(INFO) << tag);
+//
+#define SCOPED_SIMPLE_TRACE_TO_STREAM_IF_TIMEOUT(timeout, stream)              
         \
+    using namespace std::chrono_literals;                                      
         \
+    auto VARNAME_LINENUM(scoped_simple_trace) = doris::MonotonicMicros();      
         \
+    SCOPED_CLEANUP({                                                           
         \
+        auto VARNAME_LINENUM(timeout_us) =                                     
         \
+                
std::chrono::duration_cast<std::chrono::microseconds>(timeout).count(); \
+        auto VARNAME_LINENUM(cost_us) =                                        
         \
+                doris::MonotonicMicros() - 
VARNAME_LINENUM(scoped_simple_trace);        \
+        if (VARNAME_LINENUM(cost_us) >= VARNAME_LINENUM(timeout_us)) {         
         \
+            stream << "Simple trace cost(us): " << VARNAME_LINENUM(cost_us);   
         \
+        }                                                                      
         \
+    })
+
 namespace doris {
 
 struct TraceEntry;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
index 8a93277bf5..ca24af08eb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java
@@ -1763,7 +1763,7 @@ public class InternalCatalog implements 
CatalogIf<Database> {
             }
 
             if (!ok || !countDownLatch.getStatus().ok()) {
-                errMsg = "Failed to create partition[" + partitionName + "]. 
Timeout.";
+                errMsg = "Failed to create partition[" + partitionName + "]. 
Timeout:" + (timeout / 1000) + " seconds.";
                 // clear tasks
                 AgentTaskQueue.removeBatchTask(batchTask, TTaskType.CREATE);
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to