This is an automated email from the ASF dual-hosted git repository. dataroaring pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push: new 761a8eb51e2 [branch-2.0] Picks "[opt](merge-on-write) Reduce the version not continuous logs for merge-on-write table #40946" (#40997) 761a8eb51e2 is described below commit 761a8eb51e2ac839de2ed34d415039cb5651eb06 Author: bobhan1 <bh2444151...@outlook.com> AuthorDate: Fri Sep 20 19:08:56 2024 +0800 [branch-2.0] Picks "[opt](merge-on-write) Reduce the version not continuous logs for merge-on-write table #40946" (#40997) picks https://github.com/apache/doris/pull/40946 --- be/src/common/config.cpp | 4 ++++ be/src/common/config.h | 4 ++++ be/src/olap/task/engine_publish_version_task.cpp | 26 +++++++++++++--------- .../main/java/org/apache/doris/common/Config.java | 4 ++++ .../java/org/apache/doris/master/MasterImpl.java | 14 +++++++++--- 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index e93f40d7ffa..c631068fef5 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1112,6 +1112,10 @@ DEFINE_mBool(enable_missing_rows_correctness_check, "false"); // When the number of missing versions is more than this value, do not directly // retry the publish and handle it through async publish. DEFINE_mInt32(mow_publish_max_discontinuous_version_num, "20"); +// When the version is not continuous for MOW table in publish phase and the gap between +// current txn's publishing version and the max version of the tablet exceeds this value, +// don't print warning log +DEFINE_mInt32(publish_version_gap_logging_threshold, "200"); // The secure path with user files, used in the `local` table function. DEFINE_mString(user_files_secure_path, "${DORIS_HOME}"); diff --git a/be/src/common/config.h b/be/src/common/config.h index daf054a7369..51f02afee2f 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1155,6 +1155,10 @@ DECLARE_mBool(enable_missing_rows_correctness_check); // When the number of missing versions is more than this value, do not directly // retry the publish and handle it through async publish. DECLARE_mInt32(mow_publish_max_discontinuous_version_num); +// When the version is not continuous for MOW table in publish phase and the gap between +// current txn's publishing version and the max version of the tablet exceeds this value, +// don't print warning log +DECLARE_mInt32(publish_version_gap_logging_threshold); // The secure path with user files, used in the `local` table function. DECLARE_mString(user_files_secure_path); diff --git a/be/src/olap/task/engine_publish_version_task.cpp b/be/src/olap/task/engine_publish_version_task.cpp index ed0f258bf85..4c1d159e85a 100644 --- a/be/src/olap/task/engine_publish_version_task.cpp +++ b/be/src/olap/task/engine_publish_version_task.cpp @@ -224,16 +224,22 @@ Status EnginePublishVersionTask::finish() { int64_t missed_txn_id = StorageEngine::instance()->txn_manager()->get_txn_by_tablet_version( tablet->tablet_id(), missed_version); - auto msg = fmt::format( - "uniq key with merge-on-write version not continuous, " - "missed version={}, it's transaction_id={}, current publish " - "version={}, tablet_id={}, transaction_id={}", - missed_version, missed_txn_id, version.second, tablet->tablet_id(), - _publish_version_req.transaction_id); - if (first_time_update) { - LOG(INFO) << msg; - } else { - LOG_EVERY_SECOND(INFO) << msg; + bool need_log = + (config::publish_version_gap_logging_threshold < 0 || + max_version + config::publish_version_gap_logging_threshold >= + version.second); + if (need_log) { + auto msg = fmt::format( + "uniq key with merge-on-write version not continuous, " + "missed version={}, it's transaction_id={}, current publish " + "version={}, tablet_id={}, transaction_id={}", + missed_version, missed_txn_id, version.second, + tablet->tablet_id(), _publish_version_req.transaction_id); + if (first_time_update) { + LOG(INFO) << msg; + } else { + LOG_EVERY_SECOND(INFO) << msg; + } } }; // The versions during the schema change period need to be also continuous diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index fe61cc80343..86547830282 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -474,6 +474,10 @@ public class Config extends ConfigBase { "print log interval for publish transaction failed interval"}) public static long publish_fail_log_interval_second = 5 * 60; + @ConfField(mutable = true, masterOnly = true, description = {"一个 PUBLISH_VERSION 任务打印失败日志的次数上限", + "the upper limit of failure logs of PUBLISH_VERSION task"}) + public static long publish_version_task_failed_log_threshold = 80; + @ConfField(mutable = true, masterOnly = true, description = {"提交事务的最大超时时间,单位是秒。" + "该参数仅用于事务型 insert 操作中。", "Maximal waiting time for all data inserted before one transaction to be committed, in seconds. " diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java index 3b2aeba3ec0..3e63a5421f7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java @@ -29,6 +29,7 @@ import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.Tablet; import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.catalog.TabletMeta; +import org.apache.doris.common.Config; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.load.DeleteJob; import org.apache.doris.load.loadv2.SparkLoadJob; @@ -86,11 +87,13 @@ public class MasterImpl { // check task status // retry task by report process TStatus taskStatus = request.getTaskStatus(); + TTaskType taskType = request.getTaskType(); + long signature = request.getSignature(); if (LOG.isDebugEnabled()) { LOG.debug("get task report: {}", request); } - if (taskStatus.getStatusCode() != TStatusCode.OK) { + if (taskStatus.getStatusCode() != TStatusCode.OK && taskType != TTaskType.PUBLISH_VERSION) { LOG.warn("finish task reports bad. request: {}", request); } @@ -109,8 +112,6 @@ public class MasterImpl { } long backendId = backend.getId(); - TTaskType taskType = request.getTaskType(); - long signature = request.getSignature(); AgentTask task = AgentTaskQueue.getTask(backendId, taskType, signature); if (task == null) { @@ -128,6 +129,13 @@ public class MasterImpl { } else { if (taskStatus.getStatusCode() != TStatusCode.OK) { task.failed(); + if (taskType == TTaskType.PUBLISH_VERSION) { + boolean needLog = (Config.publish_version_task_failed_log_threshold < 0 + || task.getFailedTimes() <= Config.publish_version_task_failed_log_threshold); + if (needLog) { + LOG.warn("finish task reports bad. request: {}", request); + } + } String errMsg = "task type: " + taskType + ", status_code: " + taskStatus.getStatusCode().toString() + (taskStatus.isSetErrorMsgs() ? (", status_message: " + taskStatus.getErrorMsgs()) : "") + ", backendId: " + backend + ", signature: " + signature; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org