This is an automated email from the ASF dual-hosted git repository. jiafengzheng pushed a commit to branch dev-1.1.2 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/dev-1.1.2 by this push: new 8f8a301b6f 1 (#12320) 8f8a301b6f is described below commit 8f8a301b6fad7a6b7ef4436e1be40120c30fa8bf Author: Xinyi Zou <zouxiny...@gmail.com> AuthorDate: Sun Sep 4 13:56:59 2022 +0800 1 (#12320) Modify exec_mem_limit default value and mem limit exceed log printing --- be/src/runtime/memory/mem_tracker_limiter.cpp | 12 ++++++------ be/src/runtime/memory/mem_tracker_limiter.h | 4 ++-- be/src/runtime/runtime_state.cpp | 5 +++++ docs/en/administrator-guide/variables.md | 2 +- docs/en/extending-doris/logstash.md | 2 +- docs/en/getting-started/advance-usage.md | 6 +++--- .../sql-reference/sql-statements/Data Manipulation/EXPORT.md | 2 +- .../sql-statements/Data Manipulation/STREAM LOAD.md | 2 +- docs/zh-CN/administrator-guide/export-manual.md | 4 ++-- docs/zh-CN/administrator-guide/variables.md | 2 +- docs/zh-CN/extending-doris/logstash.md | 2 +- .../sql-statements/Data Manipulation/BROKER LOAD.md | 2 +- .../sql-reference/sql-statements/Data Manipulation/EXPORT.md | 2 +- .../sql-statements/Data Manipulation/STREAM LOAD.md | 2 +- fe/fe-core/src/main/java/org/apache/doris/load/LoadJob.java | 2 +- .../src/main/java/org/apache/doris/load/loadv2/LoadJob.java | 2 +- .../org/apache/doris/load/routineload/RoutineLoadJob.java | 2 +- .../src/main/java/org/apache/doris/qe/SessionVariable.java | 2 +- 18 files changed, 31 insertions(+), 26 deletions(-) diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index dccfb9db45..fd4c46259c 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -150,8 +150,8 @@ bool MemTrackerLimiter::gc_memory(int64_t max_consumption) { Status MemTrackerLimiter::try_gc_memory(int64_t bytes) { if (UNLIKELY(gc_memory(_limit - bytes))) { return Status::MemoryLimitExceeded(fmt::format( - "failed_alloc_size={}B, exceeded_tracker={}, limit={}B, peak_used={}B, " - 
"current_used={}B", + "failed_alloc_size={} B, exceeded_tracker={}, limit={} B, peak_used={} B, " + "current_used={} B", bytes, label(), _limit, _consumption->value(), _consumption->current_value())); } VLOG_NOTICE << "GC succeeded, TryConsume bytes=" << bytes @@ -279,8 +279,8 @@ Status MemTrackerLimiter::mem_limit_exceeded(const std::string& msg, MemTrackerLimiter* print_log_usage_tracker = nullptr; if (exceeded_tracker != nullptr) { detail += fmt::format( - "failed_alloc_size={}B, exceeded_tracker={}, limit={}B, peak_used={}B, " - "current_used={}B>, executing_msg:<{}>", + "failed_alloc_size={} B, exceeded_tracker={}, limit={} B, peak_used={} B, " + "current_used={} B>, executing_msg:<{}>", PrettyPrinter::print(failed_allocation_size, TUnit::BYTES), exceeded_tracker->label(), exceeded_tracker->limit(), exceeded_tracker->peak_consumption(), exceeded_tracker->consumption(), msg); @@ -290,8 +290,8 @@ Status MemTrackerLimiter::mem_limit_exceeded(const std::string& msg, } else if (max_consumption_tracker != nullptr) { // must after check_sys_mem_info false detail += fmt::format( - "failed_alloc_size={}B, max_consumption_tracker={}, limit={}B, peak_used={}B, " - "current_used={}B>, executing_msg:<{}>", + "failed_alloc_size={} B, max_consumption_tracker={}, limit={} B, peak_used={} B, " + "current_used={} B>, executing_msg:<{}>", PrettyPrinter::print(failed_allocation_size, TUnit::BYTES), max_consumption_tracker->label(), max_consumption_tracker->limit(), max_consumption_tracker->peak_consumption(), max_consumption_tracker->consumption(), diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h index 0543a992b1..e9c7957464 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.h +++ b/be/src/runtime/memory/mem_tracker_limiter.h @@ -71,7 +71,7 @@ public: // for fast, expect MemInfo::initialized() to be true. 
if (PerfCounters::get_vm_rss() + bytes >= MemInfo::mem_limit()) { auto st = Status::MemoryLimitExceeded( - fmt::format("process memory used {} exceed limit {}, failed_alloc_size={}", + fmt::format("process memory used {} B, exceed limit {} B, failed_alloc_size={} B", PerfCounters::get_vm_rss(), MemInfo::mem_limit(), bytes)); ExecEnv::GetInstance()->process_mem_tracker_raw()->print_log_usage(st.get_error_msg()); return st; @@ -229,7 +229,7 @@ private: // The number of child trackers that have been added. std::atomic_size_t _had_child_count = 0; - bool _print_log_usage = true; + bool _print_log_usage = false; // Lock to protect gc_memory(). This prevents many GCs from occurring at once. std::mutex _gc_lock; diff --git a/be/src/runtime/runtime_state.cpp b/be/src/runtime/runtime_state.cpp index 556281e993..0446a303b0 100644 --- a/be/src/runtime/runtime_state.cpp +++ b/be/src/runtime/runtime_state.cpp @@ -236,6 +236,11 @@ Status RuntimeState::init_mem_trackers(const TUniqueId& query_id) { -1, "RuntimeState:instance:" + print_id(_fragment_instance_id), _new_query_mem_tracker); + if (_query_options.is_report_success) { + _new_query_mem_tracker->enable_print_log_usage(); + _new_instance_mem_tracker->enable_print_log_usage(); + } + /* // TODO: this is a stopgap until we implement ExprContext _udf_mem_tracker.reset( diff --git a/docs/en/administrator-guide/variables.md b/docs/en/administrator-guide/variables.md index 3eb2ac3121..3cd6b1a718 100644 --- a/docs/en/administrator-guide/variables.md +++ b/docs/en/administrator-guide/variables.md @@ -185,7 +185,7 @@ Note that the comment must start with /*+ and can only follow the SELECT. * `exec_mem_limit` - Used to set the memory limit for a single query. The default is 2GB, you can set it in B/K/KB/M/MB/G/GB/T/TB/P/PB, the default is B. + Used to set the memory limit for a single query. The default is 4GB, you can set it in B/K/KB/M/MB/G/GB/T/TB/P/PB, the default is B. 
This parameter is used to limit the memory that can be used by an instance of a single query fragment in a query plan. A query plan may have multiple instances, and a BE node may execute one or more instances. Therefore, this parameter does not accurately limit the memory usage of a query across the cluster, nor does it accurately limit the memory usage of a query on a single BE node. The specific needs need to be judged according to the generated query plan. diff --git a/docs/en/extending-doris/logstash.md b/docs/en/extending-doris/logstash.md index 96dbfdb73d..eb60c90d34 100644 --- a/docs/en/extending-doris/logstash.md +++ b/docs/en/extending-doris/logstash.md @@ -97,7 +97,7 @@ Configuration | Explanation `timeout` | timeout, the default is 600s `strict_mode` | Strict mode, the default is false `timezone` | Specify the time zone used for this import, the default is the East Eight District -`exec_mem_limit` | Import memory limit, default is 2GB, unit is byte +`exec_mem_limit` | Import memory limit, default is 4GB, unit is byte Other configuration: diff --git a/docs/en/getting-started/advance-usage.md b/docs/en/getting-started/advance-usage.md index cfaa54dffe..9687399ea4 100644 --- a/docs/en/getting-started/advance-usage.md +++ b/docs/en/getting-started/advance-usage.md @@ -145,13 +145,13 @@ For more help, see `HELP ALTER TABLE`. ### 2.1 Memory Limitation -To prevent a user's query from consuming too much memory. Queries are controlled in memory. A query task uses no more than 2GB of memory by default on a single BE node. +To prevent a user's query from consuming too much memory. Queries are controlled in memory. A query task uses no more than 4GB of memory by default on a single BE node. When users use it, if they find a `Memory limit exceeded` error, they usually exceed the memory limit. Users should try to optimize their SQL statements when they encounter memory overrun. 
-If it is found that 2GB memory cannot be satisfied, the memory parameters can be set manually. +If it is found that 4GB memory cannot be satisfied, the memory parameters can be set manually. Display query memory limits: @@ -160,7 +160,7 @@ mysql> SHOW VARIABLES LIKE "%mem_limit%"; +---------------+------------+ | Variable_name | Value | +---------------+------------+ -| exec_mem_limit| 2147483648 | +| exec_mem_limit| 4294967296 | +---------------+------------+ 1 row in set (0.00 sec) ``` diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md b/docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md index f3e849f3e0..d2ad0ae134 100644 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md +++ b/docs/en/sql-reference/sql-statements/Data Manipulation/EXPORT.md @@ -61,7 +61,7 @@ under the License. column_separator: Specifies the exported column separator, defaulting to t. Supports invisible characters, such as'\x07'. column: Specify the columns to be exported, separated by commas. If you do not fill in this parameter, the default is to export all the columns of the table. line_delimiter: Specifies the exported line separator, defaulting to\n. Supports invisible characters, such as'\x07'. - exec_mem_limit: Exports the upper limit of memory usage for a single BE node, defaulting to 2GB in bytes. + exec_mem_limit: Exports the upper limit of memory usage for a single BE node, defaulting to 4GB in bytes. timeout: The time-out for importing jobs is 1 day by default, in seconds. tablet_num_per_task: The maximum number of tablets that each subtask can allocate. 
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md index fac74a7db1..c37d3fef41 100644 --- a/docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md +++ b/docs/en/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md @@ -117,7 +117,7 @@ Specifies the time zone used for this load. The default is East Eight District. `exec_mem_limit` -Memory limit. Default is 2GB. Unit is Bytes. +Memory limit. Default is 4GB. Unit is Bytes. `format` Specifies the format of the imported data. Support csv and json, the default is csv. diff --git a/docs/zh-CN/administrator-guide/export-manual.md b/docs/zh-CN/administrator-guide/export-manual.md index b71f9bfce7..6445f5402f 100644 --- a/docs/zh-CN/administrator-guide/export-manual.md +++ b/docs/zh-CN/administrator-guide/export-manual.md @@ -113,7 +113,7 @@ PROPERTIES "label" = "mylabel", "column_separator"=",", "columns" = "col1,col2", - "exec_mem_limit"="2147483648", + "exec_mem_limit"="4294967296", "timeout" = "3600" ) WITH BROKER "hdfs" @@ -127,7 +127,7 @@ WITH BROKER "hdfs" * `column_separator`:列分隔符。默认为 `\t`。支持不可见字符,比如 '\x07'。 * `columns`:要导出的列,使用英文状态逗号隔开,如果不填这个参数默认是导出表的所有列。 * `line_delimiter`:行分隔符。默认为 `\n`。支持不可见字符,比如 '\x07'。 -* `exec_mem_limit`: 表示 Export 作业中,一个查询计划在单个 BE 上的内存使用限制。默认 2GB。单位字节。 +* `exec_mem_limit`: 表示 Export 作业中,一个查询计划在单个 BE 上的内存使用限制。默认 4GB。单位字节。 * `timeout`:作业超时时间。默认 2小时。单位秒。 * `tablet_num_per_task`:每个查询计划分配的最大分片数。默认为 5。 diff --git a/docs/zh-CN/administrator-guide/variables.md b/docs/zh-CN/administrator-guide/variables.md index 219e86049e..cef0310456 100644 --- a/docs/zh-CN/administrator-guide/variables.md +++ b/docs/zh-CN/administrator-guide/variables.md @@ -182,7 +182,7 @@ SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3); * `exec_mem_limit` - 用于设置单个查询的内存限制。默认为 2GB,单位为B/K/KB/M/MB/G/GB/T/TB/P/PB, 默认为B。 + 用于设置单个查询的内存限制。默认为 4GB,单位为B/K/KB/M/MB/G/GB/T/TB/P/PB, 默认为B。 
该参数用于限制一个查询计划中,单个查询计划的实例所能使用的内存。一个查询计划可能有多个实例,一个 BE 节点可能执行一个或多个实例。所以该参数并不能准确限制一个查询在整个集群的内存使用,也不能准确限制一个查询在单一 BE 节点上的内存使用。具体需要根据生成的查询计划判断。 diff --git a/docs/zh-CN/extending-doris/logstash.md b/docs/zh-CN/extending-doris/logstash.md index c92bad0c65..1f6ba186b8 100644 --- a/docs/zh-CN/extending-doris/logstash.md +++ b/docs/zh-CN/extending-doris/logstash.md @@ -97,7 +97,7 @@ copy logstash-output-doris-{version}.gem 到 logstash 安装目录下 `timeout` | 超时时间,默认为600s。 `strict_mode` | 严格模式,默认为false。 `timezone` | 指定本次导入所使用的时区,默认为东八区。 -`exec_mem_limit` | 导入内存限制,默认为 2GB,单位为字节。 +`exec_mem_limit` | 导入内存限制,默认为 4GB,单位为字节。 其他配置 diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md index 52613dfe78..a5f009e334 100644 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md +++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md @@ -245,7 +245,7 @@ under the License. 可以指定如下参数: timeout: 指定导入操作的超时时间。默认超时为4小时。单位秒。 max_filter_ratio:最大容忍可过滤(数据不规范等原因)的数据比例。默认零容忍。 - exec_mem_limit: 导入内存限制。默认为 2GB。单位为字节。 + exec_mem_limit: 导入内存限制。默认为 4GB。单位为字节。 strict mode: 是否对数据进行严格限制。默认为 false。 timezone: 指定某些受时区影响的函数的时区,如 strftime/alignment_timestamp/from_unixtime 等等,具体请查阅 [时区] 文档。如果不指定,则使用 "Asia/Shanghai" 时区。 send_batch_parallelism: 用于设置发送批处理数据的并行度,如果并行度的值超过 BE 配置中的 `max_send_batch_parallelism_per_job`,那么作为协调点的 BE 将使用 `max_send_batch_parallelism_per_job` 的值。 diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/EXPORT.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/EXPORT.md index 6148f0be9c..4082c8bd55 100644 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/EXPORT.md +++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/EXPORT.md @@ -61,7 +61,7 @@ under the License. 
column_separator: 指定导出的列分隔符,默认为\t。支持不可见字符,比如 '\x07'。 column: 指定待导出的列,使用英文逗号隔开,如果不填这个参数默认是导出表的所有列。 line_delimiter: 指定导出的行分隔符,默认为\n。支持不可见字符,比如 '\x07'。 - exec_mem_limit: 导出在单个 BE 节点的内存使用上限,默认为 2GB,单位为字节。 + exec_mem_limit: 导出在单个 BE 节点的内存使用上限,默认为 4GB,单位为字节。 timeout:导入作业的超时时间,默认为1天,单位是秒。 tablet_num_per_task:每个子任务能分配的最大 Tablet 数量。 diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md index 9edcd46c2e..7d45f486ec 100644 --- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md +++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/STREAM LOAD.md @@ -76,7 +76,7 @@ under the License. timezone: 指定本次导入所使用的时区。默认为东八区。该参数会影响所有导入涉及的和时区有关的函数结果。 - exec_mem_limit: 导入内存限制。默认为 2GB。单位为字节。 + exec_mem_limit: 导入内存限制。默认为 4GB。单位为字节。 format: 指定导入数据格式,默认是csv,支持json格式。 diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/LoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/LoadJob.java index 0fa8a304da..4cb82b2947 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/LoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/LoadJob.java @@ -73,7 +73,7 @@ public class LoadJob implements Writable { } private static final int DEFAULT_TIMEOUT_S = 0; - private static final long DEFAULT_EXEC_MEM_LIMIT = 2147483648L; // 2GB + private static final long DEFAULT_EXEC_MEM_LIMIT = 4294967296L; // 4GB private long id; private long dbId; diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java index 28f64f11a7..2dd646b636 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/LoadJob.java @@ -402,7 +402,7 @@ public abstract class LoadJob extends AbstractTxnStateChangeCallback implements break; } jobProperties.put(LoadStmt.TIMEOUT_PROPERTY, timeout); - 
jobProperties.put(LoadStmt.EXEC_MEM_LIMIT, 2 * 1024 * 1024 * 1024L); + jobProperties.put(LoadStmt.EXEC_MEM_LIMIT, 4 * 1024 * 1024 * 1024L); jobProperties.put(LoadStmt.MAX_FILTER_RATIO_PROPERTY, 0.0); jobProperties.put(LoadStmt.STRICT_MODE, false); jobProperties.put(LoadStmt.TIMEZONE, TimeUtils.DEFAULT_TIME_ZONE); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java index 7c0b24d69c..ab87e4b180 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java @@ -106,7 +106,7 @@ public abstract class RoutineLoadJob extends AbstractTxnStateChangeCallback impl public static final long DEFAULT_MAX_INTERVAL_SECOND = 10; public static final long DEFAULT_MAX_BATCH_ROWS = 200000; public static final long DEFAULT_MAX_BATCH_SIZE = 100 * 1024 * 1024; // 100MB - public static final long DEFAULT_EXEC_MEM_LIMIT = 2 * 1024 * 1024 * 1024L; + public static final long DEFAULT_EXEC_MEM_LIMIT = 4 * 1024 * 1024 * 1024L; public static final boolean DEFAULT_STRICT_MODE = false; // default is false public static final int DEFAULT_SEND_BATCH_PARALLELISM = 1; public static final boolean DEFAULT_LOAD_TO_SINGLE_TABLET = false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 46017bd24c..d051c632f2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -202,7 +202,7 @@ public class SessionVariable implements Serializable, Writable { // max memory used on every backend. 
@VariableMgr.VarAttr(name = EXEC_MEM_LIMIT) - public long maxExecMemByte = 2147483648L; + public long maxExecMemByte = 4294967296L; @VariableMgr.VarAttr(name = ENABLE_SPILLING) public boolean enableSpilling = false; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org