This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new d6497fe  [Config] Change config name 
'streaming_load_max_batch_size_mb' to 'streaming_load_json_max_mb' (#4791)
d6497fe is described below

commit d6497fedc4de320326d4ff5edb8efa72286661d5
Author: EmmyMiao87 <522274...@qq.com>
AuthorDate: Wed Oct 28 23:27:33 2020 +0800

    [Config] Change config name 'streaming_load_max_batch_size_mb' to 
'streaming_load_json_max_mb' (#4791)
    
    The name and another config name are close to each other and are 
indistinguishable.
    So this PR modifies the name.
    The documentation has also been updated accordingly.
---
 be/src/common/config.h                             |  2 +-
 be/src/http/action/stream_load.cpp                 | 52 +++++++++++-----------
 docs/en/administrator-guide/config/be_config.md    | 11 +++--
 docs/en/administrator-guide/config/fe_config.md    |  8 ++++
 .../Data Manipulation/BROKER LOAD.md               |  3 +-
 docs/zh-CN/administrator-guide/config/be_config.md | 11 +++--
 docs/zh-CN/administrator-guide/config/fe_config.md |  8 ++++
 .../Data Manipulation/BROKER LOAD.md               |  3 +-
 8 files changed, 63 insertions(+), 35 deletions(-)

diff --git a/be/src/common/config.h b/be/src/common/config.h
index a369049..0be21a4 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -347,7 +347,7 @@ namespace config {
     // Some data formats, such as JSON, cannot be streamed.
     // Therefore, it is necessary to limit the maximum number of
     // such data when using stream load to prevent excessive memory 
consumption.
-    CONF_mInt64(streaming_load_max_batch_size_mb, "100");
+    CONF_mInt64(streaming_load_json_max_mb, "100");
     // the alive time of a TabletsChannel.
     // If the channel does not receive any data till this time,
     // the channel will be removed.
diff --git a/be/src/http/action/stream_load.cpp 
b/be/src/http/action/stream_load.cpp
index 64f3bb0..9bdaf4b 100644
--- a/be/src/http/action/stream_load.cpp
+++ b/be/src/http/action/stream_load.cpp
@@ -208,25 +208,7 @@ Status StreamLoadAction::_on_header(HttpRequest* http_req, 
StreamLoadContext* ct
         LOG(WARNING) << "parse basic authorization failed." << ctx->brief();
         return Status::InternalError("no valid Basic authorization");
     }
-    // check content length
-    ctx->body_bytes = 0;
-    size_t max_body_bytes = config::streaming_load_max_mb * 1024 * 1024;
-    if (!http_req->header(HttpHeaders::CONTENT_LENGTH).empty()) {
-        ctx->body_bytes = 
std::stol(http_req->header(HttpHeaders::CONTENT_LENGTH));
-        if (ctx->body_bytes > max_body_bytes) {
-            LOG(WARNING) << "body exceed max size." << ctx->brief();
 
-            std::stringstream ss;
-            ss << "body exceed max size: " << max_body_bytes << ", limit: " << 
max_body_bytes;
-            return Status::InternalError(ss.str());
-        }
-    } else {
-#ifndef BE_TEST
-        evhttp_connection_set_max_body_size(
-            evhttp_request_get_connection(http_req->get_evhttp_request()),
-            max_body_bytes);
-#endif
-    }
     // get format of this put
     if (http_req->header(HTTP_FORMAT_KEY).empty()) {
         ctx->format = TFileFormatType::FORMAT_CSV_PLAIN;
@@ -237,16 +219,34 @@ Status StreamLoadAction::_on_header(HttpRequest* 
http_req, StreamLoadContext* ct
             ss << "unknown data format, format=" << 
http_req->header(HTTP_FORMAT_KEY);
             return Status::InternalError(ss.str());
         }
+    }
 
-        if (ctx->format == TFileFormatType::FORMAT_JSON) {
-            size_t max_body_bytes = config::streaming_load_max_batch_size_mb * 
1024 * 1024;
-            if (ctx->body_bytes > max_body_bytes) {
-                std::stringstream ss;
-                ss << "The size of this batch exceed the max size [" << 
max_body_bytes
-                   << "]  of json type data " << " data [ " << ctx->body_bytes 
<< " ]";
-                return Status::InternalError(ss.str());
-            }
+    // check content length
+    ctx->body_bytes = 0;
+    size_t csv_max_body_bytes = config::streaming_load_max_mb * 1024 * 1024;
+    size_t json_max_body_bytes = config::streaming_load_json_max_mb * 1024 * 
1024;
+    if (!http_req->header(HttpHeaders::CONTENT_LENGTH).empty()) {
+        ctx->body_bytes = 
std::stol(http_req->header(HttpHeaders::CONTENT_LENGTH));
+        // json max body size
+        if ((ctx->format == TFileFormatType::FORMAT_JSON) && (ctx->body_bytes 
> json_max_body_bytes)) {
+            std::stringstream ss;
+            ss << "The size of this batch exceed the max size [" << 
json_max_body_bytes
+                << "]  of json type data " << " data [ " << ctx->body_bytes << 
" ]";
+            return Status::InternalError(ss.str());
+        } 
+        // csv max body size
+        else if (ctx->body_bytes > csv_max_body_bytes) {
+            LOG(WARNING) << "body exceed max size." << ctx->brief();
+            std::stringstream ss;
+            ss << "body exceed max size: " << csv_max_body_bytes << ", data: " 
<< ctx->body_bytes;
+            return Status::InternalError(ss.str());
         }
+    } else {
+#ifndef BE_TEST
+        evhttp_connection_set_max_body_size(
+            evhttp_request_get_connection(http_req->get_evhttp_request()),
+            csv_max_body_bytes);
+#endif
     }
 
     if (!http_req->header(HTTP_TIMEOUT).empty()) {
diff --git a/docs/en/administrator-guide/config/be_config.md 
b/docs/en/administrator-guide/config/be_config.md
index 3a8b68b..0fc5325 100644
--- a/docs/en/administrator-guide/config/be_config.md
+++ b/docs/en/administrator-guide/config/be_config.md
@@ -512,6 +512,11 @@ Indicates how many tablets in this data directory failed 
to load. At the same ti
 
 ### `push_write_mbytes_per_sec`
 
++ Type: int32
++ Description: Load data speed control, the default is 10MB per second. 
Applicable to all load methods.
++ Unit: MB
++ Default value: 10
+
 ### `query_scratch_dirs`
 
 ### `read_size`
@@ -570,16 +575,16 @@ process will log fatal and exit. When config is false, 
process will only log war
 ### `streaming_load_max_mb`
 
 * Type: int64
-* Description: Used to limit the maximum amount of data allowed in one Stream 
load. The unit is MB.
+* Description: Used to limit the maximum amount of csv data allowed in one 
Stream load. The unit is MB.
 * Default value: 10240
 * Dynamically modify: yes
 
Stream Load is generally suitable for loading data less than a few GB, and is not 
suitable for loading very large data.
 
-### `streaming_load_max_batch_size_mb`
+### `streaming_load_json_max_mb`
 
 * Type: int64
-* Description: For some data formats, such as JSON, it is used to limit the 
maximum amount of data allowed in one Stream load. The unit is MB.
+* Description: It is used to limit the maximum amount of JSON data allowed in 
one Stream load. The unit is MB.
 * Default value: 100
 * Dynamically modify: yes
 
diff --git a/docs/en/administrator-guide/config/fe_config.md 
b/docs/en/administrator-guide/config/fe_config.md
index 876519c..77207a3 100644
--- a/docs/en/administrator-guide/config/fe_config.md
+++ b/docs/en/administrator-guide/config/fe_config.md
@@ -578,6 +578,14 @@ See the description of `max_clone_task_timeout_sec`.
 
 ### `rewrite_count_distinct_to_bitmap_hll`
 
+This variable is a session variable, and the session level takes effect.
+
++ Type: boolean
++ Description: **Only for the table of the AGG model**. When the variable is 
true and the user query contains aggregate functions such as count(distinct 
c1), if the type of the c1 column itself is bitmap, count distinct will be 
rewritten as bitmap_union_count(c1).
+         When the type of the c1 column itself is hll, count distinct will be 
rewritten as hll_union_agg(c1).
+         If the variable is false, no rewriting occurs.
++ Default value: true.
+
 ### `rpc_port`
 
 ### `schedule_slot_num_per_path`
diff --git a/docs/en/sql-reference/sql-statements/Data Manipulation/BROKER 
LOAD.md b/docs/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md
index c9c1f65..cc4b3af 100644
--- a/docs/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md     
+++ b/docs/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md     
@@ -336,7 +336,8 @@ under the License.
     
     7. Load data into tables containing HLL columns, which can be columns in 
tables or columns in data
     
-        If there are three columns in the table (id, v1, v2, v3). The V1 and 
V2 columns are HLL columns. The imported source file has three columns. Then 
(column_list) declares that the first column is id, and the second and third 
columns are temporarily named k1, k2.
+        If there are 4 columns in the table (id, v1, v2, v3), where the v1 and 
v2 columns are hll columns. The imported source file has 3 columns: the 
first column in the table = the first column in the source file, the second 
and third columns in the table are derived by transforming the second and 
third columns in the source file, and the fourth column in the table does not 
exist in the source file.
+        Then (column_list) declares that the first column is id, and the 
second and third columns are temporarily named k1, k2.
 
         In SET, the HLL column in the table must be specifically declared 
hll_hash. The V1 column in the table is equal to the hll_hash (k1) column in 
the original data.The v3 column in the table does not have a corresponding 
value in the original data, and empty_hll is used to supplement the default 
value.
 
diff --git a/docs/zh-CN/administrator-guide/config/be_config.md 
b/docs/zh-CN/administrator-guide/config/be_config.md
index 25f3f54..f931f59 100644
--- a/docs/zh-CN/administrator-guide/config/be_config.md
+++ b/docs/zh-CN/administrator-guide/config/be_config.md
@@ -507,6 +507,11 @@ load tablets from header failed, failed tablets size: xxx, 
path=xxx
 
 ### `push_write_mbytes_per_sec`
 
++ 类型:int32
++ 描述:导入数据速度控制,默认最快每秒10MB。适用于所有的导入方式。
++ 单位:MB
++ 默认值:10
+
 ### `query_scratch_dirs`
 
 ### `read_size`
@@ -565,16 +570,16 @@ load tablets from header failed, failed tablets size: 
xxx, path=xxx
 ### `streaming_load_max_mb`
 
 * 类型:int64
-* 描述:用于限制一次 Stream load 导入中,允许的最大数据量。单位 MB。
+* 描述:用于限制数据格式为 csv 的一次 Stream load 导入中,允许的最大数据量。单位 MB。
 * 默认值: 10240
 * 可动态修改:是
 
 Stream Load 一般适用于导入几个GB以内的数据,不适合导入过大的数据。
 
-### `streaming_load_max_batch_size_mb`
+### `streaming_load_json_max_mb`
 
 * 类型:int64
-* 描述:对于某些数据格式,如 JSON,用于限制一次 Stream load 导入中,允许的最大数据量。单位 MB。
+* 描述:用于限制数据格式为 json 的一次 Stream load 导入中,允许的最大数据量。单位 MB。
 * 默认值: 100
 * 可动态修改:是
 
diff --git a/docs/zh-CN/administrator-guide/config/fe_config.md 
b/docs/zh-CN/administrator-guide/config/fe_config.md
index d37d348..0f9a7d1 100644
--- a/docs/zh-CN/administrator-guide/config/fe_config.md
+++ b/docs/zh-CN/administrator-guide/config/fe_config.md
@@ -572,6 +572,14 @@ current running txns on db xxx is xx, larger than limit xx
 
 ### `rewrite_count_distinct_to_bitmap_hll`
 
+该变量为 session variable,session 级别生效。
+
++ 类型:boolean
++ 描述:**仅对于 AGG 模型的表来说**,当变量为 true 时,用户查询时包含 count(distinct c1) 这类聚合函数时,如果 c1 
列本身类型为 bitmap,则 count distinct 会改写为 bitmap_union_count(c1)。
+        当 c1 列本身类型为 hll,则 count distinct 会改写为 hll_union_agg(c1)
+        如果变量为 false,则不发生任何改写。
++ 默认值:true。
+
 ### `rpc_port`
 
 ### `schedule_slot_num_per_path`
diff --git a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER 
LOAD.md b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER 
LOAD.md
index bedf1a3..e785877 100644
--- a/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md  
+++ b/docs/zh-CN/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md  
@@ -335,7 +335,8 @@ under the License.
     
     7. 导入数据到含有HLL列的表,可以是表中的列或者数据里面的列
 
-        
如果表中有三列分别是(id,v1,v2,v3)。其中v1和v2列是hll列。导入的源文件有3列。则(column_list)中声明第一列为id,第二三列为一个临时命名的k1,k2。
+        如果表中有4列分别是(id,v1,v2,v3)。其中v1和v2列是hll列。导入的源文件有3列, 其中表中的第一列 = 
源文件中的第一列,而表中的第二,三列为源文件中的第二,三列变换得到,表中的第四列在源文件中并不存在。
+        则(column_list)中声明第一列为id,第二三列为一个临时命名的k1,k2。
         在SET中必须给表中的hll列特殊声明 hll_hash。表中的v1列等于原始数据中的hll_hash(k1)列, 
表中的v3列在原始数据中并没有对应的值,使用empty_hll补充默认值。
         LOAD LABEL example_db.label7
         (


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to