This is an automated email from the ASF dual-hosted git repository. eldenmoon pushed a commit to branch branch-2.0-var in repository https://gitbox.apache.org/repos/asf/doris.git
commit 39b83792c78cb421a00ff7f5df1c99cba672294b Author: Jack Drogon <jack.xsuper...@gmail.com> AuthorDate: Wed Sep 27 08:13:53 2023 +0800 [feature](autobucekt) Add support autobucket min buckets config Add support autobucket (#24920) Signed-off-by: Jack Drogon <jack.xsuper...@gmail.com> --- docs/en/docs/advanced/autobucket.md | 1 + docs/zh-CN/docs/advanced/autobucket.md | 1 + .../main/java/org/apache/doris/common/Config.java | 6 ++++++ .../org/apache/doris/analysis/CreateTableStmt.java | 2 +- .../doris/clone/DynamicPartitionScheduler.java | 2 +- .../apache/doris/common/util/AutoBucketUtils.java | 5 +++++ .../suites/autobucket/test_autobucket.groovy | 23 ++++++++++++++++++++++ 7 files changed, 38 insertions(+), 2 deletions(-) diff --git a/docs/en/docs/advanced/autobucket.md b/docs/en/docs/advanced/autobucket.md index bc05e3e72f6..e9fdfcdf63b 100644 --- a/docs/en/docs/advanced/autobucket.md +++ b/docs/en/docs/advanced/autobucket.md @@ -72,6 +72,7 @@ First, use the value of estimate_partition_size divided by 5 (calculated as a 5- 3. Calculation logic to get the final number of buckets. First calculate an intermediate value x = min(M, N, 128). If x < N and x < the number of BE nodes, the final bucket is y, the number of BE nodes; otherwise, the final bucket is x. +4. x = max(x, autobucket_min_buckets), 这里autobucket_min_buckets是在Config中配置的,默认是1 The pseudo-code representation of the above process is as follows diff --git a/docs/zh-CN/docs/advanced/autobucket.md b/docs/zh-CN/docs/advanced/autobucket.md index 43a1124db77..ccbceaf633d 100644 --- a/docs/zh-CN/docs/advanced/autobucket.md +++ b/docs/zh-CN/docs/advanced/autobucket.md @@ -73,6 +73,7 @@ properties("estimate_partition_size" = "100G") 3. 得到最终的分桶个数计算逻辑: 先计算一个中间值 x = min(M, N, 128), 如果 x < N并且x < BE节点个数,则最终分桶为 y 即 BE 节点个数;否则最终分桶数为 x +4. x = max(x, autobucket_min_buckets), 这里autobucket_min_buckets是在Config中配置的,默认是1 上述过程伪代码表现形式为: diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 5f2173af7ca..3c5bb36094d 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2125,4 +2125,10 @@ public class Config extends ConfigBase { + "The larger the value, the more uniform the distribution of the hash algorithm, " + "but it will increase the memory overhead."}) public static int virtual_node_number = 2048; + + @ConfField(description = { + "Auto Buckets中最小的buckets数目", + "min buckets of auto bucket" + }) + public static int autobucket_min_buckets = 1; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java index b1df8498b8b..fa305793f56 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java @@ -122,7 +122,7 @@ public class CreateTableStmt extends DdlStmt { } else { long partitionSize = ParseUtil .analyzeDataVolumn(newProperties.get(PropertyAnalyzer.PROPERTIES_ESTIMATE_PARTITION_SIZE)); - distributionDesc.setBuckets(AutoBucketUtils.getBucketsNum(partitionSize)); + distributionDesc.setBuckets(AutoBucketUtils.getBucketsNum(partitionSize, Config.autobucket_min_buckets)); } return newProperties; diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java index 53f44070d93..dc03ecf8233 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java @@ -221,7 +221,7 @@ public class DynamicPartitionScheduler extends MasterDaemon { // plus 5 for uncompressed data long uncompressedPartitionSize = getNextPartitionSize(partitionSizeArray) * 5; - return AutoBucketUtils.getBucketsNum(uncompressedPartitionSize); + return AutoBucketUtils.getBucketsNum(uncompressedPartitionSize, Config.autobucket_min_buckets); } private ArrayList<AddPartitionClause> getAddPartitionClause(Database db, OlapTable olapTable, diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java index ca935ab20e7..294250fd213 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java @@ -95,4 +95,9 @@ public class AutoBucketUtils { logger.debug("AutoBucketsUtil: final bucketsNum {}", bucketsNum); return bucketsNum; } + + public static int getBucketsNum(long partitionSize, int minBuckets) { + int bucketsNum = getBucketsNum(partitionSize); + return Math.max(minBuckets, bucketsNum); + } } diff --git a/regression-test/suites/autobucket/test_autobucket.groovy b/regression-test/suites/autobucket/test_autobucket.groovy index ab0ae99658b..d3ba70d0df3 100644 --- a/regression-test/suites/autobucket/test_autobucket.groovy +++ b/regression-test/suites/autobucket/test_autobucket.groovy @@ -39,4 +39,27 @@ suite("test_autobucket") { assertEquals(Integer.valueOf(result.get(0).get(8)), 10) sql "drop table if exists autobucket_test" + + + sql "drop table if exists autobucket_test_min_buckets" + result = sql """ + CREATE TABLE `autobucket_test_min_buckets` ( + `user_id` largeint(40) NOT NULL + ) ENGINE=OLAP + DUPLICATE KEY(`user_id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`user_id`) BUCKETS AUTO + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "estimate_partition_size" = "1M" + ) + """ + + default_min_buckets = 1 // in Config.java + result = sql "show partitions from autobucket_test_min_buckets" + logger.info("${result}") + // XXX: buckets at pos(8), next maybe impl by sql meta + assertEquals(Integer.valueOf(result.get(0).get(8)), default_min_buckets) + + sql "drop table if exists autobucket_test_min_buckets" } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org