This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch branch-2.0-var
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 39b83792c78cb421a00ff7f5df1c99cba672294b
Author: Jack Drogon <jack.xsuper...@gmail.com>
AuthorDate: Wed Sep 27 08:13:53 2023 +0800

    [feature](autobucekt) Add support autobucket min buckets config Add support 
autobucket (#24920)
    
    Signed-off-by: Jack Drogon <jack.xsuper...@gmail.com>
---
 docs/en/docs/advanced/autobucket.md                |  1 +
 docs/zh-CN/docs/advanced/autobucket.md             |  1 +
 .../main/java/org/apache/doris/common/Config.java  |  6 ++++++
 .../org/apache/doris/analysis/CreateTableStmt.java |  2 +-
 .../doris/clone/DynamicPartitionScheduler.java     |  2 +-
 .../apache/doris/common/util/AutoBucketUtils.java  |  5 +++++
 .../suites/autobucket/test_autobucket.groovy       | 23 ++++++++++++++++++++++
 7 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/docs/en/docs/advanced/autobucket.md 
b/docs/en/docs/advanced/autobucket.md
index bc05e3e72f6..e9fdfcdf63b 100644
--- a/docs/en/docs/advanced/autobucket.md
+++ b/docs/en/docs/advanced/autobucket.md
@@ -72,6 +72,7 @@ First, use the value of estimate_partition_size divided by 5 
(calculated as a 5-
 3. Calculation logic to get the final number of buckets.
 First calculate an intermediate value x = min(M, N, 128).
 If x < N and x < the number of BE nodes, the final bucket is y, the number of 
BE nodes; otherwise, the final bucket is x.
+4. x = max(x, autobucket_min_buckets), 
where autobucket_min_buckets is configured in Config and defaults to 1
 
 The pseudo-code representation of the above process is as follows
 
diff --git a/docs/zh-CN/docs/advanced/autobucket.md 
b/docs/zh-CN/docs/advanced/autobucket.md
index 43a1124db77..ccbceaf633d 100644
--- a/docs/zh-CN/docs/advanced/autobucket.md
+++ b/docs/zh-CN/docs/advanced/autobucket.md
@@ -73,6 +73,7 @@ properties("estimate_partition_size" = "100G")
 3. 得到最终的分桶个数计算逻辑:
 先计算一个中间值 x = min(M, N, 128),
 如果 x < N并且x < BE节点个数,则最终分桶为 y 即 BE 节点个数;否则最终分桶数为 x
+4. x = max(x, autobucket_min_buckets), 
这里autobucket_min_buckets是在Config中配置的,默认是1
 
 上述过程伪代码表现形式为:
 
diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java 
b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
index 5f2173af7ca..3c5bb36094d 100644
--- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
+++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java
@@ -2125,4 +2125,10 @@ public class Config extends ConfigBase {
                     + "The larger the value, the more uniform the distribution 
of the hash algorithm, "
                     + "but it will increase the memory overhead."})
     public static int virtual_node_number = 2048;
+
+    @ConfField(description = {
+            "Auto Buckets中最小的buckets数目",
+            "min buckets of auto bucket"
+    })
+    public static int autobucket_min_buckets = 1;
 }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
index b1df8498b8b..fa305793f56 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateTableStmt.java
@@ -122,7 +122,7 @@ public class CreateTableStmt extends DdlStmt {
         } else {
             long partitionSize = ParseUtil
                     
.analyzeDataVolumn(newProperties.get(PropertyAnalyzer.PROPERTIES_ESTIMATE_PARTITION_SIZE));
-            
distributionDesc.setBuckets(AutoBucketUtils.getBucketsNum(partitionSize));
+            
distributionDesc.setBuckets(AutoBucketUtils.getBucketsNum(partitionSize, 
Config.autobucket_min_buckets));
         }
 
         return newProperties;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
 
b/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
index 53f44070d93..dc03ecf8233 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
@@ -221,7 +221,7 @@ public class DynamicPartitionScheduler extends MasterDaemon 
{
 
         // plus 5 for uncompressed data
         long uncompressedPartitionSize = 
getNextPartitionSize(partitionSizeArray) * 5;
-        return AutoBucketUtils.getBucketsNum(uncompressedPartitionSize);
+        return AutoBucketUtils.getBucketsNum(uncompressedPartitionSize, 
Config.autobucket_min_buckets);
     }
 
     private ArrayList<AddPartitionClause> getAddPartitionClause(Database db, 
OlapTable olapTable,
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java
index ca935ab20e7..294250fd213 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java
@@ -95,4 +95,9 @@ public class AutoBucketUtils {
         logger.debug("AutoBucketsUtil: final bucketsNum {}", bucketsNum);
         return bucketsNum;
     }
+
+    public static int getBucketsNum(long partitionSize, int minBuckets) {
+        int bucketsNum = getBucketsNum(partitionSize);
+        return Math.max(minBuckets, bucketsNum);
+    }
 }
diff --git a/regression-test/suites/autobucket/test_autobucket.groovy 
b/regression-test/suites/autobucket/test_autobucket.groovy
index ab0ae99658b..d3ba70d0df3 100644
--- a/regression-test/suites/autobucket/test_autobucket.groovy
+++ b/regression-test/suites/autobucket/test_autobucket.groovy
@@ -39,4 +39,27 @@ suite("test_autobucket") {
     assertEquals(Integer.valueOf(result.get(0).get(8)), 10)
 
     sql "drop table if exists autobucket_test"
+
+
+    sql "drop table if exists autobucket_test_min_buckets"
+    result = sql """
+        CREATE TABLE `autobucket_test_min_buckets` (
+          `user_id` largeint(40) NOT NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`user_id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`user_id`) BUCKETS AUTO
+        PROPERTIES (
+          "replication_allocation" = "tag.location.default: 1",
+          "estimate_partition_size" = "1M"
+        )
+        """
+
+    default_min_buckets = 1 // in Config.java
+    result = sql "show partitions from autobucket_test_min_buckets"
+    logger.info("${result}")
+    // XXX: buckets at pos(8), next maybe impl by sql meta
+    assertEquals(Integer.valueOf(result.get(0).get(8)), default_min_buckets)
+
+    sql "drop table if exists autobucket_test_min_buckets"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to