This is an automated email from the ASF dual-hosted git repository.

zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 302aec1316a [Enhancement](auto partition) Auto partition could use 
auto bucket (#56921)
302aec1316a is described below

commit 302aec1316a0087e3fb0ef2a0be379c51588fc44
Author: zclllyybb <[email protected]>
AuthorDate: Thu Oct 16 16:53:11 2025 +0800

    [Enhancement](auto partition) Auto partition could use auto bucket (#56921)
    
    1. forbid auto list partition + auto bucket
    2. support auto range partition + auto bucket to calculate a proper bucket
    number for new partitions, based on the assumption of incremental loading of the table.
---
 .../apache/doris/analysis/PartitionExprUtil.java   |  28 ++-
 .../doris/clone/DynamicPartitionScheduler.java     |  42 +---
 .../doris/common/util/AutoBucketCalculator.java    | 205 +++++++++++++++++++
 .../apache/doris/common/util/AutoBucketUtils.java  |  10 +-
 .../trees/plans/commands/info/CreateTableInfo.java |   8 +
 .../doris/analysis/PartitionExprUtilTest.java      | 223 +++++++++++++++++++++
 .../common/util/AutoBucketCalculatorTest.java      | 106 ++++++++++
 .../test_iot_auto_detect_fail.groovy               |   4 +-
 .../expression/test_simplify_range.groovy          |   2 +-
 .../auto_partition/test_auto_list_partition.groovy |   2 +-
 .../test_auto_partition_behavior.groovy            |  14 ++
 ...uto_partition_with_single_replica_insert.groovy |   4 +-
 .../query_p0/show/test_show_partitions.groovy      |   2 +-
 13 files changed, 606 insertions(+), 44 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java
index 7d2d6b9743e..ec6772e849a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java
@@ -19,11 +19,14 @@ package org.apache.doris.analysis;
 
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.DataProperty;
+import org.apache.doris.catalog.DistributionInfo;
+import org.apache.doris.catalog.HashDistributionInfo;
 import org.apache.doris.catalog.OlapTable;
 import org.apache.doris.catalog.PartitionInfo;
 import org.apache.doris.catalog.PartitionType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.util.AutoBucketCalculator;
 import org.apache.doris.common.util.PropertyAnalyzer;
 import org.apache.doris.thrift.TNullableStringLiteral;
 
@@ -187,7 +190,30 @@ public class PartitionExprUtil {
             }
 
             Map<String, String> partitionProperties = Maps.newHashMap();
-            DistributionDesc distributionDesc = 
olapTable.getDefaultDistributionInfo().toDistributionDesc();
+            DistributionInfo defaultDistributionInfo = 
olapTable.getDefaultDistributionInfo();
+            DistributionDesc distributionDesc = 
defaultDistributionInfo.toDistributionDesc();
+            if (olapTable.isAutoBucket() && partitionType == 
PartitionType.RANGE) {
+                // Use unified auto bucket calculator
+                AutoBucketCalculator.AutoBucketContext context = new 
AutoBucketCalculator.AutoBucketContext(
+                        olapTable, partitionName, partitionName, false,
+                        defaultDistributionInfo.getBucketNum());
+
+                int bucketsNum = 
AutoBucketCalculator.calculateAutoBucketsWithBoundsCheck(context);
+
+                // Only update distribution if calculation was successful 
(bucketsNum != default)
+                if (bucketsNum != defaultDistributionInfo.getBucketNum()) {
+                    if (defaultDistributionInfo.getType() == 
DistributionInfo.DistributionInfoType.HASH) {
+                        HashDistributionInfo hashDistributionInfo = 
(HashDistributionInfo) defaultDistributionInfo;
+                        List<String> distColumnNames = new ArrayList<>();
+                        for (Column distributionColumn : 
hashDistributionInfo.getDistributionColumns()) {
+                            distColumnNames.add(distributionColumn.getName());
+                        }
+                        distributionDesc = new 
HashDistributionDesc(bucketsNum, distColumnNames);
+                    } else {
+                        distributionDesc = new 
RandomDistributionDesc(bucketsNum);
+                    }
+                }
+            }
 
             SinglePartitionDesc singleRangePartitionDesc = new 
SinglePartitionDesc(true, partitionName,
                     partitionKeyDesc, partitionProperties);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
 
b/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
index 1a96280ea30..3e94c68953f 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
@@ -45,6 +45,7 @@ import org.apache.doris.common.Config;
 import org.apache.doris.common.DdlException;
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
+import org.apache.doris.common.util.AutoBucketCalculator;
 import org.apache.doris.common.util.AutoBucketUtils;
 import org.apache.doris.common.util.DebugPointUtil;
 import org.apache.doris.common.util.DynamicPartitionUtil;
@@ -55,7 +56,6 @@ import org.apache.doris.common.util.TimeUtils;
 import org.apache.doris.datasource.InternalCatalog;
 import org.apache.doris.meta.MetaContext;
 import org.apache.doris.persist.PartitionPersistInfo;
-import org.apache.doris.rpc.RpcException;
 import org.apache.doris.thrift.TStorageMedium;
 
 import com.google.common.base.Preconditions;
@@ -194,31 +194,14 @@ public class DynamicPartitionScheduler extends 
MasterDaemon {
 
     private static Pair<Integer, Integer> 
getBucketsNum(DynamicPartitionProperty property, OlapTable table,
             String partitionName, String nowPartitionName, boolean 
executeFirstTime) {
-        // if execute first time, all partitions no contain data
-        if (!table.isAutoBucket() || executeFirstTime) {
-            return Pair.of(property.getBuckets(), 0);
-        }
-
-        List<Partition> partitions = getHistoricalPartitions(table, 
nowPartitionName);
-        List<Long> visibleVersions;
-        try {
-            visibleVersions = Partition.getVisibleVersions(partitions);
-        } catch (RpcException e) {
-            LOG.warn("auto bucket use property's buckets get visible version 
fail, table: [{}-{}], "
-                    + "partition: {}, buckets num: {}, exception: ",
-                    table.getName(), table.getId(), partitionName, 
property.getBuckets(), e);
-            return Pair.of(property.getBuckets(), 0);
-        }
+        AutoBucketCalculator.AutoBucketContext context = new 
AutoBucketCalculator.AutoBucketContext(
+                table, partitionName, nowPartitionName, executeFirstTime, 
property.getBuckets());
 
-        List<Partition> hasDataPartitions = filterDataPartitions(partitions, 
visibleVersions);
-        if (hasDataPartitions.isEmpty()) {
-            return handleNoDataPartitions(table, partitionName, 
property.getBuckets());
-        }
-
-        return calculateBuckets(hasDataPartitions);
+        AutoBucketCalculator.AutoBucketResult result = 
AutoBucketCalculator.calculateAutoBuckets(context);
+        return Pair.of(result.getBuckets(), result.getPreviousBuckets());
     }
 
-    private static List<Partition> getHistoricalPartitions(OlapTable table, 
String nowPartitionName) {
+    public static List<Partition> getHistoricalPartitions(OlapTable table, 
String nowPartitionName) {
         RangePartitionInfo info = (RangePartitionInfo) 
(table.getPartitionInfo());
         List<Map.Entry<Long, PartitionItem>> idToItems = new 
ArrayList<>(info.getIdToItem(false).entrySet());
         idToItems.sort(Comparator.comparing(o -> ((RangePartitionItem) 
o.getValue()).getItems().upperEndpoint()));
@@ -228,7 +211,7 @@ public class DynamicPartitionScheduler extends MasterDaemon 
{
                 .collect(Collectors.toList());
     }
 
-    private static List<Partition> filterDataPartitions(List<Partition> 
partitions, List<Long> visibleVersions) {
+    public static List<Partition> filterDataPartitions(List<Partition> 
partitions, List<Long> visibleVersions) {
         Preconditions.checkState(partitions.size() == visibleVersions.size(),
                 String.format("partitions size %d not eq visibleVersions size 
%d, impossible",
                     partitions.size(), visibleVersions.size()));
@@ -241,14 +224,8 @@ public class DynamicPartitionScheduler extends 
MasterDaemon {
         return hasDataPartitions;
     }
 
-    private static Pair<Integer, Integer> handleNoDataPartitions(OlapTable 
table,
-                                                                 String 
partitionName, int defaultBuckets) {
-        LOG.info("auto bucket use property's buckets due to all partitions no 
data, table: [{}-{}], "
-                + "partition: {}, buckets num: {}", table.getName(), 
table.getId(), partitionName, defaultBuckets);
-        return Pair.of(defaultBuckets, 0);
-    }
 
-    private static Pair<Integer, Integer> calculateBuckets(List<Partition> 
hasDataPartitions) {
+    public static Pair<Integer, Integer> calculateBuckets(List<Partition> 
hasDataPartitions) {
         List<Long> partitionSizeArray = new ArrayList<>();
         List<Long> sizeUnknownArray = new ArrayList<>();
 
@@ -417,7 +394,8 @@ public class DynamicPartitionScheduler extends MasterDaemon 
{
         return addPartitionClauses;
     }
 
-    private int checkAndFixAutoBucketCalcNumIsValid(int 
currentPartitionNumBuckets, int previousPartitionNumBuckets,
+    public static int checkAndFixAutoBucketCalcNumIsValid(int 
currentPartitionNumBuckets,
+                                                    int 
previousPartitionNumBuckets,
                                                     String tableName, String 
partitionName) {
         // previousPartitionBucketsNum == 0, some abnormal case, ignore it
         if (currentPartitionNumBuckets != 0) {
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketCalculator.java
 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketCalculator.java
new file mode 100644
index 00000000000..4434c404a7c
--- /dev/null
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketCalculator.java
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.util;
+
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.Partition;
+import org.apache.doris.clone.DynamicPartitionScheduler;
+import org.apache.doris.common.Pair;
+import org.apache.doris.rpc.RpcException;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.List;
+
+/**
+ * Utility class for calculating auto bucket numbers for partitions.
+ * This class provides a unified interface for auto bucket calculation
+ * used by both DynamicPartitionScheduler and PartitionExprUtil.
+ */
+public class AutoBucketCalculator {
+    private static final Logger LOG = 
LogManager.getLogger(AutoBucketCalculator.class);
+
+    /**
+     * Context for auto bucket calculation
+     */
+    public static class AutoBucketContext {
+        private final OlapTable table;
+        private final String partitionName;
+        private final String nowPartitionName;
+        private final boolean executeFirstTime;
+        private final int defaultBuckets;
+
+        public AutoBucketContext(OlapTable table, String partitionName, String 
nowPartitionName,
+                                boolean executeFirstTime, int defaultBuckets) {
+            this.table = table;
+            this.partitionName = partitionName;
+            this.nowPartitionName = nowPartitionName;
+            this.executeFirstTime = executeFirstTime;
+            this.defaultBuckets = defaultBuckets;
+        }
+
+        public OlapTable getTable() {
+            return table;
+        }
+
+        public String getPartitionName() {
+            return partitionName;
+        }
+
+        public String getNowPartitionName() {
+            return nowPartitionName;
+        }
+
+        public boolean isExecuteFirstTime() {
+            return executeFirstTime;
+        }
+
+        public int getDefaultBuckets() {
+            return defaultBuckets;
+        }
+    }
+
+    /**
+     * Result of auto bucket calculation
+     */
+    public static class AutoBucketResult {
+        private final int buckets;
+        private final int previousBuckets;
+        private final boolean success;
+        private final String errorMessage;
+
+        private AutoBucketResult(int buckets, int previousBuckets, boolean 
success, String errorMessage) {
+            this.buckets = buckets;
+            this.previousBuckets = previousBuckets;
+            this.success = success;
+            this.errorMessage = errorMessage;
+        }
+
+        public static AutoBucketResult success(int buckets, int 
previousBuckets) {
+            return new AutoBucketResult(buckets, previousBuckets, true, null);
+        }
+
+        public static AutoBucketResult fallback(int defaultBuckets, String 
reason) {
+            return new AutoBucketResult(defaultBuckets, 0, false, reason);
+        }
+
+        public int getBuckets() {
+            return buckets;
+        }
+
+        public int getPreviousBuckets() {
+            return previousBuckets;
+        }
+
+        public boolean isSuccess() {
+            return success;
+        }
+
+        public String getErrorMessage() {
+            return errorMessage;
+        }
+    }
+
+    /**
+     * Calculate auto bucket number for a partition.
+     * This is the unified method that replaces the logic in both
+     * DynamicPartitionScheduler.getBucketsNum and 
PartitionExprUtil.getAddPartitionClauseFromPartitionValues
+     *
+     * @param context the context for auto bucket calculation
+     * @return AutoBucketResult containing bucket numbers and calculation 
status
+     */
+    public static AutoBucketResult calculateAutoBuckets(AutoBucketContext 
context) {
+        OlapTable table = context.getTable();
+        String partitionName = context.getPartitionName();
+        String nowPartitionName = context.getNowPartitionName();
+        boolean executeFirstTime = context.isExecuteFirstTime();
+        int defaultBuckets = context.getDefaultBuckets();
+
+        // if execute first time or not auto bucket, use default buckets
+        if (!table.isAutoBucket() || executeFirstTime) {
+            return AutoBucketResult.fallback(defaultBuckets,
+                executeFirstTime ? "executeFirstTime" : "not auto bucket 
table");
+        }
+
+        // Get historical partitions
+        List<Partition> partitions = 
DynamicPartitionScheduler.getHistoricalPartitions(table, nowPartitionName);
+
+        // Get visible versions with error handling
+        List<Long> visibleVersions;
+        try {
+            visibleVersions = Partition.getVisibleVersions(partitions);
+        } catch (RpcException e) {
+            LOG.warn("auto bucket get visible version fail, table: [{}-{}], 
partition: {}, use default buckets: {}",
+                    table.getName(), table.getId(), partitionName, 
defaultBuckets, e);
+            return AutoBucketResult.fallback(defaultBuckets, "RpcException: " 
+ e.getMessage());
+        }
+
+        // Check if visible versions match partitions
+        if (visibleVersions == null || partitions.size() != 
visibleVersions.size()) {
+            LOG.warn(
+                    "auto bucket visible versions mismatch, table: [{}-{}], 
partition: {}, "
+                            + "partitions size: {}, visible versions size: {}, 
use default buckets: {}",
+                    table.getName(), table.getId(), partitionName, 
partitions.size(),
+                    visibleVersions != null ? visibleVersions.size() : 0, 
defaultBuckets);
+            return AutoBucketResult.fallback(defaultBuckets, "visible versions 
mismatch");
+        }
+
+        // Filter partitions with data
+        List<Partition> hasDataPartitions = 
DynamicPartitionScheduler.filterDataPartitions(partitions, visibleVersions);
+        if (hasDataPartitions.isEmpty()) {
+            LOG.info(
+                    "auto bucket use default buckets due to all partitions no 
data, table: [{}-{}], "
+                            + "partition: {}, buckets num: {}",
+                    table.getName(), table.getId(), partitionName, 
defaultBuckets);
+            return AutoBucketResult.fallback(defaultBuckets, "no data 
partitions");
+        }
+
+        // Calculate buckets based on historical data
+        Pair<Integer, Integer> calc = 
DynamicPartitionScheduler.calculateBuckets(hasDataPartitions);
+        int candidateBuckets = calc.first;
+        int previousBuckets = calc.second;
+
+        return AutoBucketResult.success(candidateBuckets, previousBuckets);
+    }
+
+    /**
+     * Calculate auto bucket number and apply bounds checking.
+     * This method combines calculateAutoBuckets with 
checkAndFixAutoBucketCalcNumIsValid.
+     *
+     * @param context the context for auto bucket calculation
+     * @return final bucket number after bounds checking
+     */
+    public static int calculateAutoBucketsWithBoundsCheck(AutoBucketContext 
context) {
+        AutoBucketResult result = calculateAutoBuckets(context);
+
+        if (!result.isSuccess()) {
+            return result.getBuckets(); // return default buckets
+        }
+
+        int candidateBuckets = result.getBuckets();
+        int previousBuckets = result.getPreviousBuckets();
+
+        // Apply bounds checking
+        int adjustedBuckets = 
DynamicPartitionScheduler.checkAndFixAutoBucketCalcNumIsValid(
+                candidateBuckets, previousBuckets, 
context.getTable().getName(), context.getPartitionName());
+
+        return adjustedBuckets > 0 ? adjustedBuckets : candidateBuckets;
+    }
+}
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java
index c6b8e69feae..3c08294d72b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java
@@ -33,7 +33,9 @@ public class AutoBucketUtils {
     private static Logger logger = LogManager.getLogger(AutoBucketUtils.class);
 
     static final long SIZE_100MB = 100 * 1024 * 1024L;
+
     static final long SIZE_1GB = 1 * 1024 * 1024 * 1024L;
+
     static final long SIZE_1TB = 1024 * SIZE_1GB;
 
     private static int getBENum() {
@@ -55,7 +57,8 @@ public class AutoBucketUtils {
         return activeBENum;
     }
 
-    private static int getBucketsNumByBEDisks() {
+    // public for mock in test
+    public static int getBucketsNumByBEDisks() {
         SystemInfoService infoService = Env.getCurrentSystemInfo();
         ImmutableMap<Long, Backend> backends;
         try {
@@ -99,10 +102,9 @@ public class AutoBucketUtils {
                 } else {
                     partitionSizePerBucket = 5;
                 }
-                logger.debug("autobucket_partition_size_per_bucket_gb <= 0, 
use adaptive {}",
-                        partitionSizePerBucket);
+                logger.debug("autobucket_partition_size_per_bucket_gb <= 0, 
use adaptive {}", partitionSizePerBucket);
             }
-            return  (int) ((partitionSize - 1) / (partitionSizePerBucket * 
SIZE_1GB) + 1);
+            return (int) ((partitionSize - 1) / (partitionSizePerBucket * 
SIZE_1GB) + 1);
         }
     }
 
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
index 0c4c97c716a..51c4e109975 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
@@ -30,6 +30,7 @@ import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.Index;
 import org.apache.doris.catalog.KeysType;
+import org.apache.doris.catalog.PartitionType;
 import org.apache.doris.catalog.Type;
 import org.apache.doris.common.Config;
 import org.apache.doris.common.ErrorCode;
@@ -397,6 +398,13 @@ public class CreateTableInfo {
         paddingEngineName(ctlName, ctx);
         checkEngineName();
 
+        // not allow auto bucket with auto list partition
+        if (partitionTableInfo != null
+                && 
partitionTableInfo.getPartitionType().equalsIgnoreCase(PartitionType.LIST.name())
+                && partitionTableInfo.isAutoPartition() && distribution != 
null && distribution.isAutoBucket()) {
+            throw new AnalysisException("Cannot use auto bucket with auto list 
partition");
+        }
+
         if (properties == null) {
             properties = Maps.newHashMap();
         }
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/analysis/PartitionExprUtilTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/PartitionExprUtilTest.java
new file mode 100644
index 00000000000..100feb99ef0
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/analysis/PartitionExprUtilTest.java
@@ -0,0 +1,223 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.analysis;
+
+import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.Partition;
+import org.apache.doris.common.Config;
+import org.apache.doris.common.FeConstants;
+import org.apache.doris.common.util.AutoBucketUtils;
+import org.apache.doris.service.ExecuteEnv;
+import org.apache.doris.service.FrontendServiceImpl;
+import org.apache.doris.thrift.TCreatePartitionRequest;
+import org.apache.doris.thrift.TCreatePartitionResult;
+import org.apache.doris.thrift.TNullableStringLiteral;
+import org.apache.doris.thrift.TStatusCode;
+import org.apache.doris.utframe.TestWithFeService;
+
+import mockit.Expectations;
+import mockit.Mock;
+import mockit.MockUp;
+import mockit.Mocked;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class PartitionExprUtilTest extends TestWithFeService {
+
+    @Mocked
+    ExecuteEnv exeEnv;
+
+    @Override
+    protected void runBeforeAll() throws Exception {
+        FeConstants.runningUnitTest = true;
+        FeConstants.default_scheduler_interval_millisecond = 100;
+        Config.dynamic_partition_enable = true;
+        Config.dynamic_partition_check_interval_seconds = 1;
+        Config.autobucket_max_buckets = 10000;
+        createDatabase("test");
+    }
+
+    @Test
+    public void testAutoBucketLargeDataCalculatesBuckets() throws Exception {
+        String createOlapTblStmt = "CREATE TABLE test.auto_bucket_calc_large 
(\n"
+                + "  event_day DATETIME NOT NULL,\n"
+                + "  site_id INT,\n"
+                + "  v INT\n"
+                + ")\n"
+                + "DUPLICATE KEY(event_day, site_id)\n"
+                + "AUTO PARTITION BY range (date_trunc(event_day,'day')) (\n"
+                + "\n"
+                + ")\n"
+                + "DISTRIBUTED BY HASH(event_day) BUCKETS AUTO\n"
+                + "PROPERTIES(\"replication_num\"=\"1\");";
+
+        createTable(createOlapTblStmt);
+        Database db = 
Env.getCurrentInternalCatalog().getDbOrAnalysisException("test");
+        OlapTable table = (OlapTable) 
db.getTableOrAnalysisException("auto_bucket_calc_large");
+
+        FrontendServiceImpl impl = new FrontendServiceImpl(exeEnv);
+
+        // Create 3 historical partitions: 2023-08-01, 02, 03
+        String[] days = {"2023-08-01 00:00:00", "2023-08-02 00:00:00", 
"2023-08-03 00:00:00"};
+        String[] partNames = {"p20230801000000", "p20230802000000", 
"p20230803000000"};
+        for (String day : days) {
+            List<List<TNullableStringLiteral>> partitionValues = new 
ArrayList<>();
+            List<TNullableStringLiteral> values = new ArrayList<>();
+            TNullableStringLiteral start = new TNullableStringLiteral();
+            start.setValue(day);
+            values.add(start);
+            partitionValues.add(values);
+
+            TCreatePartitionRequest request = new TCreatePartitionRequest();
+            request.setDbId(db.getId());
+            request.setTableId(table.getId());
+            request.setPartitionValues(partitionValues);
+            TCreatePartitionResult result = impl.createPartition(request);
+            Assertions.assertEquals(TStatusCode.OK, 
result.getStatus().getStatusCode());
+        }
+
+        // Mark them as having data (visibleVersion >= 2)
+        table.writeLockOrDdlException();
+        try {
+            for (String pn : partNames) {
+                Partition p = table.getPartition(pn);
+                Assertions.assertNotNull(p);
+                p.setVisibleVersionAndTime(2L, System.currentTimeMillis());
+            }
+        } finally {
+            table.writeUnlock();
+        }
+
+        // Mock large compressed data sizes for each historical partition
+        final long GB = 1024L * 1024L * 1024L;
+        Partition p1 = table.getPartition(partNames[0]);
+        Partition p2 = table.getPartition(partNames[1]);
+        Partition p3 = table.getPartition(partNames[2]);
+        new Expectations(p1, p2, p3) {
+            {
+                p1.getDataSizeExcludeEmptyReplica(true);
+                result = 1000 * GB; // ~1 TB uncompressed
+                minTimes = 0;
+                p2.getDataSizeExcludeEmptyReplica(true);
+                result = 1500 * GB; // ~1.5 TB uncompressed
+                minTimes = 0;
+                p3.getDataSizeExcludeEmptyReplica(true);
+                result = 2000 * GB; // ~2 TB uncompressed
+                minTimes = 0;
+            }
+        };
+
+        // Directly mock BE disk cap to a very large value so AutoBucketUtils 
doesn't limit by disks
+        new MockUp<AutoBucketUtils>() {
+            @Mock
+            public int getBucketsNumByBEDisks() { // private static in target, 
allowed to mock by name
+                return Integer.MAX_VALUE;
+            }
+        };
+
+        // Compute expected bucket by reusing scheduler helpers before 
creating the new partition (2023-08-04)
+        String newPartName = "p20230804000000";
+
+        // Create the new partition (2023-08-04) and verify bucket num
+        List<List<TNullableStringLiteral>> partitionValues4 = new 
ArrayList<>();
+        List<TNullableStringLiteral> values4 = new ArrayList<>();
+        TNullableStringLiteral start4 = new TNullableStringLiteral();
+        start4.setValue("2023-08-04 00:00:00");
+        values4.add(start4);
+        partitionValues4.add(values4);
+
+        TCreatePartitionRequest request4 = new TCreatePartitionRequest();
+        request4.setDbId(db.getId());
+        request4.setTableId(table.getId());
+        request4.setPartitionValues(partitionValues4);
+        TCreatePartitionResult result4 = impl.createPartition(request4);
+        Assertions.assertEquals(TStatusCode.OK, 
result4.getStatus().getStatusCode());
+
+        Partition p4 = table.getPartition(newPartName);
+        Assertions.assertNotNull(p4);
+        Assertions.assertEquals(p4.getDistributionInfo().getBucketNum(), 500);
+    }
+
+    @Test
+    public void testAutoBucketReusePrevPartitionBuckets() throws Exception {
+        String createOlapTblStmt = "CREATE TABLE test.auto_bucket_calc (\n"
+                + "  event_day DATETIME NOT NULL,\n"
+                + "  site_id INT,\n"
+                + "  v INT\n"
+                + ")\n"
+                + "DUPLICATE KEY(event_day, site_id)\n"
+                + "AUTO PARTITION BY range (date_trunc(event_day,'day')) (\n"
+                + "\n"
+                + ")\n"
+                + "DISTRIBUTED BY HASH(event_day) BUCKETS AUTO\n"
+                + "PROPERTIES(\"replication_num\"=\"1\");";
+
+        createTable(createOlapTblStmt);
+        Database db = 
Env.getCurrentInternalCatalog().getDbOrAnalysisException("test");
+        OlapTable table = (OlapTable) 
db.getTableOrAnalysisException("auto_bucket_calc");
+
+        // Create first partition via RPC (acts as historical partition)
+        List<List<TNullableStringLiteral>> partitionValues = new ArrayList<>();
+        List<TNullableStringLiteral> values = new ArrayList<>();
+        TNullableStringLiteral start = new TNullableStringLiteral();
+        start.setValue("2023-08-05 00:00:00");
+        values.add(start);
+        partitionValues.add(values);
+
+        FrontendServiceImpl impl = new FrontendServiceImpl(exeEnv);
+        TCreatePartitionRequest request = new TCreatePartitionRequest();
+        request.setDbId(db.getId());
+        request.setTableId(table.getId());
+        request.setPartitionValues(partitionValues);
+        TCreatePartitionResult result = impl.createPartition(request);
+        Assertions.assertEquals(TStatusCode.OK, 
result.getStatus().getStatusCode());
+
+        Partition p1 = table.getPartition("p20230805000000");
+        Assertions.assertNotNull(p1);
+        // Mark the partition as having data (visibleVersion >= 2) to be 
counted by filterDataPartitions
+        table.writeLockOrDdlException();
+        try {
+            p1.setVisibleVersionAndTime(2L, System.currentTimeMillis());
+        } finally {
+            table.writeUnlock();
+        }
+        // Create next partition; PartitionExprUtil should reuse previous 
partition buckets (8)
+        List<List<TNullableStringLiteral>> partitionValues2 = new 
ArrayList<>();
+        List<TNullableStringLiteral> values2 = new ArrayList<>();
+        TNullableStringLiteral start2 = new TNullableStringLiteral();
+        start2.setValue("2023-08-06 00:00:00");
+        values2.add(start2);
+        partitionValues2.add(values2);
+
+        TCreatePartitionRequest request2 = new TCreatePartitionRequest();
+        request2.setDbId(db.getId());
+        request2.setTableId(table.getId());
+        request2.setPartitionValues(partitionValues2);
+        TCreatePartitionResult result2 = impl.createPartition(request2);
+        Assertions.assertEquals(TStatusCode.OK, 
result2.getStatus().getStatusCode());
+
+        Partition p2 = table.getPartition("p20230806000000");
+        Assertions.assertNotNull(p2);
+        Assertions.assertEquals(p1.getDistributionInfo().getBucketNum(), 
p2.getDistributionInfo().getBucketNum());
+    }
+}
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/common/util/AutoBucketCalculatorTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/AutoBucketCalculatorTest.java
new file mode 100644
index 00000000000..e23ec904252
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/common/util/AutoBucketCalculatorTest.java
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.util;
+
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.common.util.AutoBucketCalculator.AutoBucketContext;
+import org.apache.doris.common.util.AutoBucketCalculator.AutoBucketResult;
+
+import mockit.Expectations;
+import mockit.Mocked;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class AutoBucketCalculatorTest {
+
+    @Mocked
+    OlapTable table;
+
+    @Test
+    public void testCalculateAutoBucketsNotAutoBucket() {
+        new Expectations() {
+            {
+                table.isAutoBucket();
+                result = false;
+
+                table.getName();
+                result = "test_table";
+                minTimes = 0;
+
+                table.getId();
+                result = 1L;
+                minTimes = 0;
+            }
+        };
+
+        AutoBucketContext context = new AutoBucketContext(table, "p1", "p2", 
false, 10);
+        AutoBucketResult result = 
AutoBucketCalculator.calculateAutoBuckets(context);
+
+        Assertions.assertFalse(result.isSuccess());
+        Assertions.assertEquals(10, result.getBuckets());
+        Assertions.assertEquals("not auto bucket table", 
result.getErrorMessage());
+    }
+
+    @Test
+    public void testCalculateAutoBucketsExecuteFirstTime() {
+        new Expectations() {
+            {
+                table.isAutoBucket();
+                result = true;
+
+                table.getName();
+                result = "test_table";
+                minTimes = 0;
+
+                table.getId();
+                result = 1L;
+                minTimes = 0;
+            }
+        };
+
+        AutoBucketContext context = new AutoBucketContext(table, "p1", "p2", 
true, 10);
+        AutoBucketResult result = 
AutoBucketCalculator.calculateAutoBuckets(context);
+
+        Assertions.assertFalse(result.isSuccess());
+        Assertions.assertEquals(10, result.getBuckets());
+        Assertions.assertEquals("executeFirstTime", result.getErrorMessage());
+    }
+
+    @Test
+    public void testCalculateAutoBucketsWithBoundsCheck() {
+        new Expectations() {
+            {
+                table.isAutoBucket();
+                result = false;
+
+                table.getName();
+                result = "test_table";
+                minTimes = 0;
+
+                table.getId();
+                result = 1L;
+                minTimes = 0;
+            }
+        };
+
+        AutoBucketContext context = new AutoBucketContext(table, "p1", "p2", 
false, 10);
+        int buckets = 
AutoBucketCalculator.calculateAutoBucketsWithBoundsCheck(context);
+
+        Assertions.assertEquals(10, buckets);
+    }
+}
diff --git 
a/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy 
b/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy
index 7bb2277f184..07c271bec47 100644
--- 
a/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy
+++ 
b/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy
@@ -68,7 +68,7 @@ PARTITION p202411248 VALUES IN ("20241124"),
 PARTITION p202411258 VALUES IN ("20241125"),
 PARTITION p202411268 VALUES IN ("20241126"),
 PARTITION p202411278 VALUES IN ("20241127"))
-DISTRIBUTED BY HASH(`khh`, `dt`) BUCKETS AUTO
+DISTRIBUTED BY HASH(`khh`, `dt`) buckets 10
 PROPERTIES (
 "replication_allocation" = "tag.location.default: 1",
 "min_load_replica_num" = "-1",
@@ -136,7 +136,7 @@ PARTITION p202411248 VALUES IN ("20241124"),
 PARTITION p202411258 VALUES IN ("20241125"),
 PARTITION p202411268 VALUES IN ("20241126"),
 PARTITION p202411278 VALUES IN ("20241127"))
-DISTRIBUTED BY HASH(`khh`, `dt`) BUCKETS AUTO
+DISTRIBUTED BY HASH(`khh`, `dt`) buckets 10
 PROPERTIES (
 "replication_allocation" = "tag.location.default: 1",
 "min_load_replica_num" = "-1",
diff --git 
a/regression-test/suites/nereids_rules_p0/expression/test_simplify_range.groovy 
b/regression-test/suites/nereids_rules_p0/expression/test_simplify_range.groovy
index 986b38a7c1c..89cf3581b40 100644
--- 
a/regression-test/suites/nereids_rules_p0/expression/test_simplify_range.groovy
+++ 
b/regression-test/suites/nereids_rules_p0/expression/test_simplify_range.groovy
@@ -37,7 +37,7 @@ suite('test_simplify_range') {
         )
         unique key(pk_id, collect_time)
         auto partition by list(collect_time)()
-        distributed by hash(pk_id) buckets auto
+        distributed by hash(pk_id) buckets 10
         properties (
             "replication_allocation" = "tag.location.default: 1",
             "enable_unique_key_merge_on_write" = "true"
diff --git 
a/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy
 
b/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy
index 5855ecc06e1..382785fc2a4 100644
--- 
a/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy
+++ 
b/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy
@@ -319,7 +319,7 @@ suite("test_auto_list_partition") {
                 LAST_UPLOAD_TIME DATETIME COMMENT 'LAST_UPLOAD_TIME'
             )
             AUTO PARTITION BY LIST (sum(DATE_ID))()
-            DISTRIBUTED BY HASH(DATE_ID) BUCKETS AUTO
+            DISTRIBUTED BY HASH(DATE_ID) buckets 10
             PROPERTIES (
                 "replication_num" = "1"
             );
diff --git 
a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
 
b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
index 9aaf0f6bfe6..3a279693993 100644
--- 
a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
+++ 
b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
@@ -344,4 +344,18 @@ suite("test_auto_partition_behavior") {
         """
         exception "auto create partition only support date_trunc function of 
RANGE partition"
     }
+
+    test {
+        sql """
+            create table ap_wrong(
+                dt datetime not null,
+                k0 varchar not null
+            )
+            AUTO PARTITION BY LIST (dt)
+            ()
+            DISTRIBUTED BY HASH(`dt`) BUCKETS AUTO
+            properties("replication_num" = "1");
+        """
+        exception "Cannot use auto bucket with auto list partition"
+    }
 }
diff --git 
a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_with_single_replica_insert.groovy
 
b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_with_single_replica_insert.groovy
index 510b24602ac..d363bbc5d58 100644
--- 
a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_with_single_replica_insert.groovy
+++ 
b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_with_single_replica_insert.groovy
@@ -31,7 +31,7 @@ suite("test_auto_partition_with_single_replica_insert") {
         AUTO PARTITION BY LIST (`chain_name`)
         (PARTITION pchain5fname10 VALUES IN ("chain_name"),
         PARTITION p4e0995e85ce1534e4e3a5 VALUES IN ("星辰医疗科技有限公司"))
-        DISTRIBUTED BY HASH(`user_id`) BUCKETS AUTO
+        DISTRIBUTED BY HASH(`user_id`) buckets 10
         PROPERTIES (
             "replication_allocation" = "tag.location.default: 1"
         );
@@ -61,7 +61,7 @@ suite("test_auto_partition_with_single_replica_insert") {
         AUTO PARTITION BY LIST (`chain_name`)
         (PARTITION pchain5fname10 VALUES IN ("chain_name"),
         PARTITION p4e0995e85ce1534e4e3a5 VALUES IN ("星辰医疗科技有限公司"))
-        DISTRIBUTED BY HASH(`user_id`) BUCKETS AUTO
+        DISTRIBUTED BY HASH(`user_id`) buckets 10
         PROPERTIES (
             "replication_allocation" = "tag.location.default: 1"
         );
diff --git a/regression-test/suites/query_p0/show/test_show_partitions.groovy 
b/regression-test/suites/query_p0/show/test_show_partitions.groovy
index bf88872029e..b8dacc19b82 100644
--- a/regression-test/suites/query_p0/show/test_show_partitions.groovy
+++ b/regression-test/suites/query_p0/show/test_show_partitions.groovy
@@ -25,7 +25,7 @@ suite("test_show_partitions") {
             name varchar (255)
         ) ENGINE = OLAP DUPLICATE KEY(month, id)
         AUTO PARTITION BY LIST (month)()
-        distributed by hash (id) buckets auto
+        distributed by hash (id) buckets 10
         PROPERTIES(
         "replication_allocation" = "tag.location.default: 1"
         )


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to