This is an automated email from the ASF dual-hosted git repository.
zclll pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 302aec1316a [Enhancement](auto partition) Auto partition could use auto bucket (#56921)
302aec1316a is described below
commit 302aec1316a0087e3fb0ef2a0be379c51588fc44
Author: zclllyybb <[email protected]>
AuthorDate: Thu Oct 16 16:53:11 2025 +0800
[Enhancement](auto partition) Auto partition could use auto bucket (#56921)
1. Forbid auto list partition + auto bucket.
2. Support auto range partition + auto bucket: calculate a proper bucket number
for each new partition, based on the assumption that the table is loaded
incrementally.
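
For illustration, a minimal DDL sketch of the two cases (table and column
names here are illustrative, not taken from the patch):

    -- rejected after this change: auto LIST partition + BUCKETS AUTO
    CREATE TABLE t_list (dt DATETIME NOT NULL, k0 VARCHAR NOT NULL)
    AUTO PARTITION BY LIST (dt) ()
    DISTRIBUTED BY HASH(dt) BUCKETS AUTO
    PROPERTIES ("replication_num" = "1");
    -- ERROR: Cannot use auto bucket with auto list partition

    -- supported: auto RANGE partition + BUCKETS AUTO; each newly created
    -- partition gets a bucket number estimated from the sizes of existing
    -- partitions that already contain data
    CREATE TABLE t_range (dt DATETIME NOT NULL, v INT)
    AUTO PARTITION BY RANGE (date_trunc(dt, 'day')) ()
    DISTRIBUTED BY HASH(dt) BUCKETS AUTO
    PROPERTIES ("replication_num" = "1");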
---
.../apache/doris/analysis/PartitionExprUtil.java | 28 ++-
.../doris/clone/DynamicPartitionScheduler.java | 42 +---
.../doris/common/util/AutoBucketCalculator.java | 205 +++++++++++++++++++
.../apache/doris/common/util/AutoBucketUtils.java | 10 +-
.../trees/plans/commands/info/CreateTableInfo.java | 8 +
.../doris/analysis/PartitionExprUtilTest.java | 223 +++++++++++++++++++++
.../common/util/AutoBucketCalculatorTest.java | 106 ++++++++++
.../test_iot_auto_detect_fail.groovy | 4 +-
.../expression/test_simplify_range.groovy | 2 +-
.../auto_partition/test_auto_list_partition.groovy | 2 +-
.../test_auto_partition_behavior.groovy | 14 ++
...uto_partition_with_single_replica_insert.groovy | 4 +-
.../query_p0/show/test_show_partitions.groovy | 2 +-
13 files changed, 606 insertions(+), 44 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java
index 7d2d6b9743e..ec6772e849a 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java
@@ -19,11 +19,14 @@ package org.apache.doris.analysis;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.DataProperty;
+import org.apache.doris.catalog.DistributionInfo;
+import org.apache.doris.catalog.HashDistributionInfo;
import org.apache.doris.catalog.OlapTable;
import org.apache.doris.catalog.PartitionInfo;
import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.util.AutoBucketCalculator;
import org.apache.doris.common.util.PropertyAnalyzer;
import org.apache.doris.thrift.TNullableStringLiteral;
@@ -187,7 +190,30 @@ public class PartitionExprUtil {
}
Map<String, String> partitionProperties = Maps.newHashMap();
-        DistributionDesc distributionDesc = olapTable.getDefaultDistributionInfo().toDistributionDesc();
+        DistributionInfo defaultDistributionInfo = olapTable.getDefaultDistributionInfo();
+        DistributionDesc distributionDesc = defaultDistributionInfo.toDistributionDesc();
+        if (olapTable.isAutoBucket() && partitionType == PartitionType.RANGE) {
+            // Use unified auto bucket calculator
+            AutoBucketCalculator.AutoBucketContext context = new AutoBucketCalculator.AutoBucketContext(
+                    olapTable, partitionName, partitionName, false, defaultDistributionInfo.getBucketNum());
+
+            int bucketsNum = AutoBucketCalculator.calculateAutoBucketsWithBoundsCheck(context);
+
+            // Only update distribution if calculation was successful (bucketsNum != default)
+            if (bucketsNum != defaultDistributionInfo.getBucketNum()) {
+                if (defaultDistributionInfo.getType() == DistributionInfo.DistributionInfoType.HASH) {
+                    HashDistributionInfo hashDistributionInfo = (HashDistributionInfo) defaultDistributionInfo;
+                    List<String> distColumnNames = new ArrayList<>();
+                    for (Column distributionColumn : hashDistributionInfo.getDistributionColumns()) {
+                        distColumnNames.add(distributionColumn.getName());
+                    }
+                    distributionDesc = new HashDistributionDesc(bucketsNum, distColumnNames);
+                } else {
+                    distributionDesc = new RandomDistributionDesc(bucketsNum);
+                }
+            }
+        }
         SinglePartitionDesc singleRangePartitionDesc = new SinglePartitionDesc(true, partitionName,
                 partitionKeyDesc, partitionProperties);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
index 1a96280ea30..3e94c68953f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/clone/DynamicPartitionScheduler.java
@@ -45,6 +45,7 @@ import org.apache.doris.common.Config;
import org.apache.doris.common.DdlException;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.Pair;
+import org.apache.doris.common.util.AutoBucketCalculator;
import org.apache.doris.common.util.AutoBucketUtils;
import org.apache.doris.common.util.DebugPointUtil;
import org.apache.doris.common.util.DynamicPartitionUtil;
@@ -55,7 +56,6 @@ import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.meta.MetaContext;
import org.apache.doris.persist.PartitionPersistInfo;
-import org.apache.doris.rpc.RpcException;
import org.apache.doris.thrift.TStorageMedium;
import com.google.common.base.Preconditions;
@@ -194,31 +194,14 @@ public class DynamicPartitionScheduler extends MasterDaemon {
     private static Pair<Integer, Integer> getBucketsNum(DynamicPartitionProperty property, OlapTable table,
             String partitionName, String nowPartitionName, boolean executeFirstTime) {
-        // if execute first time, all partitions no contain data
-        if (!table.isAutoBucket() || executeFirstTime) {
-            return Pair.of(property.getBuckets(), 0);
-        }
-
-        List<Partition> partitions = getHistoricalPartitions(table, nowPartitionName);
-        List<Long> visibleVersions;
-        try {
-            visibleVersions = Partition.getVisibleVersions(partitions);
-        } catch (RpcException e) {
-            LOG.warn("auto bucket use property's buckets get visible version fail, table: [{}-{}], "
-                    + "partition: {}, buckets num: {}, exception: ",
-                    table.getName(), table.getId(), partitionName, property.getBuckets(), e);
-            return Pair.of(property.getBuckets(), 0);
-        }
+        AutoBucketCalculator.AutoBucketContext context = new AutoBucketCalculator.AutoBucketContext(
+                table, partitionName, nowPartitionName, executeFirstTime, property.getBuckets());
-        List<Partition> hasDataPartitions = filterDataPartitions(partitions, visibleVersions);
-        if (hasDataPartitions.isEmpty()) {
-            return handleNoDataPartitions(table, partitionName, property.getBuckets());
-        }
-
-        return calculateBuckets(hasDataPartitions);
+        AutoBucketCalculator.AutoBucketResult result = AutoBucketCalculator.calculateAutoBuckets(context);
+        return Pair.of(result.getBuckets(), result.getPreviousBuckets());
     }
-    private static List<Partition> getHistoricalPartitions(OlapTable table, String nowPartitionName) {
+    public static List<Partition> getHistoricalPartitions(OlapTable table, String nowPartitionName) {
         RangePartitionInfo info = (RangePartitionInfo) (table.getPartitionInfo());
         List<Map.Entry<Long, PartitionItem>> idToItems = new ArrayList<>(info.getIdToItem(false).entrySet());
         idToItems.sort(Comparator.comparing(o -> ((RangePartitionItem) o.getValue()).getItems().upperEndpoint()));
@@ -228,7 +211,7 @@ public class DynamicPartitionScheduler extends MasterDaemon {
                 .collect(Collectors.toList());
     }
-    private static List<Partition> filterDataPartitions(List<Partition> partitions, List<Long> visibleVersions) {
+    public static List<Partition> filterDataPartitions(List<Partition> partitions, List<Long> visibleVersions) {
         Preconditions.checkState(partitions.size() == visibleVersions.size(),
                 String.format("partitions size %d not eq visibleVersions size %d, impossible",
                         partitions.size(), visibleVersions.size()));
@@ -241,14 +224,8 @@ public class DynamicPartitionScheduler extends MasterDaemon {
         return hasDataPartitions;
     }
-    private static Pair<Integer, Integer> handleNoDataPartitions(OlapTable table,
-            String partitionName, int defaultBuckets) {
-        LOG.info("auto bucket use property's buckets due to all partitions no data, table: [{}-{}], "
-                + "partition: {}, buckets num: {}", table.getName(), table.getId(), partitionName, defaultBuckets);
-        return Pair.of(defaultBuckets, 0);
-    }
-    private static Pair<Integer, Integer> calculateBuckets(List<Partition> hasDataPartitions) {
+    public static Pair<Integer, Integer> calculateBuckets(List<Partition> hasDataPartitions) {
         List<Long> partitionSizeArray = new ArrayList<>();
         List<Long> sizeUnknownArray = new ArrayList<>();
@@ -417,7 +394,8 @@ public class DynamicPartitionScheduler extends MasterDaemon {
         return addPartitionClauses;
     }
-    private int checkAndFixAutoBucketCalcNumIsValid(int currentPartitionNumBuckets, int previousPartitionNumBuckets,
+    public static int checkAndFixAutoBucketCalcNumIsValid(int currentPartitionNumBuckets,
+            int previousPartitionNumBuckets,
             String tableName, String partitionName) {
// previousPartitionBucketsNum == 0, some abnormal case, ignore it
if (currentPartitionNumBuckets != 0) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketCalculator.java
new file mode 100644
index 00000000000..4434c404a7c
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketCalculator.java
@@ -0,0 +1,205 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.util;
+
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.Partition;
+import org.apache.doris.clone.DynamicPartitionScheduler;
+import org.apache.doris.common.Pair;
+import org.apache.doris.rpc.RpcException;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+import java.util.List;
+
+/**
+ * Utility class for calculating auto bucket numbers for partitions.
+ * This class provides a unified interface for auto bucket calculation
+ * used by both DynamicPartitionScheduler and PartitionExprUtil.
+ */
+public class AutoBucketCalculator {
+    private static final Logger LOG = LogManager.getLogger(AutoBucketCalculator.class);
+
+ /**
+ * Context for auto bucket calculation
+ */
+ public static class AutoBucketContext {
+ private final OlapTable table;
+ private final String partitionName;
+ private final String nowPartitionName;
+ private final boolean executeFirstTime;
+ private final int defaultBuckets;
+
+        public AutoBucketContext(OlapTable table, String partitionName, String nowPartitionName,
+                boolean executeFirstTime, int defaultBuckets) {
+ this.table = table;
+ this.partitionName = partitionName;
+ this.nowPartitionName = nowPartitionName;
+ this.executeFirstTime = executeFirstTime;
+ this.defaultBuckets = defaultBuckets;
+ }
+
+ public OlapTable getTable() {
+ return table;
+ }
+
+ public String getPartitionName() {
+ return partitionName;
+ }
+
+ public String getNowPartitionName() {
+ return nowPartitionName;
+ }
+
+ public boolean isExecuteFirstTime() {
+ return executeFirstTime;
+ }
+
+ public int getDefaultBuckets() {
+ return defaultBuckets;
+ }
+ }
+
+ /**
+ * Result of auto bucket calculation
+ */
+ public static class AutoBucketResult {
+ private final int buckets;
+ private final int previousBuckets;
+ private final boolean success;
+ private final String errorMessage;
+
+        private AutoBucketResult(int buckets, int previousBuckets, boolean success, String errorMessage) {
+ this.buckets = buckets;
+ this.previousBuckets = previousBuckets;
+ this.success = success;
+ this.errorMessage = errorMessage;
+ }
+
+        public static AutoBucketResult success(int buckets, int previousBuckets) {
+ return new AutoBucketResult(buckets, previousBuckets, true, null);
+ }
+
+        public static AutoBucketResult fallback(int defaultBuckets, String reason) {
+ return new AutoBucketResult(defaultBuckets, 0, false, reason);
+ }
+
+ public int getBuckets() {
+ return buckets;
+ }
+
+ public int getPreviousBuckets() {
+ return previousBuckets;
+ }
+
+ public boolean isSuccess() {
+ return success;
+ }
+
+ public String getErrorMessage() {
+ return errorMessage;
+ }
+ }
+
+ /**
+ * Calculate auto bucket number for a partition.
+ * This is the unified method that replaces the logic in both
+     * DynamicPartitionScheduler.getBucketsNum and PartitionExprUtil.getAddPartitionClauseFromPartitionValues
+     *
+     * @param context the context for auto bucket calculation
+     * @return AutoBucketResult containing bucket numbers and calculation status
+     */
+    public static AutoBucketResult calculateAutoBuckets(AutoBucketContext context) {
+ OlapTable table = context.getTable();
+ String partitionName = context.getPartitionName();
+ String nowPartitionName = context.getNowPartitionName();
+ boolean executeFirstTime = context.isExecuteFirstTime();
+ int defaultBuckets = context.getDefaultBuckets();
+
+ // if execute first time or not auto bucket, use default buckets
+ if (!table.isAutoBucket() || executeFirstTime) {
+ return AutoBucketResult.fallback(defaultBuckets,
+                    executeFirstTime ? "executeFirstTime" : "not auto bucket table");
+ }
+
+ // Get historical partitions
+        List<Partition> partitions = DynamicPartitionScheduler.getHistoricalPartitions(table, nowPartitionName);
+
+ // Get visible versions with error handling
+ List<Long> visibleVersions;
+ try {
+ visibleVersions = Partition.getVisibleVersions(partitions);
+ } catch (RpcException e) {
+ LOG.warn("auto bucket get visible version fail, table: [{}-{}],
partition: {}, use default buckets: {}",
+ table.getName(), table.getId(), partitionName,
defaultBuckets, e);
+ return AutoBucketResult.fallback(defaultBuckets, "RpcException: "
+ e.getMessage());
+ }
+
+ // Check if visible versions match partitions
+        if (visibleVersions == null || partitions.size() != visibleVersions.size()) {
+            LOG.warn(
+                    "auto bucket visible versions mismatch, table: [{}-{}], partition: {}, "
+                            + "partitions size: {}, visible versions size: {}, use default buckets: {}",
+                    table.getName(), table.getId(), partitionName, partitions.size(),
+                    visibleVersions != null ? visibleVersions.size() : 0, defaultBuckets);
+            return AutoBucketResult.fallback(defaultBuckets, "visible versions mismatch");
+ }
+
+ // Filter partitions with data
+        List<Partition> hasDataPartitions = DynamicPartitionScheduler.filterDataPartitions(partitions, visibleVersions);
+        if (hasDataPartitions.isEmpty()) {
+            LOG.info(
+                    "auto bucket use default buckets due to all partitions no data, table: [{}-{}], "
+                            + "partition: {}, buckets num: {}",
+                    table.getName(), table.getId(), partitionName, defaultBuckets);
+            return AutoBucketResult.fallback(defaultBuckets, "no data partitions");
+ }
+
+ // Calculate buckets based on historical data
+        Pair<Integer, Integer> calc = DynamicPartitionScheduler.calculateBuckets(hasDataPartitions);
+ int candidateBuckets = calc.first;
+ int previousBuckets = calc.second;
+
+ return AutoBucketResult.success(candidateBuckets, previousBuckets);
+ }
+
+ /**
+ * Calculate auto bucket number and apply bounds checking.
+     * This method combines calculateAutoBuckets with checkAndFixAutoBucketCalcNumIsValid.
+ *
+ * @param context the context for auto bucket calculation
+ * @return final bucket number after bounds checking
+ */
+    public static int calculateAutoBucketsWithBoundsCheck(AutoBucketContext context) {
+ AutoBucketResult result = calculateAutoBuckets(context);
+
+ if (!result.isSuccess()) {
+ return result.getBuckets(); // return default buckets
+ }
+
+ int candidateBuckets = result.getBuckets();
+ int previousBuckets = result.getPreviousBuckets();
+
+ // Apply bounds checking
+        int adjustedBuckets = DynamicPartitionScheduler.checkAndFixAutoBucketCalcNumIsValid(
+                candidateBuckets, previousBuckets, context.getTable().getName(), context.getPartitionName());
+
+ return adjustedBuckets > 0 ? adjustedBuckets : candidateBuckets;
+ }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java
index c6b8e69feae..3c08294d72b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/AutoBucketUtils.java
@@ -33,7 +33,9 @@ public class AutoBucketUtils {
private static Logger logger = LogManager.getLogger(AutoBucketUtils.class);
static final long SIZE_100MB = 100 * 1024 * 1024L;
+
static final long SIZE_1GB = 1 * 1024 * 1024 * 1024L;
+
static final long SIZE_1TB = 1024 * SIZE_1GB;
private static int getBENum() {
@@ -55,7 +57,8 @@ public class AutoBucketUtils {
return activeBENum;
}
- private static int getBucketsNumByBEDisks() {
+ // public for mock in test
+ public static int getBucketsNumByBEDisks() {
SystemInfoService infoService = Env.getCurrentSystemInfo();
ImmutableMap<Long, Backend> backends;
try {
@@ -99,10 +102,9 @@ public class AutoBucketUtils {
} else {
partitionSizePerBucket = 5;
}
- logger.debug("autobucket_partition_size_per_bucket_gb <= 0,
use adaptive {}",
- partitionSizePerBucket);
+ logger.debug("autobucket_partition_size_per_bucket_gb <= 0,
use adaptive {}", partitionSizePerBucket);
}
- return (int) ((partitionSize - 1) / (partitionSizePerBucket *
SIZE_1GB) + 1);
+ return (int) ((partitionSize - 1) / (partitionSizePerBucket *
SIZE_1GB) + 1);
}
}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
index 0c4c97c716a..51c4e109975 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateTableInfo.java
@@ -30,6 +30,7 @@ import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.Index;
import org.apache.doris.catalog.KeysType;
+import org.apache.doris.catalog.PartitionType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.Config;
import org.apache.doris.common.ErrorCode;
@@ -397,6 +398,13 @@ public class CreateTableInfo {
paddingEngineName(ctlName, ctx);
checkEngineName();
+        // not allow auto bucket with auto list partition
+        if (partitionTableInfo != null
+                && partitionTableInfo.getPartitionType().equalsIgnoreCase(PartitionType.LIST.name())
+                && partitionTableInfo.isAutoPartition() && distribution != null && distribution.isAutoBucket()) {
+            throw new AnalysisException("Cannot use auto bucket with auto list partition");
+ }
+
if (properties == null) {
properties = Maps.newHashMap();
}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/PartitionExprUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/PartitionExprUtilTest.java
new file mode 100644
index 00000000000..100feb99ef0
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/PartitionExprUtilTest.java
@@ -0,0 +1,223 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.analysis;
+
+import org.apache.doris.catalog.Database;
+import org.apache.doris.catalog.Env;
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.catalog.Partition;
+import org.apache.doris.common.Config;
+import org.apache.doris.common.FeConstants;
+import org.apache.doris.common.util.AutoBucketUtils;
+import org.apache.doris.service.ExecuteEnv;
+import org.apache.doris.service.FrontendServiceImpl;
+import org.apache.doris.thrift.TCreatePartitionRequest;
+import org.apache.doris.thrift.TCreatePartitionResult;
+import org.apache.doris.thrift.TNullableStringLiteral;
+import org.apache.doris.thrift.TStatusCode;
+import org.apache.doris.utframe.TestWithFeService;
+
+import mockit.Expectations;
+import mockit.Mock;
+import mockit.MockUp;
+import mockit.Mocked;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class PartitionExprUtilTest extends TestWithFeService {
+
+ @Mocked
+ ExecuteEnv exeEnv;
+
+ @Override
+ protected void runBeforeAll() throws Exception {
+ FeConstants.runningUnitTest = true;
+ FeConstants.default_scheduler_interval_millisecond = 100;
+ Config.dynamic_partition_enable = true;
+ Config.dynamic_partition_check_interval_seconds = 1;
+ Config.autobucket_max_buckets = 10000;
+ createDatabase("test");
+ }
+
+ @Test
+ public void testAutoBucketLargeDataCalculatesBuckets() throws Exception {
+ String createOlapTblStmt = "CREATE TABLE test.auto_bucket_calc_large
(\n"
+ + " event_day DATETIME NOT NULL,\n"
+ + " site_id INT,\n"
+ + " v INT\n"
+ + ")\n"
+ + "DUPLICATE KEY(event_day, site_id)\n"
+ + "AUTO PARTITION BY range (date_trunc(event_day,'day')) (\n"
+ + "\n"
+ + ")\n"
+ + "DISTRIBUTED BY HASH(event_day) BUCKETS AUTO\n"
+ + "PROPERTIES(\"replication_num\"=\"1\");";
+
+ createTable(createOlapTblStmt);
+        Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException("test");
+        OlapTable table = (OlapTable) db.getTableOrAnalysisException("auto_bucket_calc_large");
+
+ FrontendServiceImpl impl = new FrontendServiceImpl(exeEnv);
+
+ // Create 3 historical partitions: 2023-08-01, 02, 03
+        String[] days = {"2023-08-01 00:00:00", "2023-08-02 00:00:00", "2023-08-03 00:00:00"};
+        String[] partNames = {"p20230801000000", "p20230802000000", "p20230803000000"};
+ for (String day : days) {
+            List<List<TNullableStringLiteral>> partitionValues = new ArrayList<>();
+ List<TNullableStringLiteral> values = new ArrayList<>();
+ TNullableStringLiteral start = new TNullableStringLiteral();
+ start.setValue(day);
+ values.add(start);
+ partitionValues.add(values);
+
+ TCreatePartitionRequest request = new TCreatePartitionRequest();
+ request.setDbId(db.getId());
+ request.setTableId(table.getId());
+ request.setPartitionValues(partitionValues);
+ TCreatePartitionResult result = impl.createPartition(request);
+            Assertions.assertEquals(TStatusCode.OK, result.getStatus().getStatusCode());
+ }
+
+ // Mark them as having data (visibleVersion >= 2)
+ table.writeLockOrDdlException();
+ try {
+ for (String pn : partNames) {
+ Partition p = table.getPartition(pn);
+ Assertions.assertNotNull(p);
+ p.setVisibleVersionAndTime(2L, System.currentTimeMillis());
+ }
+ } finally {
+ table.writeUnlock();
+ }
+
+ // Mock large compressed data sizes for each historical partition
+ final long GB = 1024L * 1024L * 1024L;
+ Partition p1 = table.getPartition(partNames[0]);
+ Partition p2 = table.getPartition(partNames[1]);
+ Partition p3 = table.getPartition(partNames[2]);
+ new Expectations(p1, p2, p3) {
+ {
+ p1.getDataSizeExcludeEmptyReplica(true);
+ result = 1000 * GB; // ~1 TB uncompressed
+ minTimes = 0;
+ p2.getDataSizeExcludeEmptyReplica(true);
+ result = 1500 * GB; // ~1.5 TB uncompressed
+ minTimes = 0;
+ p3.getDataSizeExcludeEmptyReplica(true);
+ result = 2000 * GB; // ~2 TB uncompressed
+ minTimes = 0;
+ }
+ };
+
+        // Directly mock BE disk cap to a very large value so AutoBucketUtils doesn't limit by disks
+ new MockUp<AutoBucketUtils>() {
+ @Mock
+            public int getBucketsNumByBEDisks() { // private static in target, allowed to mock by name
+ return Integer.MAX_VALUE;
+ }
+ };
+
+        // Compute expected bucket by reusing scheduler helpers before creating the new partition (2023-08-04)
+ String newPartName = "p20230804000000";
+
+ // Create the new partition (2023-08-04) and verify bucket num
+        List<List<TNullableStringLiteral>> partitionValues4 = new ArrayList<>();
+ List<TNullableStringLiteral> values4 = new ArrayList<>();
+ TNullableStringLiteral start4 = new TNullableStringLiteral();
+ start4.setValue("2023-08-04 00:00:00");
+ values4.add(start4);
+ partitionValues4.add(values4);
+
+ TCreatePartitionRequest request4 = new TCreatePartitionRequest();
+ request4.setDbId(db.getId());
+ request4.setTableId(table.getId());
+ request4.setPartitionValues(partitionValues4);
+ TCreatePartitionResult result4 = impl.createPartition(request4);
+        Assertions.assertEquals(TStatusCode.OK, result4.getStatus().getStatusCode());
+
+ Partition p4 = table.getPartition(newPartName);
+ Assertions.assertNotNull(p4);
+ Assertions.assertEquals(p4.getDistributionInfo().getBucketNum(), 500);
+ }
+
+ @Test
+ public void testAutoBucketReusePrevPartitionBuckets() throws Exception {
+ String createOlapTblStmt = "CREATE TABLE test.auto_bucket_calc (\n"
+ + " event_day DATETIME NOT NULL,\n"
+ + " site_id INT,\n"
+ + " v INT\n"
+ + ")\n"
+ + "DUPLICATE KEY(event_day, site_id)\n"
+ + "AUTO PARTITION BY range (date_trunc(event_day,'day')) (\n"
+ + "\n"
+ + ")\n"
+ + "DISTRIBUTED BY HASH(event_day) BUCKETS AUTO\n"
+ + "PROPERTIES(\"replication_num\"=\"1\");";
+
+ createTable(createOlapTblStmt);
+        Database db = Env.getCurrentInternalCatalog().getDbOrAnalysisException("test");
+        OlapTable table = (OlapTable) db.getTableOrAnalysisException("auto_bucket_calc");
+
+ // Create first partition via RPC (acts as historical partition)
+ List<List<TNullableStringLiteral>> partitionValues = new ArrayList<>();
+ List<TNullableStringLiteral> values = new ArrayList<>();
+ TNullableStringLiteral start = new TNullableStringLiteral();
+ start.setValue("2023-08-05 00:00:00");
+ values.add(start);
+ partitionValues.add(values);
+
+ FrontendServiceImpl impl = new FrontendServiceImpl(exeEnv);
+ TCreatePartitionRequest request = new TCreatePartitionRequest();
+ request.setDbId(db.getId());
+ request.setTableId(table.getId());
+ request.setPartitionValues(partitionValues);
+ TCreatePartitionResult result = impl.createPartition(request);
+        Assertions.assertEquals(TStatusCode.OK, result.getStatus().getStatusCode());
+
+ Partition p1 = table.getPartition("p20230805000000");
+ Assertions.assertNotNull(p1);
+        // Mark the partition as having data (visibleVersion >= 2) to be counted by filterDataPartitions
+ table.writeLockOrDdlException();
+ try {
+ p1.setVisibleVersionAndTime(2L, System.currentTimeMillis());
+ } finally {
+ table.writeUnlock();
+ }
+        // Create next partition; PartitionExprUtil should reuse previous partition buckets (8)
+        List<List<TNullableStringLiteral>> partitionValues2 = new ArrayList<>();
+ List<TNullableStringLiteral> values2 = new ArrayList<>();
+ TNullableStringLiteral start2 = new TNullableStringLiteral();
+ start2.setValue("2023-08-06 00:00:00");
+ values2.add(start2);
+ partitionValues2.add(values2);
+
+ TCreatePartitionRequest request2 = new TCreatePartitionRequest();
+ request2.setDbId(db.getId());
+ request2.setTableId(table.getId());
+ request2.setPartitionValues(partitionValues2);
+ TCreatePartitionResult result2 = impl.createPartition(request2);
+        Assertions.assertEquals(TStatusCode.OK, result2.getStatus().getStatusCode());
+
+ Partition p2 = table.getPartition("p20230806000000");
+ Assertions.assertNotNull(p2);
+        Assertions.assertEquals(p1.getDistributionInfo().getBucketNum(), p2.getDistributionInfo().getBucketNum());
+ }
+}
diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/AutoBucketCalculatorTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/AutoBucketCalculatorTest.java
new file mode 100644
index 00000000000..e23ec904252
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/AutoBucketCalculatorTest.java
@@ -0,0 +1,106 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.common.util;
+
+import org.apache.doris.catalog.OlapTable;
+import org.apache.doris.common.util.AutoBucketCalculator.AutoBucketContext;
+import org.apache.doris.common.util.AutoBucketCalculator.AutoBucketResult;
+
+import mockit.Expectations;
+import mockit.Mocked;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+public class AutoBucketCalculatorTest {
+
+ @Mocked
+ OlapTable table;
+
+ @Test
+ public void testCalculateAutoBucketsNotAutoBucket() {
+ new Expectations() {
+ {
+ table.isAutoBucket();
+ result = false;
+
+ table.getName();
+ result = "test_table";
+ minTimes = 0;
+
+ table.getId();
+ result = 1L;
+ minTimes = 0;
+ }
+ };
+
+        AutoBucketContext context = new AutoBucketContext(table, "p1", "p2", false, 10);
+        AutoBucketResult result = AutoBucketCalculator.calculateAutoBuckets(context);
+
+ Assertions.assertFalse(result.isSuccess());
+ Assertions.assertEquals(10, result.getBuckets());
+ Assertions.assertEquals("not auto bucket table",
result.getErrorMessage());
+ }
+
+ @Test
+ public void testCalculateAutoBucketsExecuteFirstTime() {
+ new Expectations() {
+ {
+ table.isAutoBucket();
+ result = true;
+
+ table.getName();
+ result = "test_table";
+ minTimes = 0;
+
+ table.getId();
+ result = 1L;
+ minTimes = 0;
+ }
+ };
+
+        AutoBucketContext context = new AutoBucketContext(table, "p1", "p2", true, 10);
+        AutoBucketResult result = AutoBucketCalculator.calculateAutoBuckets(context);
+
+ Assertions.assertFalse(result.isSuccess());
+ Assertions.assertEquals(10, result.getBuckets());
+ Assertions.assertEquals("executeFirstTime", result.getErrorMessage());
+ }
+
+ @Test
+ public void testCalculateAutoBucketsWithBoundsCheck() {
+ new Expectations() {
+ {
+ table.isAutoBucket();
+ result = false;
+
+ table.getName();
+ result = "test_table";
+ minTimes = 0;
+
+ table.getId();
+ result = 1L;
+ minTimes = 0;
+ }
+ };
+
+        AutoBucketContext context = new AutoBucketContext(table, "p1", "p2", false, 10);
+        int buckets = AutoBucketCalculator.calculateAutoBucketsWithBoundsCheck(context);
+
+ Assertions.assertEquals(10, buckets);
+ }
+}
diff --git a/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy b/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy
index 7bb2277f184..07c271bec47 100644
--- a/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy
+++ b/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy
@@ -68,7 +68,7 @@ PARTITION p202411248 VALUES IN ("20241124"),
PARTITION p202411258 VALUES IN ("20241125"),
PARTITION p202411268 VALUES IN ("20241126"),
PARTITION p202411278 VALUES IN ("20241127"))
-DISTRIBUTED BY HASH(`khh`, `dt`) BUCKETS AUTO
+DISTRIBUTED BY HASH(`khh`, `dt`) buckets 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"min_load_replica_num" = "-1",
@@ -136,7 +136,7 @@ PARTITION p202411248 VALUES IN ("20241124"),
PARTITION p202411258 VALUES IN ("20241125"),
PARTITION p202411268 VALUES IN ("20241126"),
PARTITION p202411278 VALUES IN ("20241127"))
-DISTRIBUTED BY HASH(`khh`, `dt`) BUCKETS AUTO
+DISTRIBUTED BY HASH(`khh`, `dt`) buckets 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"min_load_replica_num" = "-1",
diff --git a/regression-test/suites/nereids_rules_p0/expression/test_simplify_range.groovy b/regression-test/suites/nereids_rules_p0/expression/test_simplify_range.groovy
index 986b38a7c1c..89cf3581b40 100644
--- a/regression-test/suites/nereids_rules_p0/expression/test_simplify_range.groovy
+++ b/regression-test/suites/nereids_rules_p0/expression/test_simplify_range.groovy
@@ -37,7 +37,7 @@ suite('test_simplify_range') {
)
unique key(pk_id, collect_time)
auto partition by list(collect_time)()
- distributed by hash(pk_id) buckets auto
+ distributed by hash(pk_id) buckets 10
properties (
"replication_allocation" = "tag.location.default: 1",
"enable_unique_key_merge_on_write" = "true"
diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy
index 5855ecc06e1..382785fc2a4 100644
--- a/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy
+++ b/regression-test/suites/partition_p0/auto_partition/test_auto_list_partition.groovy
@@ -319,7 +319,7 @@ suite("test_auto_list_partition") {
LAST_UPLOAD_TIME DATETIME COMMENT 'LAST_UPLOAD_TIME'
)
AUTO PARTITION BY LIST (sum(DATE_ID))()
- DISTRIBUTED BY HASH(DATE_ID) BUCKETS AUTO
+ DISTRIBUTED BY HASH(DATE_ID) buckets 10
PROPERTIES (
"replication_num" = "1"
);
diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
index 9aaf0f6bfe6..3a279693993 100644
--- a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
+++ b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
@@ -344,4 +344,18 @@ suite("test_auto_partition_behavior") {
"""
exception "auto create partition only support date_trunc function of
RANGE partition"
}
+
+ test {
+ sql """
+ create table ap_wrong(
+ dt datetime not null,
+ k0 varchar not null
+ )
+ AUTO PARTITION BY LIST (dt)
+ ()
+ DISTRIBUTED BY HASH(`dt`) BUCKETS AUTO
+ properties("replication_num" = "1");
+ """
+ exception "Cannot use auto bucket with auto list partition"
+ }
}
diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_with_single_replica_insert.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_with_single_replica_insert.groovy
index 510b24602ac..d363bbc5d58 100644
--- a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_with_single_replica_insert.groovy
+++ b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_with_single_replica_insert.groovy
@@ -31,7 +31,7 @@ suite("test_auto_partition_with_single_replica_insert") {
AUTO PARTITION BY LIST (`chain_name`)
(PARTITION pchain5fname10 VALUES IN ("chain_name"),
PARTITION p4e0995e85ce1534e4e3a5 VALUES IN ("星辰医疗科技有限公司"))
- DISTRIBUTED BY HASH(`user_id`) BUCKETS AUTO
+ DISTRIBUTED BY HASH(`user_id`) buckets 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
@@ -61,7 +61,7 @@ suite("test_auto_partition_with_single_replica_insert") {
AUTO PARTITION BY LIST (`chain_name`)
(PARTITION pchain5fname10 VALUES IN ("chain_name"),
PARTITION p4e0995e85ce1534e4e3a5 VALUES IN ("星辰医疗科技有限公司"))
- DISTRIBUTED BY HASH(`user_id`) BUCKETS AUTO
+ DISTRIBUTED BY HASH(`user_id`) buckets 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
diff --git a/regression-test/suites/query_p0/show/test_show_partitions.groovy b/regression-test/suites/query_p0/show/test_show_partitions.groovy
index bf88872029e..b8dacc19b82 100644
--- a/regression-test/suites/query_p0/show/test_show_partitions.groovy
+++ b/regression-test/suites/query_p0/show/test_show_partitions.groovy
@@ -25,7 +25,7 @@ suite("test_show_partitions") {
name varchar (255)
) ENGINE = OLAP DUPLICATE KEY(month, id)
AUTO PARTITION BY LIST (month)()
- distributed by hash (id) buckets auto
+ distributed by hash (id) buckets 10
PROPERTIES(
"replication_allocation" = "tag.location.default: 1"
)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]