This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 3b2650c26d9 [fix-2.1](auto-partition) Fix unintended partition creation when insert overwrite auto detect for auto partition table (#44449)
3b2650c26d9 is described below

commit 3b2650c26d9576af8f6ce5e8590cb9a7e393b004
Author: zclllhhjj <zhaochan...@selectdb.com>
AuthorDate: Fri Nov 22 22:46:40 2024 +0800

    [fix-2.1](auto-partition) Fix unintended partition creation when insert overwrite auto detect for auto partition table (#44449)
---
 .../apache/doris/analysis/NativeInsertStmt.java    |   1 +
 .../plans/commands/insert/OlapInsertExecutor.java  |  10 +-
 .../org/apache/doris/planner/OlapTableSink.java    |  33 +++-
 .../test_iot_auto_detect_fail.groovy               | 167 +++++++++++++++++++++
 4 files changed, 199 insertions(+), 12 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java
index 01d5e6b87d3..13f3c3bfac4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java
@@ -1092,6 +1092,7 @@ public class NativeInsertStmt extends InsertStmt {
             if (!allowAutoPartition) {
                 ((OlapTableSink) dataSink).setAutoPartition(false);
             }
+            // ATTN: there is a known bug here for insert overwrite (iot) + auto partition. We decided not to fix it for the legacy planner.
             if (!isGroupCommitStreamLoadSql) {
                 // add table indexes to transaction state
                 TransactionState txnState = 
Env.getCurrentGlobalTransactionMgr()
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java
index f522a956899..de2c9a6ed8c 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java
@@ -164,14 +164,10 @@ public class OlapInsertExecutor extends 
AbstractInsertExecutor {
                     throw new IllegalStateException("Unsupported DataSink: " + 
childFragmentSink);
                 }
 
-                Analyzer analyzer = new Analyzer(Env.getCurrentEnv(), 
ConnectContext.get());
-                
dataStreamSink.setTabletSinkSchemaParam(olapTableSink.createSchema(
-                        database.getId(), olapTableSink.getDstTable(), 
analyzer));
-                
dataStreamSink.setTabletSinkPartitionParam(olapTableSink.createPartition(
-                        database.getId(), olapTableSink.getDstTable(), 
analyzer));
+                
dataStreamSink.setTabletSinkSchemaParam(olapTableSink.getOlapTableSchemaParam());
+                
dataStreamSink.setTabletSinkPartitionParam(olapTableSink.getOlapTablePartitionParam());
                 
dataStreamSink.setTabletSinkTupleDesc(olapTableSink.getTupleDescriptor());
-                List<TOlapTableLocationParam> locationParams = olapTableSink
-                        .createLocation(database.getId(), 
olapTableSink.getDstTable());
+                List<TOlapTableLocationParam> locationParams = 
olapTableSink.getOlapTableLocationParams();
                 
dataStreamSink.setTabletSinkLocationParam(locationParams.get(0));
                 dataStreamSink.setTabletSinkTxnId(olapTableSink.getTxnId());
                 dataStreamSink.setTabletSinkExprs(fragment.getOutputExprs());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
index cfaa4d7d507..4a4791b15ac 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java
@@ -114,6 +114,13 @@ public class OlapTableSink extends DataSink {
     private boolean isStrictMode = false;
     private long txnId = -1;
 
+    // cached so they can be reused when setting up the DataStreamSink
+    private TOlapTableSchemaParam tOlapTableSchemaParam;
+
+    private TOlapTablePartitionParam tOlapTablePartitionParam;
+
+    private List<TOlapTableLocationParam> tOlapTableLocationParams;
+
     public OlapTableSink(OlapTable dstTable, TupleDescriptor tupleDescriptor, 
List<Long> partitionIds,
             boolean singleReplicaLoad) {
         this.dstTable = dstTable;
@@ -188,6 +195,18 @@ public class OlapTableSink extends DataSink {
         tDataSink.getOlapTableSink().getPartition().setOverwriteGroupId(var);
     }
 
+    public TOlapTableSchemaParam getOlapTableSchemaParam() {
+        return tOlapTableSchemaParam;
+    }
+
+    public TOlapTablePartitionParam getOlapTablePartitionParam() {
+        return tOlapTablePartitionParam;
+    }
+
+    public List<TOlapTableLocationParam> getOlapTableLocationParams() {
+        return tOlapTableLocationParams;
+    }
+
     // must called after tupleDescriptor is computed
     public void complete(Analyzer analyzer) throws UserException {
         for (Long partitionId : partitionIds) {
@@ -207,12 +226,16 @@ public class OlapTableSink extends DataSink {
         int numReplicas = 
dstTable.getTableProperty().getReplicaAllocation().getTotalReplicaNum();
         tSink.setNumReplicas(numReplicas);
         tSink.setNeedGenRollup(dstTable.shouldLoadToNewRollup());
-        tSink.setSchema(createSchema(tSink.getDbId(), dstTable, analyzer));
-        tSink.setPartition(createPartition(tSink.getDbId(), dstTable, 
analyzer));
-        List<TOlapTableLocationParam> locationParams = 
createLocation(tSink.getDbId(), dstTable);
-        tSink.setLocation(locationParams.get(0));
+
+        tOlapTableSchemaParam = createSchema(tSink.getDbId(), dstTable, 
analyzer);
+        tOlapTablePartitionParam = createPartition(tSink.getDbId(), dstTable, 
analyzer);
+        tOlapTableLocationParams = createLocation(tSink.getDbId(), dstTable);
+
+        tSink.setSchema(tOlapTableSchemaParam);
+        tSink.setPartition(tOlapTablePartitionParam);
+        tSink.setLocation(tOlapTableLocationParams.get(0));
         if (singleReplicaLoad) {
-            tSink.setSlaveLocation(locationParams.get(1));
+            tSink.setSlaveLocation(tOlapTableLocationParams.get(1));
         }
         tSink.setWriteSingleReplica(singleReplicaLoad);
         tSink.setNodesInfo(createPaloNodesInfo());
diff --git 
a/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy 
b/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy
new file mode 100644
index 00000000000..7bb2277f184
--- /dev/null
+++ 
b/regression-test/suites/insert_overwrite_p0/test_iot_auto_detect_fail.groovy
@@ -0,0 +1,167 @@
+// Licensed to the Apache Software Foundation (ASF) under one  
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information       
+// regarding copyright ownership.  The ASF licenses this file  
+// to you under the Apache License, Version 2.0 (the           
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_iot_auto_detect_fail") {
+    multi_sql """
+    drop table if exists fail_src;
+    CREATE TABLE `fail_src` (
+  `qsrq` int NULL,
+  `lsh` varchar(32) NULL,
+  `wth` bigint NULL,
+  `khh` varchar(16) NULL,
+  `dt` varchar(8) NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`qsrq`, `lsh`)
+AUTO PARTITION BY LIST (`dt`)
+(PARTITION p202307078 VALUES IN ("20230707"),
+PARTITION p202307108 VALUES IN ("20230710"),
+PARTITION p202307118 VALUES IN ("20230711"),
+PARTITION p202307128 VALUES IN ("20230712"),
+PARTITION p202307138 VALUES IN ("20230713"),
+PARTITION p202307148 VALUES IN ("20230714"),
+PARTITION p202410088 VALUES IN ("20241008"),
+PARTITION p202410098 VALUES IN ("20241009"),
+PARTITION p202410108 VALUES IN ("20241010"),
+PARTITION p202410118 VALUES IN ("20241011"),
+PARTITION p202410148 VALUES IN ("20241014"),
+PARTITION p202410158 VALUES IN ("20241015"),
+PARTITION p202410168 VALUES IN ("20241016"),
+PARTITION p202410178 VALUES IN ("20241017"),
+PARTITION p202410188 VALUES IN ("20241018"),
+PARTITION p202410218 VALUES IN ("20241021"),
+PARTITION p202410228 VALUES IN ("20241022"),
+PARTITION p202410238 VALUES IN ("20241023"),
+PARTITION p202410248 VALUES IN ("20241024"),
+PARTITION p202410258 VALUES IN ("20241025"),
+PARTITION p202410288 VALUES IN ("20241028"),
+PARTITION p202410298 VALUES IN ("20241029"),
+PARTITION p202410308 VALUES IN ("20241030"),
+PARTITION p202410318 VALUES IN ("20241031"),
+PARTITION p202411018 VALUES IN ("20241101"),
+PARTITION p202411028 VALUES IN ("20241102"),
+PARTITION p202411038 VALUES IN ("20241103"),
+PARTITION p202411048 VALUES IN ("20241104"),
+PARTITION p202411058 VALUES IN ("20241105"),
+PARTITION p202411068 VALUES IN ("20241106"),
+PARTITION p202411078 VALUES IN ("20241107"),
+PARTITION p202411088 VALUES IN ("20241108"),
+PARTITION p202411118 VALUES IN ("20241111"),
+PARTITION p202411128 VALUES IN ("20241112"),
+PARTITION p202411138 VALUES IN ("20241113"),
+PARTITION p202411148 VALUES IN ("20241114"),
+PARTITION p202411158 VALUES IN ("20241115"),
+PARTITION p202411248 VALUES IN ("20241124"),
+PARTITION p202411258 VALUES IN ("20241125"),
+PARTITION p202411268 VALUES IN ("20241126"),
+PARTITION p202411278 VALUES IN ("20241127"))
+DISTRIBUTED BY HASH(`khh`, `dt`) BUCKETS AUTO
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V1",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
+
+insert into fail_src values 
(3,'a',10,'b','20241128'),(4,'a',10,'b','20241128'),(5,'a',10,'b','20241128'),(6,'a',10,'b','20241128'),(7,'a',10,'b','20241128'),(8,'a',10,'b','20241128'),(9,'a',10,'b','20241128');
+
+drop table if exists fail_tag;
+CREATE TABLE `fail_tag` (
+  `qsrq` int NULL,
+  `lsh` varchar(32) NULL,
+  `wth` bigint NULL,
+  `khh` varchar(16) NULL,
+  `dt` varchar(8) NULL
+) ENGINE=OLAP
+DUPLICATE KEY(`qsrq`, `lsh`)
+AUTO PARTITION BY LIST (`dt`)
+(PARTITION p202307078 VALUES IN ("20230707"),
+PARTITION p202307108 VALUES IN ("20230710"),
+PARTITION p202307118 VALUES IN ("20230711"),
+PARTITION p202307128 VALUES IN ("20230712"),
+PARTITION p202307138 VALUES IN ("20230713"),
+PARTITION p202307148 VALUES IN ("20230714"),
+PARTITION p202410088 VALUES IN ("20241008"),
+PARTITION p202410098 VALUES IN ("20241009"),
+PARTITION p202410108 VALUES IN ("20241010"),
+PARTITION p202410118 VALUES IN ("20241011"),
+PARTITION p202410148 VALUES IN ("20241014"),
+PARTITION p202410158 VALUES IN ("20241015"),
+PARTITION p202410168 VALUES IN ("20241016"),
+PARTITION p202410178 VALUES IN ("20241017"),
+PARTITION p202410188 VALUES IN ("20241018"),
+PARTITION p202410218 VALUES IN ("20241021"),
+PARTITION p202410228 VALUES IN ("20241022"),
+PARTITION p202410238 VALUES IN ("20241023"),
+PARTITION p202410248 VALUES IN ("20241024"),
+PARTITION p202410258 VALUES IN ("20241025"),
+PARTITION p202410288 VALUES IN ("20241028"),
+PARTITION p202410298 VALUES IN ("20241029"),
+PARTITION p202410308 VALUES IN ("20241030"),
+PARTITION p202410318 VALUES IN ("20241031"),
+PARTITION p202411018 VALUES IN ("20241101"),
+PARTITION p202411028 VALUES IN ("20241102"),
+PARTITION p202411038 VALUES IN ("20241103"),
+PARTITION p202411048 VALUES IN ("20241104"),
+PARTITION p202411058 VALUES IN ("20241105"),
+PARTITION p202411068 VALUES IN ("20241106"),
+PARTITION p202411078 VALUES IN ("20241107"),
+PARTITION p202411088 VALUES IN ("20241108"),
+PARTITION p202411118 VALUES IN ("20241111"),
+PARTITION p202411128 VALUES IN ("20241112"),
+PARTITION p202411138 VALUES IN ("20241113"),
+PARTITION p202411148 VALUES IN ("20241114"),
+PARTITION p202411158 VALUES IN ("20241115"),
+PARTITION p202411248 VALUES IN ("20241124"),
+PARTITION p202411258 VALUES IN ("20241125"),
+PARTITION p202411268 VALUES IN ("20241126"),
+PARTITION p202411278 VALUES IN ("20241127"))
+DISTRIBUTED BY HASH(`khh`, `dt`) BUCKETS AUTO
+PROPERTIES (
+"replication_allocation" = "tag.location.default: 1",
+"min_load_replica_num" = "-1",
+"is_being_synced" = "false",
+"storage_medium" = "hdd",
+"storage_format" = "V2",
+"inverted_index_storage_format" = "V1",
+"light_schema_change" = "true",
+"disable_auto_compaction" = "false",
+"enable_single_replica_compaction" = "false",
+"group_commit_interval_ms" = "10000",
+"group_commit_data_bytes" = "134217728"
+);
+    """
+
+    test {
+        sql "insert overwrite table fail_tag PARTITION(*) select 
qsrq,lsh,wth,khh,dt from fail_src where dt='20241128';"
+        exception "Cannot found origin partitions"
+    }
+    test {
+        sql "insert overwrite table fail_tag PARTITION(*) select 
qsrq,lsh,wth,khh,dt from fail_src where dt='20241128';"
+        exception "Cannot found origin partitions"
+    }
+    test {
+        sql "insert overwrite table fail_tag PARTITION(*) select 
qsrq,lsh,wth,khh,dt from fail_src where dt='20241128';"
+        exception "Cannot found origin partitions"
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to