This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 609761567c24dc1d31ea49bc74afdf46cc45f1b7
Author: abmdocrt <yukang.lian2...@gmail.com>
AuthorDate: Fri Mar 8 13:47:56 2024 +0800

    [Fix](partial-update) Fix wrong column number passing to BE when partial 
and enable nereids (#31461)
    
    * Problem:
    Inconsistent behavior occurs when executing partial column update `UPDATE` 
statements and `INSERT` statements on merge-on-write tables with the Nereids 
optimizer enabled. The number of columns passed to BE differs; `UPDATE` 
operations incorrectly pass all columns, while `INSERT` operations correctly 
pass only the updated columns.
    
    Reason:
    The Nereids optimizer does not handle partial column update `UPDATE` 
statements properly. The processing logic for `UPDATE` statements rewrites them 
as equivalent `INSERT` statements, which are then processed according to the 
logic of `INSERT` statements. For example, assuming a MoW table structure with 
columns k1, k2, v1, v2, the correct rewrite should be:
    * `UPDATE` table t1 set v1 = v1 + 1 where k1 = 1 and k2 = 2
     * =>
     * `INSERT` into table (v1) select v1 + 1 from table t1 where k1 = 1 and k2 
= 2
    
    However, the actual rewriting process does not consider the logic for 
partial column updates, leading to all columns being included in the `INSERT` 
statement, i.e., the result is:
    * `INSERT` into table (k1, k2, v1, v2) select k1, k2, v1 + 1, v2 from table 
t1 where k1 = 1 and k2 = 2
    
    This results in `UPDATE` operations incorrectly passing all columns to BE.
    
    Solution:
    Having analyzed the cause, the solution is straightforward: when rewriting 
partial column update `UPDATE` statements to `INSERT` statements, only retain 
the updated columns and all key columns (as partial column updates must include 
all key columns). Additionally, this PR includes error injection cases to 
verify the number of columns passed to BE is correct.
    
    * 2
    
    * 3
    
    * 4
    
    * 5
---
 be/src/olap/rowset_builder.cpp                     |  6 ++
 .../trees/plans/commands/UpdateCommand.java        | 44 ++++++++++--
 ...t_partial_update_column_num_fault_injection.out | 11 +++
 .../test_auto_partition_behavior.out               | 15 ----
 .../data/update/test_unique_table_update.out       | 26 +++++++
 regression-test/data/update/test_update_mow.out    | 15 ++++
 ...artial_update_column_num_fault_injection.groovy | 51 +++++++++++++
 .../test_auto_partition_behavior.groovy            |  7 --
 .../suites/update/test_unique_table_update.groovy  | 84 ++++++++++++++++++++++
 .../suites/update/test_update_mow.groovy           | 32 +++++++++
 10 files changed, 263 insertions(+), 28 deletions(-)

diff --git a/be/src/olap/rowset_builder.cpp b/be/src/olap/rowset_builder.cpp
index 8e9d87ec259..248aaba5c93 100644
--- a/be/src/olap/rowset_builder.cpp
+++ b/be/src/olap/rowset_builder.cpp
@@ -184,6 +184,12 @@ Status RowsetBuilder::init() {
 
     RETURN_IF_ERROR(prepare_txn());
 
+    DBUG_EXECUTE_IF("BaseRowsetBuilder::init.check_partial_update_column_num", 
{
+        if (_req.table_schema_param->partial_update_input_columns().size() !=
+            dp->param<int>("column_num")) {
+            return Status::InternalError("partial update input column num 
wrong!");
+        };
+    })
     // build tablet schema in request level
     _build_current_tablet_schema(_req.index_id, _req.table_schema_param.get(),
                                  *_tablet->tablet_schema());
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java
index 133fc36a1da..cde07c957a3 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java
@@ -49,6 +49,7 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -103,10 +104,17 @@ public class UpdateCommand extends Command implements 
ForwardWithSync, Explainab
         checkTable(ctx);
 
         Map<String, Expression> colNameToExpression = 
Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
+        Map<String, Expression> partialUpdateColNameToExpression = 
Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER);
         for (EqualTo equalTo : assignments) {
             List<String> nameParts = ((UnboundSlot) 
equalTo.left()).getNameParts();
             checkAssignmentColumn(ctx, nameParts);
             colNameToExpression.put(nameParts.get(nameParts.size() - 1), 
equalTo.right());
+            
partialUpdateColNameToExpression.put(nameParts.get(nameParts.size() - 1), 
equalTo.right());
+        }
+        // check if any key in update clause
+        if (targetTable.getFullSchema().stream().filter(Column::isKey)
+                .anyMatch(column -> 
partialUpdateColNameToExpression.containsKey(column.getName()))) {
+            throw new AnalysisException("Only value columns of unique table 
could be updated");
         }
         List<NamedExpression> selectItems = Lists.newArrayList();
         String tableName = tableAlias != null ? tableAlias : 
targetTable.getName();
@@ -147,16 +155,40 @@ public class UpdateCommand extends Command implements 
ForwardWithSync, Explainab
                     + String.join(", ", colNameToExpression.keySet()));
         }
 
-        logicalQuery = new LogicalProject<>(selectItems, logicalQuery);
-        if (cte.isPresent()) {
-            logicalQuery = ((LogicalPlan) 
cte.get().withChildren(logicalQuery));
-        }
         boolean isPartialUpdate = targetTable.getEnableUniqueKeyMergeOnWrite()
                 && selectItems.size() < targetTable.getColumns().size()
-                && !targetTable.hasVariantColumns();
+                && !targetTable.hasVariantColumns() && 
targetTable.getSequenceCol() == null
+                && partialUpdateColNameToExpression.size() <= 
targetTable.getFullSchema().size() * 3 / 10;
+
+        List<String> partialUpdateColNames = new ArrayList<>();
+        List<NamedExpression> partialUpdateSelectItems = new ArrayList<>();
+        if (isPartialUpdate) {
+            for (Column column : targetTable.getFullSchema()) {
+                Expression expr = new 
NereidsParser().parseExpression(tableName + "." + column.getName());
+                boolean existInExpr = false;
+                for (String colName : 
partialUpdateColNameToExpression.keySet()) {
+                    if (colName.equalsIgnoreCase(column.getName())) {
+                        expr = 
partialUpdateColNameToExpression.get(column.getName());
+                        existInExpr = true;
+                        break;
+                    }
+                }
+                if (column.isKey() || existInExpr) {
+                    partialUpdateSelectItems.add(expr instanceof UnboundSlot
+                            ? ((NamedExpression) expr)
+                            : new UnboundAlias(expr));
+                    partialUpdateColNames.add(column.getName());
+                }
+            }
+        }
 
+        logicalQuery = new LogicalProject<>(isPartialUpdate ? 
partialUpdateSelectItems : selectItems, logicalQuery);
+        if (cte.isPresent()) {
+            logicalQuery = ((LogicalPlan) 
cte.get().withChildren(logicalQuery));
+        }
         // make UnboundTableSink
-        return new UnboundTableSink<>(nameParts, ImmutableList.of(), 
ImmutableList.of(),
+        return new UnboundTableSink<>(nameParts, isPartialUpdate ? 
partialUpdateColNames : ImmutableList.of(),
+                ImmutableList.of(),
                 false, ImmutableList.of(), isPartialUpdate, 
DMLCommandType.UPDATE, logicalQuery);
     }
 
diff --git 
a/regression-test/data/fault_injection_p0/test_partial_update_column_num_fault_injection.out
 
b/regression-test/data/fault_injection_p0/test_partial_update_column_num_fault_injection.out
new file mode 100644
index 00000000000..3d9ecbcede0
--- /dev/null
+++ 
b/regression-test/data/fault_injection_p0/test_partial_update_column_num_fault_injection.out
@@ -0,0 +1,11 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_1 --
+1      1       1       1       1
+2      2       2       2       2
+3      3       3       3       3
+
+-- !select_2 --
+1      1       1       1       1
+2      1       1       2       2
+3      3       3       3       3
+
diff --git 
a/regression-test/data/partition_p0/auto_partition/test_auto_partition_behavior.out
 
b/regression-test/data/partition_p0/auto_partition/test_auto_partition_behavior.out
index d24d4db21db..5fe25cce3d8 100644
--- 
a/regression-test/data/partition_p0/auto_partition/test_auto_partition_behavior.out
+++ 
b/regression-test/data/partition_p0/auto_partition/test_auto_partition_behavior.out
@@ -26,21 +26,6 @@ xxX  3
 Xxx    3
 xxX    3
 
--- !sql4 --
-       2
-
--- !sql5 --
-1
-
--- !sql6 --
- !  
- - 
--
-- -
---
-modified
-xxX
-
 -- !sql1 --
  
   
diff --git a/regression-test/data/update/test_unique_table_update.out 
b/regression-test/data/update/test_unique_table_update.out
new file mode 100644
index 00000000000..d0c0dda6ad2
--- /dev/null
+++ b/regression-test/data/update/test_unique_table_update.out
@@ -0,0 +1,26 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !select_1 --
+1      1       1       1       1
+2      2       2       2       2
+3      3       3       3       3
+
+-- !select_2 --
+1      1       1       1       1
+2      2       2       2       2
+3      3       3       3       3
+
+-- !select_3 --
+1      1       1       1       1
+2      1       1       2       2
+3      3       3       3       3
+
+-- !select_4 --
+1      1       1       1       1
+2      1       1       2       2
+3      3       3       3       3
+
+-- !select_5 --
+1      1       1       1       1
+2      1       1       2       2
+3      3       3       3       3
+
diff --git a/regression-test/data/update/test_update_mow.out 
b/regression-test/data/update/test_update_mow.out
index ed072a5c0a2..0cc8c448555 100644
--- a/regression-test/data/update/test_update_mow.out
+++ b/regression-test/data/update/test_update_mow.out
@@ -38,3 +38,18 @@ a    1       2023-11-12T00:00        test1   999
 b      2       2023-11-12T00:00        test2   2
 c      3       2022-01-01T00:00        update value    3
 
+-- !sql --
+a      1       2023-11-12T00:00        test1   1
+b      2       2023-11-12T00:00        test2   2
+c      3       2023-11-12T00:00        test3   3
+
+-- !sql --
+a      1       2023-11-12T00:00        test1   999
+b      2       2023-11-12T00:00        test2   2
+c      3       2023-11-12T00:00        test3   3
+
+-- !sql --
+a      1       2023-11-12T00:00        test1   999
+b      2       2023-11-12T00:00        test2   2
+c      3       2022-01-01T00:00        update value    3
+
diff --git 
a/regression-test/suites/fault_injection_p0/test_partial_update_column_num_fault_injection.groovy
 
b/regression-test/suites/fault_injection_p0/test_partial_update_column_num_fault_injection.groovy
new file mode 100644
index 00000000000..50973fb8971
--- /dev/null
+++ 
b/regression-test/suites/fault_injection_p0/test_partial_update_column_num_fault_injection.groovy
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_partial_update_column_num_fault_injection","nonConcurrent") {
+
+
+    def tableName = "test_partial_update_column_num_fault_injection"
+ 
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+
+    sql """
+        CREATE TABLE ${tableName}
+            (k bigint,   v1 string, v2 string, v3 string, v4 string )
+            UNIQUE KEY(k)
+            DISTRIBUTED BY HASH (k) 
+            BUCKETS 32   
+            PROPERTIES(
+                "replication_num" = "1",
+                "enable_unique_key_merge_on_write"="true");
+        """
+
+    GetDebugPoint().clearDebugPointsForAllBEs()
+
+    try {
+        sql "insert into ${tableName} 
values(1,1,1,1,1),(2,2,2,2,2),(3,3,3,3,3);"
+        qt_select_1 "select * from ${tableName} order by k;"
+        sql "set enable_fallback_to_original_planner=false;"
+        
GetDebugPoint().enableDebugPointForAllBEs("BaseRowsetBuilder::init.check_partial_update_column_num",
 [column_num: 3])
+        sql "update ${tableName} set v1=1, v2=1 where k=2;"
+    } catch (Exception e) {
+        logger.info(e.getMessage())
+        AssertTrue(false) 
+    } finally {
+        
GetDebugPoint().disableDebugPointForAllBEs("BaseRowsetBuilder::init.check_partial_update_column_num")
+        qt_select_2 "select * from ${tableName} order by k;"
+    }
+}
\ No newline at end of file
diff --git 
a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
 
b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
index 02623bb5c97..a2429214d60 100644
--- 
a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
+++ 
b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy
@@ -60,13 +60,6 @@ suite("test_auto_partition_behavior") {
     result = sql "show partitions from unique_table"
     assertEquals(result.size(), 9)
     qt_sql3 """ select str,length(str) from unique_table order by `str` """
-    // modify value 
-    sql """ update unique_table set str = "modified" where str in (" ", "  ") 
""" // only "  "
-    qt_sql4 """ select str,length(str) from unique_table where str = '  ' 
order by `str` """ // modified
-    qt_sql5 """ select count() from unique_table where str = 'modified' """
-    // crop
-    qt_sql6 """ select str from unique_table where ((str > ' ! ' || str = 
'modified') && str != 'Xxx') order by str """
-
 
     /// duplicate key table
     sql "drop table if exists dup_table"
diff --git a/regression-test/suites/update/test_unique_table_update.groovy 
b/regression-test/suites/update/test_unique_table_update.groovy
new file mode 100644
index 00000000000..d886b4fcdf4
--- /dev/null
+++ b/regression-test/suites/update/test_unique_table_update.groovy
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_unique_table_update","nonConcurrent") {
+
+
+    def tableName = "test_unique_table_update"
+ 
+    sql """ DROP TABLE IF EXISTS ${tableName} """
+
+    sql """
+        CREATE TABLE ${tableName}
+            (k bigint,   v1 string, v2 string, v3 string, v4 string )
+            UNIQUE KEY(k)
+            DISTRIBUTED BY HASH (k) 
+            BUCKETS 32   
+            PROPERTIES(
+                "replication_num" = "1",
+                "enable_unique_key_merge_on_write"="true");
+        """
+
+    sql "insert into ${tableName} values(1,1,1,1,1),(2,2,2,2,2),(3,3,3,3,3);"
+    qt_select_1 "select * from ${tableName} order by k;"
+
+    sql "set enable_nereids_planner=true"
+    sql "set enable_fallback_to_original_planner=false"
+
+    // update key is not allowed
+    try {
+        sql "update ${tableName} set k=1, v1=1, v2=1 where k=2;"
+        assertTrue(false) 
+    } catch (Exception e) {
+        logger.info(e.getMessage())
+        assertTrue(e.getMessage().contains("Only value columns of unique table 
could be updated"))
+    } finally {
+        qt_select_2 "select * from ${tableName} order by k;"
+    }
+
+    // update value is allowed
+    try {
+        sql "update ${tableName} set v1=1, v2=1 where k=2;"
+    } catch (Exception e) {
+        logger.info(e.getMessage())
+        assertTrue(false) 
+    } finally {
+        qt_select_3 "select * from ${tableName} order by k;"
+    }
+
+    sql "set enable_nereids_planner=false"
+    // update key is not allowed
+    try {
+        sql "update ${tableName} set k=1, v1=1, v2=1 where k=2;"
+        assertTrue(false) 
+    } catch (Exception e) {
+        logger.info(e.getMessage())
+        assertTrue(e.getMessage().contains("Only value columns of unique table 
could be updated"))
+    } finally {
+        qt_select_4 "select * from ${tableName} order by k;"
+    }
+
+    // update key is allowed
+    try {
+        sql "update ${tableName} set v1=1, v2=1 where k=2;"
+    } catch (Exception e) {
+        logger.info(e.getMessage())
+        assertTrue(false) 
+    } finally {
+        qt_select_5 "select * from ${tableName} order by k;"
+    }
+}
\ No newline at end of file
diff --git a/regression-test/suites/update/test_update_mow.groovy 
b/regression-test/suites/update/test_update_mow.groovy
index 27a663484f6..6b1462f69b9 100644
--- a/regression-test/suites/update/test_update_mow.groovy
+++ b/regression-test/suites/update/test_update_mow.groovy
@@ -16,6 +16,8 @@
 // under the License.
 
 suite("test_update_mow", "p0") {
+    sql "set enable_nereids_planner=true"
+    sql "set enable_fallback_to_original_planner=false"
     def tbName1 = "test_update_mow_1"
     def tbName2 = "test_update_mow_2"
     def tbName3 = "test_update_mow_3"
@@ -127,4 +129,34 @@ suite("test_update_mow", "p0") {
     qt_sql "select * from ${tableName5} order by k1,k2" 
 
     sql "DROP TABLE IF EXISTS ${tableName5}"
+
+    sql "set enable_nereids_planner=true"
+    sql "set enable_fallback_to_original_planner=false"
+    sql "sync"
+    def tableName6 = "test_update_mow_6"
+    sql "DROP TABLE IF EXISTS ${tableName6}"
+    sql """ CREATE TABLE ${tableName6} (
+            k1 varchar(100) NOT NULL,
+            k2 int(11) NOT NULL,
+            v1 datetime NULL,
+            v2 varchar(100) NULL,
+            v3 int NULL) ENGINE=OLAP UNIQUE KEY(k1, k2) COMMENT 'OLAP'
+            DISTRIBUTED BY HASH(k1, k2) BUCKETS 3
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "enable_unique_key_merge_on_write" = "true",
+            "light_schema_change" = "true",
+            "store_row_column" = "true",
+            "enable_single_replica_compaction" = "false");"""
+    sql """insert into ${tableName6} values
+        ("a",1,"2023-11-12 00:00:00","test1",1),
+        ("b",2,"2023-11-12 00:00:00","test2",2),
+        ("c",3,"2023-11-12 00:00:00","test3",3);"""
+    qt_sql "select * from ${tableName6} order by k1,k2"
+    sql """update ${tableName6} set v3=999 where k1="a" and k2=1;"""
+    qt_sql "select * from ${tableName6} order by k1,k2" 
+    sql """update ${tableName6} set v2="update value", v1="2022-01-01 
00:00:00" where k1="c" and k2=3;"""
+    qt_sql "select * from ${tableName6} order by k1,k2" 
+
+    sql "DROP TABLE IF EXISTS ${tableName6}"
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to