This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 9c270e5cdf3 [fix](delete) Fix unrecognized column name delete handler 
(#32429) (#35742)
9c270e5cdf3 is described below

commit 9c270e5cdf361b8497d568e38b3c736c8da52744
Author: Gavin Chou <gavineaglec...@gmail.com>
AuthorDate: Fri May 31 20:41:22 2024 +0800

    [fix](delete) Fix unrecognized column name delete handler (#32429) (#35742)
    
    pick doris-master #32429
---
 be/src/olap/delete_handler.cpp                     | 73 +++++++++++--------
 be/src/olap/delete_handler.h                       | 10 ++-
 be/test/olap/delete_handler_test.cpp               | 40 +++++++++++
 .../java/org/apache/doris/common/FeNameFormat.java |  4 +-
 .../suites/delete_p0/test_delete_handler.groovy    | 84 ++++++++++++++++++++++
 5 files changed, 179 insertions(+), 32 deletions(-)

diff --git a/be/src/olap/delete_handler.cpp b/be/src/olap/delete_handler.cpp
index d2126bbdf3e..ca28d07254e 100644
--- a/be/src/olap/delete_handler.cpp
+++ b/be/src/olap/delete_handler.cpp
@@ -80,7 +80,15 @@ Status DeleteHandler::generate_delete_predicate(const 
TabletSchema& schema,
                       << "condition size=" << in_pred->values().size();
         } else {
             // write sub predicate v1 for compactbility
-            del_pred->add_sub_predicates(construct_sub_predicate(condition));
+            std::string condition_str = construct_sub_predicate(condition);
+            if (TCondition tmp; !DeleteHandler::parse_condition(condition_str, 
&tmp)) {
+                LOG(WARNING) << "failed to parse condition_str, condtion="
+                             << ThriftDebugString(condition);
+                return Status::Error<DELETE_INVALID_CONDITION>(
+                        "failed to parse condition_str, condtion={}", 
ThriftDebugString(condition));
+            }
+            VLOG_NOTICE << __PRETTY_FUNCTION__ << " condition_str: " << 
condition_str;
+            del_pred->add_sub_predicates(condition_str);
             DeleteSubPredicatePB* sub_predicate = 
del_pred->add_sub_predicates_v2();
             if (condition.__isset.column_unique_id) {
                 
sub_predicate->set_column_unique_id(condition.column_unique_id);
@@ -127,8 +135,9 @@ std::string DeleteHandler::construct_sub_predicate(const 
TCondition& condition)
     }
     string condition_str;
     if ("IS" == op) {
-        condition_str = condition.column_name + " " + op + " " + 
condition.condition_values[0];
-    } else {
+        // ATTN: tricky! Surround IS with spaces to make it "special"
+        condition_str = condition.column_name + " IS " + 
condition.condition_values[0];
+    } else { // multi-elements IN expr has been processed with InPredicatePB
         if (op == "*=") {
             op = "=";
         } else if (op == "!*=") {
@@ -286,44 +295,50 @@ Status DeleteHandler::parse_condition(const 
DeleteSubPredicatePB& sub_cond, TCon
     return Status::OK();
 }
 
+// clang-format off
+// Condition string format, the format is (column_name)(op)(value)
+// eg: condition_str="c1 = 1597751948193618247 and length(source)<1;\n;\n"
+// column_name: matches "c1", must include FeNameFormat.java COLUMN_NAME_REGEX
+//              and compactible with any the lagacy
+// operator: matches "="
+// value: matches "1597751948193618247  and length(source)<1;\n;\n"
+//
+// For more info, see DeleteHandler::construct_sub_predicates
+// FIXME(gavin): support unicode. And this is a tricky implementation, it 
should
+//               not be the final resolution, refactor it.
+const char* const CONDITION_STR_PATTERN =
+    // .----------------- column-name ----------------.   
.----------------------- operator ------------------------.   .------------ 
value ----------.
+    
R"(([_a-zA-Z@0-9\s/][.a-zA-Z0-9_+-/?@#$%^&*"\s,:]*)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:
 IS ))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
+    // '----------------- group 1 --------------------'   
'--------------------- group 2 ---------------------------'   | '-- group 4--'  
            |
+    //                                                         match any of: = 
!= >> << >= <= *= " IS "                 '----------- group 3 ---------'
+    //                                                                         
                                          match **ANY THING** without(4)
+    //                                                                         
                                          or with(3) single quote
+boost::regex DELETE_HANDLER_REGEX(CONDITION_STR_PATTERN);
+// clang-format on
+
 Status DeleteHandler::parse_condition(const std::string& condition_str, 
TCondition* condition) {
-    bool matched = true;
+    bool matched = false;
     boost::smatch what;
-
     try {
-        // Condition string format, the format is (column_name)(op)(value)
-        // eg:  condition_str="c1 = 1597751948193618247  and 
length(source)<1;\n;\n"
-        //  group1:  (\w+) matches "c1"
-        //  group2:  ((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS)) 
matches  "="
-        //  group3:  ((?:[\s\S]+)?) matches "1597751948193618247  and 
length(source)<1;\n;\n"
-        const char* const CONDITION_STR_PATTERN =
-                
R"(([\w$#%]+)\s*((?:=)|(?:!=)|(?:>>)|(?:<<)|(?:>=)|(?:<=)|(?:\*=)|(?:IS))\s*('((?:[\s\S]+)?)'|(?:[\s\S]+)?))";
-        boost::regex ex(CONDITION_STR_PATTERN);
-        if (boost::regex_match(condition_str, what, ex)) {
-            if (condition_str.size() != what[0].str().size()) {
-                matched = false;
-            }
-        } else {
-            matched = false;
-        }
+        VLOG_NOTICE << "condition_str: " << condition_str;
+        matched = boost::regex_match(condition_str, what, 
DELETE_HANDLER_REGEX) &&
+                  condition_str.size() == what[0].str().size(); // exact match
     } catch (boost::regex_error& e) {
         VLOG_NOTICE << "fail to parse expr. [expr=" << condition_str << "; 
error=" << e.what()
                     << "]";
-        matched = false;
     }
-
     if (!matched) {
         return Status::Error<DELETE_INVALID_PARAMETERS>("fail to sub 
condition. condition={}",
                                                         condition_str);
     }
-    condition->column_name = what[1].str();
-    condition->condition_op = what[2].str();
-    if (what[4].matched) { // match string with single quotes, eg. a = 'b'
-        condition->condition_values.push_back(what[4].str());
-    } else { // match string without quote, compat with old conditions, eg. a 
= b
-        condition->condition_values.push_back(what[3].str());
-    }
 
+    condition->column_name = what[1].str();
+    condition->condition_op = what[2].str() == " IS " ? "IS" : what[2].str();
+    // match string with single quotes, a = b  or a = 'b'
+    condition->condition_values.push_back(what[3 + !!what[4].matched].str());
+    VLOG_NOTICE << "parsed condition_str: col_name={" << 
condition->column_name << "} op={"
+                << condition->condition_op << "} val={" << 
condition->condition_values.back()
+                << "}";
     return Status::OK();
 }
 
diff --git a/be/src/olap/delete_handler.h b/be/src/olap/delete_handler.h
index 45be5d73ffe..bce18669c58 100644
--- a/be/src/olap/delete_handler.h
+++ b/be/src/olap/delete_handler.h
@@ -66,6 +66,15 @@ public:
 
     static void convert_to_sub_pred_v2(DeletePredicatePB* delete_pred, 
TabletSchemaSPtr schema);
 
+    /**
+     * Use regular expression to extract 'column_name', 'op' and 'operands'
+     *
+     * @param condition_str input predicate string in form of `X OP Y`
+     * @param condition output param
+     * @return OK if matched and extracted correctly otherwise 
DELETE_INVALID_PARAMETERS
+     */
+    static Status parse_condition(const std::string& condition_str, 
TCondition* condition);
+
 private:
     // Validate the condition on the schema.
     static Status check_condition_valid(const TabletSchema& tablet_schema, 
const TCondition& cond);
@@ -87,7 +96,6 @@ private:
 
     // extract 'column_name', 'op' and 'operands' to condition
     static Status parse_condition(const DeleteSubPredicatePB& sub_cond, 
TCondition* condition);
-    static Status parse_condition(const std::string& condition_str, 
TCondition* condition);
 
 public:
     DeleteHandler() = default;
diff --git a/be/test/olap/delete_handler_test.cpp 
b/be/test/olap/delete_handler_test.cpp
index a7d39ad81b7..f36aeac84cd 100644
--- a/be/test/olap/delete_handler_test.cpp
+++ b/be/test/olap/delete_handler_test.cpp
@@ -1192,4 +1192,44 @@ TEST_F(TestDeleteHandler, FilterDataVersion) {
     EXPECT_EQ(Status::OK(), res);
 }
 
+// clang-format off
+TEST_F(TestDeleteHandler, TestParseDeleteCondition) {
+    auto test = [](const std::tuple<std::string, bool, TCondition>& in) {
+        auto& [cond_str, exp_succ, exp_cond] = in;
+        TCondition parsed_cond;
+        EXPECT_EQ(DeleteHandler::parse_condition(cond_str, &parsed_cond), 
exp_succ) << " unexpected result, cond_str: " << cond_str;
+        if (exp_succ) EXPECT_EQ(parsed_cond, exp_cond) << " unexpected result, 
cond_str: " << cond_str;
+    };
+
+    auto gen_cond = [](const std::string& col, const std::string& op, const 
std::string& val) {
+        TCondition cond;
+        cond.__set_column_name(col);
+        cond.__set_condition_op(op);
+        cond.__set_condition_values(std::vector<std::string>{val});
+        return cond;
+    };
+
+    // <cond_str, parsed, expect_value>>
+    std::vector<std::tuple<std::string, bool, TCondition>> test_input {
+        {R"(abc=b)"             , true,  gen_cond(R"(abc)"   , "=" , R"(b)"    
     )}, // normal case
+        {R"(abc!=b)"            , true,  gen_cond(R"(abc)"   , "!=", R"(b)"    
     )}, // normal case
+        {R"(abc<=b)"            , true,  gen_cond(R"(abc)"   , "<=", R"(b)"    
     )}, // normal case
+        {R"(abc>=b)"            , true,  gen_cond(R"(abc)"   , ">=", R"(b)"    
     )}, // normal case
+        {R"(abc>>b)"            , true,  gen_cond(R"(abc)"   , ">>", R"(b)"    
     )}, // normal case
+        {R"(abc<<b)"            , true,  gen_cond(R"(abc)"   , "<<", R"(b)"    
     )}, // normal case
+        {R"(abc!='b')"          , true,  gen_cond(R"(abc)"   , "!=", R"(b)"    
     )}, // value surrounded by '
+        {R"(abc=)"              , true, gen_cond(R"(abc)"    , "=" , R"()"     
     )}, // missing value, it means not to be parsed succefully, how every it's 
a ignorable bug
+        {R"(@a*<<10086)"        , true,  gen_cond(R"(@a*)"   , "<<", 
R"(10086)"     )}, // column ends with *
+        {R"(*a=b)"              , false, gen_cond(R"(*a)"    , "=" , R"(b)"    
     )}, // starts with *
+        {R"(a*a>>WTF(10086))"   , true,  gen_cond(R"(a*a)"   , ">>", 
R"(WTF(10086))")}, // function
+        {R"(a-b IS NULL)"       , true,  gen_cond(R"(a-b)"   , "IS", R"(NULL)" 
     )}, // - in col name and test IS NULL
+        {R"(@a*-b IS NOT NULL)" , true,  gen_cond(R"(@a*-b)" , "IS", R"(NOT 
NULL)"  )}, // test IS NOT NULL
+        {R"(a IS b IS NOT NULL)", true,  gen_cond(R"(a IS b)", "IS", R"(NOT 
NULL)"  )}, // test " IS " in column name
+        {R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:=hell)", true, 
gen_cond(R"(_a-zA-Z@0-9 /.a-zA-Z0-9_+-/?@#$%^&*" ,:)", "=", R"(hell)")}, // 
hellbound column name
+        {R"(this is a col very 
loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon
 colum name=long)", true,  gen_cond(R"(this is a col very 
loooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooon
 colum name)", "=", R"(long)")}, // test " IS " in column name
+    };
+    for (auto& i : test_input) { test(i); }
+}
+// clang-format on
+
 } // namespace doris
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java 
b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
index 53d2f3ca8b6..922edd99d2f 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/FeNameFormat.java
@@ -33,7 +33,7 @@ public class FeNameFormat {
     private static final String UNDERSCORE_COMMON_NAME_REGEX = 
"^[_a-zA-Z][a-zA-Z0-9-_]{0,63}$";
     private static final String TABLE_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9-_]*$";
     private static final String USER_NAME_REGEX = "^[a-zA-Z][a-zA-Z0-9.-_]*$";
-    private static final String COLUMN_NAME_REGEX = 
"^[_a-zA-Z@0-9\\s<>/][.a-zA-Z0-9_+-/><?@#$%^&*\"\\s,:]{0,255}$";
+    private static final String COLUMN_NAME_REGEX = 
"^[_a-zA-Z@0-9\\s/][.a-zA-Z0-9_+-/?@#$%^&*\"\\s,:]{0,255}$";
 
     private static final String UNICODE_LABEL_REGEX = 
"^[-_A-Za-z0-9:\\p{L}]{1,128}$";
     private static final String UNICODE_COMMON_NAME_REGEX = 
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]{0,63}$";
@@ -41,7 +41,7 @@ public class FeNameFormat {
     private static final String UNICODE_TABLE_NAME_REGEX = 
"^[a-zA-Z\\p{L}][a-zA-Z0-9-_\\p{L}]*$";
     private static final String UNICODE_USER_NAME_REGEX = 
"^[a-zA-Z\\p{L}][a-zA-Z0-9.-_\\p{L}]*$";
     private static final String UNICODE_COLUMN_NAME_REGEX
-            = "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/><?@#$%^&*\\p{L}]{0,255}$";
+            = "^[_a-zA-Z@0-9\\p{L}][.a-zA-Z0-9_+-/?@#$%^&*\\p{L}]{0,255}$";
 
     public static final String FORBIDDEN_PARTITION_NAME = "placeholder_";
 
diff --git a/regression-test/suites/delete_p0/test_delete_handler.groovy 
b/regression-test/suites/delete_p0/test_delete_handler.groovy
new file mode 100644
index 00000000000..8c9ac33edc9
--- /dev/null
+++ b/regression-test/suites/delete_p0/test_delete_handler.groovy
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+/**
+ * Test delete with strange name
+ */
+suite("test_delete_handler") {
+    // test condition operator
+    sql """drop table if exists td1;"""
+    sql """
+    CREATE TABLE `td1` (
+      `id` int(11) NULL,
+      `name` varchar(255) NULL,
+      `score` int(11) NULL
+    ) ENGINE=OLAP
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES ( "replication_num" = "1" );
+    """
+
+    sql """insert into td1(id,name,`score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """insert into td1(id,name,`score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """insert into td1(id,name,`score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+
+    sql """delete from td1 where `score` = 4 and `score` = 3 and `score` = 
1;"""
+    sql """delete from td1 where `score` is null;"""
+    sql """delete from td1 where `score` is not null;"""
+    sql """delete from td1 where `score` in (1,2);"""
+    sql """delete from td1 where `score` not in (3,4);"""
+    sql """select * from td1;"""
+    sql """insert into td1(id,name,`score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """select * from td1;"""
+
+
+    // test column name
+    sql """drop table if exists td2;"""
+    sql """
+    CREATE TABLE `td2` (
+      `id` int(11) NULL,
+      `name` varchar(255) NULL,
+      `@score` int(11) NULL,
+      `scoreIS` int(11) NULL,
+      `sc ore` int(11) NULL,
+      `score IS score` int(11) NULL,
+      `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` int(11) NULL
+    ) ENGINE=OLAP
+    COMMENT 'OLAP'
+    DISTRIBUTED BY HASH(`id`) BUCKETS 1
+    PROPERTIES ( "replication_num" = "1" );
+    """
+
+    sql """insert into td2(id,name,`@score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """insert into td2(id,name,`@score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """insert into td2(id,name,`@score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+
+    sql """select * from td2;"""
+    sql """delete from td2 where `@score` = 88;"""
+    sql """delete from td2 where `scoreIS` is null;"""
+    sql """delete from td2 where `score IS score` is null;"""
+    sql """delete from td2 where `sc ore` is null;"""
+    sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` is null;"""
+    sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` is not null;"""
+    sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` in (1,2,3);"""
+    sql """delete from td2 where `_a-zA-Z0-9_.+-/?@#\$%^&*" ,:` not in 
(1,2,3);"""
+    sql """select * from td2;"""
+    sql """insert into td2(id,name,`@score`) 
values(1,"a",1),(2,"a",2),(3,"a",3),(4,"b",4);"""
+    sql """select * from td2;"""
+
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to