This is an automated email from the ASF dual-hosted git repository.

liulijia pushed a commit to branch fix-46902
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 68e4048319af4aa323c2f1ddf87a69e5d9800003
Author: liutang123 <liuli...@gmail.com>
AuthorDate: Thu Jan 16 00:23:59 2025 +0800

    [fix](DATE) Add some constraint when cast string to datetime
---
 be/src/vec/runtime/vdatetime_value.cpp             | 174 ++++++++++++++++-----
 .../datetime_functions/test_cast_str_date_fail.out |  26 +++
 .../datetime_functions/test_cast_str_date_fail.out |  26 +++
 .../test_cast_str_date_fail.groovy                 |  63 ++++++++
 .../test_cast_str_date_fail.groovy                 |  60 +++++++
 5 files changed, 313 insertions(+), 36 deletions(-)

diff --git a/be/src/vec/runtime/vdatetime_value.cpp 
b/be/src/vec/runtime/vdatetime_value.cpp
index 07508ede5c0..68a7d23280c 100644
--- a/be/src/vec/runtime/vdatetime_value.cpp
+++ b/be/src/vec/runtime/vdatetime_value.cpp
@@ -192,15 +192,78 @@ bool VecDateTimeValue::from_date_str_base(const char* 
date_str, int len,
             break;
         }
 
-        if (field_idx == 2 && *ptr == 'T') {
-            // YYYYMMDDTHHMMDD, skip 'T' and continue
-            ptr++;
-            field_idx++;
-            continue;
+        // escape separator
+        if (((1 << field_idx) & allow_space_mask) == 0) {
+            if (check_space(*ptr)) {
+                return false;
+            }
+        } else {
+            while(ptr < end && check_space(*ptr)) {
+                ptr++;
+            }
+        }
+
+        if (ptr < end) {
+            if (field_idx == 2 && *ptr == 'T') {
+                // YYYYMMDDTHHMMDD, skip 'T' and continue
+                ptr++;
+                while(ptr < end && check_space(*ptr)) {
+                    ptr++;
+                }
+                if (ptr == end || !isdigit(*ptr)) {
+                    return false;
+                }
+            } else if (check_date_punct(*ptr)) {
+                switch(field_idx) {
+                case 0:
+                case 1:
+                    if (*ptr == '-') {
+                        has_bar = true;
+                        ptr++;
+                        if (ptr == end || !isdigit(*ptr)) {
+                            return false;
+                        }
+                        break;
+                    } else {
+                        return false;
+                    }
+                case 3:
+                case 4:
+                    if (*ptr == ':') {
+                        ptr++;
+                        if (ptr == end || !isdigit(*ptr)) {
+                            return false;
+                        }
+                        break;
+                    } else {
+                        return false;
+                    }
+                case 5:
+                    if (*ptr == '.') {
+                        // Second part will handle ptr++;
+                        break;
+                    } else {
+                        return false;
+                    }
+                default:
+                    return false;
+                }
+            }
+        }
+
+        // escape separator
+        if (ptr < end && ((1 << field_idx) & allow_space_mask) == 0) {
+            if (check_space(*ptr)) {
+                return false;
+            }
+        } else {
+            while(ptr < end && check_space(*ptr)) {
+                ptr++;
+            }
         }
 
         // Second part
-        if (field_idx == 5) {
+        if (field_idx == 5 && ptr < end) {
             if (*ptr == '.') {
                 ptr++;
                 field_len = 6;
@@ -211,18 +274,6 @@ bool VecDateTimeValue::from_date_str_base(const char* 
date_str, int len,
             field_idx++;
             continue;
         }
-        // escape separator
-        while (ptr < end && (check_date_punct(*ptr) || check_space(*ptr))) {
-            if (check_space(*ptr)) {
-                if (((1 << field_idx) & allow_space_mask) == 0) {
-                    return false;
-                }
-            }
-            if (*ptr == '-') {
-                has_bar = true;
-            }
-            ptr++;
-        }
         field_idx++;
     }
     int num_field = field_idx;
@@ -2114,15 +2165,78 @@ bool DateV2Value<T>::from_date_str_base(const char* 
date_str, int len, int scale
             break;
         }
 
-        if (field_idx == 2 && *ptr == 'T') {
-            // YYYYMMDDTHHMMDD, skip 'T' and continue
-            ptr++;
-            field_idx++;
-            continue;
+        // escape separator
+        if (((1 << field_idx) & allow_space_mask) == 0) {
+            if (check_space(*ptr)) {
+                return false;
+            }
+        } else {
+            while(ptr < end && check_space(*ptr)) {
+                ptr++;
+            }
+        }
+
+        if (ptr < end) {
+            if (field_idx == 2 && *ptr == 'T') {
+                // YYYYMMDDTHHMMDD, skip 'T' and continue
+                ptr++;
+                while(ptr < end && check_space(*ptr)) {
+                    ptr++;
+                }
+                if (ptr == end || !isdigit(*ptr)) {
+                    return false;
+                }
+            } else if (check_date_punct(*ptr)) {
+                switch(field_idx) {
+                case 0:
+                case 1:
+                    if (*ptr == '-') {
+                        has_bar = true;
+                        ptr++;
+                        if (ptr == end || !isdigit(*ptr)) {
+                            return false;
+                        }
+                        break;
+                    } else {
+                        return false;
+                    }
+                case 3:
+                case 4:
+                    if (*ptr == ':') {
+                        ptr++;
+                        if (ptr == end || !isdigit(*ptr)) {
+                            return false;
+                        }
+                        break;
+                    } else {
+                        return false;
+                    }
+                case 5:
+                    if (*ptr == '.') {
+                        // Second part will handle ptr++;
+                        break;
+                    } else {
+                        return false;
+                    }
+                default:
+                    return false;
+                }
+            }
         }
 
+        // escape separator
+        if (ptr < end && ((1 << field_idx) & allow_space_mask) == 0) {
+            if (check_space(*ptr)) {
+                return false;
+            }
+        } else {
+            while(ptr < end && check_space(*ptr)) {
+                ptr++;
+            }
+         }
+
         // Second part
-        if (field_idx == 5) {
+        if (field_idx == 5 && ptr < end) {
             if (*ptr == '.') {
                 ptr++;
                 // for datetime, we need to discard the fraction part
@@ -2140,18 +2254,6 @@ bool DateV2Value<T>::from_date_str_base(const char* 
date_str, int len, int scale
             field_idx++;
             continue;
         }
-        // escape separator
-        while (ptr < end && (check_date_punct(*ptr) || check_space(*ptr))) {
-            if (check_space(*ptr)) {
-                if (((1 << field_idx) & allow_space_mask) == 0) {
-                    return false;
-                }
-            }
-            if (*ptr == '-') {
-                has_bar = true;
-            }
-            ptr++;
-        }
         field_idx++;
     }
 
diff --git 
a/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out
 
b/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out
new file mode 100644
index 00000000000..ec9638bf69f
--- /dev/null
+++ 
b/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out
@@ -0,0 +1,26 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !cast_fail1 --
+1      2028---___%%%01-__01dfsdTfaTTTTT01:01   \N
+2      2028-01-01__wer1q23rads \N
+3      2028__01-01dfsdTfaTTTTT01:01    \N
+4      2028-01a01 T  10:10:10.234      \N
+5      2028-01-01_     \N
+6      2028-01-01}     \N
+7      2028-   \N
+8      2028-01-        \N
+9      2028-01-01T     \N
+10     2028-01-01T10:  \N
+11     2028-01-01T10:10:       \N
+12     2028-01-01T10:10:10.    2028-01-01T10:10:10
+13     2028    \N
+14     2028-01 \N
+15     2028-01-01      2028-01-01T00:00
+16     2028-01-01 T  10        2028-01-01T10:00
+17     2028-01-01 T  10:10     2028-01-01T10:10
+18     2028-01-01 T  10:10:10  2028-01-01T10:10:10
+19     2028-01-01 T  10:10:10.234      2028-01-01T10:10:10
+20     2028-01-01 T  10:10:10.234a     \N
+
+-- !cast_fail2 --
+15     2028-01-01      2028-01-01T00:00        true
+
diff --git 
a/regression-test/data/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out
 
b/regression-test/data/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out
new file mode 100644
index 00000000000..ec9638bf69f
--- /dev/null
+++ 
b/regression-test/data/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out
@@ -0,0 +1,26 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !cast_fail1 --
+1      2028---___%%%01-__01dfsdTfaTTTTT01:01   \N
+2      2028-01-01__wer1q23rads \N
+3      2028__01-01dfsdTfaTTTTT01:01    \N
+4      2028-01a01 T  10:10:10.234      \N
+5      2028-01-01_     \N
+6      2028-01-01}     \N
+7      2028-   \N
+8      2028-01-        \N
+9      2028-01-01T     \N
+10     2028-01-01T10:  \N
+11     2028-01-01T10:10:       \N
+12     2028-01-01T10:10:10.    2028-01-01T10:10:10
+13     2028    \N
+14     2028-01 \N
+15     2028-01-01      2028-01-01T00:00
+16     2028-01-01 T  10        2028-01-01T10:00
+17     2028-01-01 T  10:10     2028-01-01T10:10
+18     2028-01-01 T  10:10:10  2028-01-01T10:10:10
+19     2028-01-01 T  10:10:10.234      2028-01-01T10:10:10
+20     2028-01-01 T  10:10:10.234a     \N
+
+-- !cast_fail2 --
+15     2028-01-01      2028-01-01T00:00        true
+
diff --git 
a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy
 
b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy
new file mode 100644
index 00000000000..a1584da7acb
--- /dev/null
+++ 
b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import java.text.SimpleDateFormat
+
+suite("test_cast_str_date_fail") {
+    sql """ SET enable_profile = true """
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+
+    sql """ DROP TABLE IF EXISTS aec_46902; """
+    sql """
+     CREATE TABLE IF NOT EXISTS aec_46902(
+            `ID` bigint NOT NULL,
+            `CD` date NOT NULL,
+            `PI` varchar(160) NOT NULL
+            )
+            DISTRIBUTED BY HASH(`ID`) BUCKETS 2
+            properties("replication_num" = "1");
+    """
+    
+    sql """ insert into aec_46902 values 
+    (1, "2028-01-01", "2028---___%%%01-__01dfsdTfaTTTTT01:01"),
+    (2, "2028-01-01", "2028-01-01__wer1q23rads"),
+    (3, "2028-01-01", "2028__01-01dfsdTfaTTTTT01:01"),
+    (4, "2028-01-01", "2028-01a01 T  10:10:10.234"),
+    (5, "2028-01-01", "2028-01-01_"),
+    (6, "2028-01-01", "2028-01-01}"),
+    (7, "2028-01-01", "2028-"),
+    (8, "2028-01-01", "2028-01-"),
+    (9, "2028-01-01", "2028-01-01T"),
+    (10, "2028-01-01", "2028-01-01T10:"),
+    (11, "2028-01-01", "2028-01-01T10:10:"),
+    (12, "2028-01-01", "2028-01-01T10:10:10."),
+    (13, "2028-01-01", "2028"),
+    (14, "2028-01-01", "2028-01"),
+    (15, "2028-01-01", "2028-01-01"),
+    (16, "2028-01-01", "2028-01-01 T  10"),
+    (17, "2028-01-01", "2028-01-01 T  10:10"),
+    (18, "2028-01-01", "2028-01-01 T  10:10:10"),
+    (19, "2028-01-01", "2028-01-01 T  10:10:10.234"),
+    (20, "2028-01-01", "2028-01-01 T  10:10:10.234a");
+    """
+
+    qt_cast_fail1 "select ID, PI, cast(trim(PI) AS datetimev2) from aec_46902 
where CD = trim('2028-01-01') order by ID"
+    qt_cast_fail2 "select ID, PI, cast(trim(PI) AS datetimev2), PI=CD from 
aec_46902 where CD = trim('2028-01-01') and PI=CD order by ID"
+
+    sql """ DROP TABLE IF EXISTS aec_46902; """
+}
diff --git 
a/regression-test/suites/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy
 
b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy
new file mode 100644
index 00000000000..5298f7f4902
--- /dev/null
+++ 
b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy
@@ -0,0 +1,60 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import java.text.SimpleDateFormat
+
+suite("test_cast_str_date_fail") {
+
+    sql """ DROP TABLE IF EXISTS aec_46902; """
+    sql """
+     CREATE TABLE IF NOT EXISTS aec_46902(
+            `ID` bigint NOT NULL,
+            `CD` date NOT NULL,
+            `PI` varchar(160) NOT NULL
+            )
+            DISTRIBUTED BY HASH(`ID`) BUCKETS 2
+            properties("replication_num" = "1");
+    """
+    
+    sql """ insert into aec_46902 values 
+    (1, "2028-01-01", "2028---___%%%01-__01dfsdTfaTTTTT01:01"),
+    (2, "2028-01-01", "2028-01-01__wer1q23rads"),
+    (3, "2028-01-01", "2028__01-01dfsdTfaTTTTT01:01"),
+    (4, "2028-01-01", "2028-01a01 T  10:10:10.234"),
+    (5, "2028-01-01", "2028-01-01_"),
+    (6, "2028-01-01", "2028-01-01}"),
+    (7, "2028-01-01", "2028-"),
+    (8, "2028-01-01", "2028-01-"),
+    (9, "2028-01-01", "2028-01-01T"),
+    (10, "2028-01-01", "2028-01-01T10:"),
+    (11, "2028-01-01", "2028-01-01T10:10:"),
+    (12, "2028-01-01", "2028-01-01T10:10:10."),
+    (13, "2028-01-01", "2028"),
+    (14, "2028-01-01", "2028-01"),
+    (15, "2028-01-01", "2028-01-01"),
+    (16, "2028-01-01", "2028-01-01 T  10"),
+    (17, "2028-01-01", "2028-01-01 T  10:10"),
+    (18, "2028-01-01", "2028-01-01 T  10:10:10"),
+    (19, "2028-01-01", "2028-01-01 T  10:10:10.234"),
+    (20, "2028-01-01", "2028-01-01 T  10:10:10.234a");
+    """
+
+    qt_cast_fail1 "select ID, PI, cast(trim(PI) AS datetimev2) from aec_46902 
where CD = trim('2028-01-01') order by ID"
+    qt_cast_fail2 "select ID, PI, cast(trim(PI) AS datetimev2), PI=CD from 
aec_46902 where CD = trim('2028-01-01') and PI=CD order by ID"
+
+    sql """ DROP TABLE IF EXISTS aec_46902; """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to