This is an automated email from the ASF dual-hosted git repository. liulijia pushed a commit to branch fix-46902 in repository https://gitbox.apache.org/repos/asf/doris.git
commit 68e4048319af4aa323c2f1ddf87a69e5d9800003 Author: liutang123 <liuli...@gmail.com> AuthorDate: Thu Jan 16 00:23:59 2025 +0800 [fix](DATE) Add some constraint when cast string to datetime --- be/src/vec/runtime/vdatetime_value.cpp | 174 ++++++++++++++++----- .../datetime_functions/test_cast_str_date_fail.out | 26 +++ .../datetime_functions/test_cast_str_date_fail.out | 26 +++ .../test_cast_str_date_fail.groovy | 63 ++++++++ .../test_cast_str_date_fail.groovy | 60 +++++++ 5 files changed, 313 insertions(+), 36 deletions(-) diff --git a/be/src/vec/runtime/vdatetime_value.cpp b/be/src/vec/runtime/vdatetime_value.cpp index 07508ede5c0..68a7d23280c 100644 --- a/be/src/vec/runtime/vdatetime_value.cpp +++ b/be/src/vec/runtime/vdatetime_value.cpp @@ -192,15 +192,78 @@ bool VecDateTimeValue::from_date_str_base(const char* date_str, int len, break; } - if (field_idx == 2 && *ptr == 'T') { - // YYYYMMDDTHHMMDD, skip 'T' and continue - ptr++; - field_idx++; - continue; + // escape separator + if (((1 << field_idx) & allow_space_mask) == 0) { + if (check_space(*ptr)) { + return false; + } + } else { + while(ptr < end && check_space(*ptr)) { + ptr++; + } + } + + if (ptr < end) { + if (field_idx == 2 && *ptr == 'T') { + // YYYYMMDDTHHMMDD, skip 'T' and continue + ptr++; + while(ptr < end && check_space(*ptr)) { + ptr++; + } + if (ptr == end || !isdigit(*ptr)) { + return false; + } + } else if (check_date_punct(*ptr)) { + switch(field_idx) { + case 0: + case 1: + if (*ptr == '-') { + has_bar = true; + ptr++; + if (ptr == end || !isdigit(*ptr)) { + return false; + } + break; + } else { + return false; + } + case 3: + case 4: + if (*ptr == ':') { + ptr++; + if (ptr == end || !isdigit(*ptr)) { + return false; + } + break; + } else { + return false; + } + case 5: + if (*ptr == '.') { + // Second part will handle ptr++; + break; + } else { + return false; + } + default: + return false; + } + } + } + + // escape separator + if (ptr < end && ((1 << field_idx) & allow_space_mask) == 0) { + if (check_space(*ptr)) { + return false; + } + } else { + while(ptr < end && check_space(*ptr)) { + ptr++; + } } // Second part - if (field_idx == 5) { + if (field_idx == 5 && ptr < end) { if (*ptr == '.') { ptr++; field_len = 6; @@ -211,18 +274,6 @@ bool VecDateTimeValue::from_date_str_base(const char* date_str, int len, field_idx++; continue; } - // escape separator - while (ptr < end && (check_date_punct(*ptr) || check_space(*ptr))) { - if (check_space(*ptr)) { - if (((1 << field_idx) & allow_space_mask) == 0) { - return false; - } - } - if (*ptr == '-') { - has_bar = true; - } - ptr++; - } field_idx++; } int num_field = field_idx; @@ -2114,15 +2165,78 @@ bool DateV2Value<T>::from_date_str_base(const char* date_str, int len, int scale break; } - if (field_idx == 2 && *ptr == 'T') { - // YYYYMMDDTHHMMDD, skip 'T' and continue - ptr++; - field_idx++; - continue; + // escape separator + if (((1 << field_idx) & allow_space_mask) == 0) { + if (check_space(*ptr)) { + return false; + } + } else { + while(ptr < end && check_space(*ptr)) { + ptr++; + } + } + + if (ptr < end) { + if (field_idx == 2 && *ptr == 'T') { + // YYYYMMDDTHHMMDD, skip 'T' and continue + ptr++; + while(ptr < end && check_space(*ptr)) { + ptr++; + } + if (ptr == end || !isdigit(*ptr)) { + return false; + } + } else if (check_date_punct(*ptr)) { + switch(field_idx) { + case 0: + case 1: + if (*ptr == '-') { + has_bar = true; + ptr++; + if (ptr == end || !isdigit(*ptr)) { + return false; + } + break; + } else { + return false; + } + case 3: + case 4: + if (*ptr == ':') { + ptr++; + if (ptr == end || !isdigit(*ptr)) { + return false; + } + break; + } else { + return false; + } + case 5: + if (*ptr == '.') { + // Second part will handle ptr++; + break; + } else { + return false; + } + default: + return false; + } + } } + // escape separator + if (ptr < end && ((1 << field_idx) & allow_space_mask) == 0) { + if (check_space(*ptr)) { + return false; + } + } else { + while(ptr < end && check_space(*ptr)) { + ptr++; + } + } + // Second part - if (field_idx == 5) { + if (field_idx == 5 && ptr < end) { if (*ptr == '.') { ptr++; // for datetime, we need to discard the fraction part @@ -2140,18 +2254,6 @@ bool DateV2Value<T>::from_date_str_base(const char* date_str, int len, int scale field_idx++; continue; } - // escape separator - while (ptr < end && (check_date_punct(*ptr) || check_space(*ptr))) { - if (check_space(*ptr)) { - if (((1 << field_idx) & allow_space_mask) == 0) { - return false; - } - } - if (*ptr == '-') { - has_bar = true; - } - ptr++; - } field_idx++; } diff --git a/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out b/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out new file mode 100644 index 00000000000..ec9638bf69f --- /dev/null +++ b/regression-test/data/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !cast_fail1 -- +1 2028---___%%%01-__01dfsdTfaTTTTT01:01 \N +2 2028-01-01__wer1q23rads \N +3 2028__01-01dfsdTfaTTTTT01:01 \N +4 2028-01a01 T 10:10:10.234 \N +5 2028-01-01_ \N +6 2028-01-01} \N +7 2028- \N +8 2028-01- \N +9 2028-01-01T \N +10 2028-01-01T10: \N +11 2028-01-01T10:10: \N +12 2028-01-01T10:10:10. 2028-01-01T10:10:10 +13 2028 \N +14 2028-01 \N +15 2028-01-01 2028-01-01T00:00 +16 2028-01-01 T 10 2028-01-01T10:00 +17 2028-01-01 T 10:10 2028-01-01T10:10 +18 2028-01-01 T 10:10:10 2028-01-01T10:10:10 +19 2028-01-01 T 10:10:10.234 2028-01-01T10:10:10 +20 2028-01-01 T 10:10:10.234a \N + +-- !cast_fail2 -- +15 2028-01-01 2028-01-01T00:00 true + diff --git a/regression-test/data/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out b/regression-test/data/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out new file mode 100644 index 00000000000..ec9638bf69f --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !cast_fail1 -- +1 2028---___%%%01-__01dfsdTfaTTTTT01:01 \N +2 2028-01-01__wer1q23rads \N +3 2028__01-01dfsdTfaTTTTT01:01 \N +4 2028-01a01 T 10:10:10.234 \N +5 2028-01-01_ \N +6 2028-01-01} \N +7 2028- \N +8 2028-01- \N +9 2028-01-01T \N +10 2028-01-01T10: \N +11 2028-01-01T10:10: \N +12 2028-01-01T10:10:10. 2028-01-01T10:10:10 +13 2028 \N +14 2028-01 \N +15 2028-01-01 2028-01-01T00:00 +16 2028-01-01 T 10 2028-01-01T10:00 +17 2028-01-01 T 10:10 2028-01-01T10:10 +18 2028-01-01 T 10:10:10 2028-01-01T10:10:10 +19 2028-01-01 T 10:10:10.234 2028-01-01T10:10:10 +20 2028-01-01 T 10:10:10.234a \N + +-- !cast_fail2 -- +15 2028-01-01 2028-01-01T00:00 true + diff --git a/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy new file mode 100644 index 00000000000..a1584da7acb --- /dev/null +++ b/regression-test/suites/nereids_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import java.text.SimpleDateFormat + +suite("test_cast_str_date_fail") { + sql """ SET enable_profile = true """ + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + sql """ DROP TABLE IF EXISTS aec_46902; """ + sql """ + CREATE TABLE IF NOT EXISTS aec_46902( + `ID` bigint NOT NULL, + `CD` date NOT NULL, + `PI` varchar(160) NOT NULL + ) + DISTRIBUTED BY HASH(`ID`) BUCKETS 2 + properties("replication_num" = "1"); + """ + + sql """ insert into aec_46902 values + (1, "2028-01-01", "2028---___%%%01-__01dfsdTfaTTTTT01:01"), + (2, "2028-01-01", "2028-01-01__wer1q23rads"), + (3, "2028-01-01", "2028__01-01dfsdTfaTTTTT01:01"), + (4, "2028-01-01", "2028-01a01 T 10:10:10.234"), + (5, "2028-01-01", "2028-01-01_"), + (6, "2028-01-01", "2028-01-01}"), + (7, "2028-01-01", "2028-"), + (8, "2028-01-01", "2028-01-"), + (9, "2028-01-01", "2028-01-01T"), + (10, "2028-01-01", "2028-01-01T10:"), + (11, "2028-01-01", "2028-01-01T10:10:"), + (12, "2028-01-01", "2028-01-01T10:10:10."), + (13, "2028-01-01", "2028"), + (14, "2028-01-01", "2028-01"), + (15, "2028-01-01", "2028-01-01"), + (16, "2028-01-01", "2028-01-01 T 10"), + (17, "2028-01-01", "2028-01-01 T 10:10"), + (18, "2028-01-01", "2028-01-01 T 10:10:10"), + (19, "2028-01-01", "2028-01-01 T 10:10:10.234"), + (20, "2028-01-01", "2028-01-01 T 10:10:10.234a"); + """ + + qt_cast_fail1 "select ID, PI, cast(trim(PI) AS datetimev2) from aec_46902 where CD = trim('2028-01-01') order by ID" + qt_cast_fail2 "select ID, PI, cast(trim(PI) AS datetimev2), PI=CD from aec_46902 where CD = trim('2028-01-01') and PI=CD order by ID" + + sql """ DROP TABLE IF EXISTS aec_46902; """ +} diff --git a/regression-test/suites/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy new file mode 100644 index 00000000000..5298f7f4902 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_cast_str_date_fail.groovy @@ -0,0 +1,60 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import java.text.SimpleDateFormat + +suite("test_cast_str_date_fail") { + + sql """ DROP TABLE IF EXISTS aec_46902; """ + sql """ + CREATE TABLE IF NOT EXISTS aec_46902( + `ID` bigint NOT NULL, + `CD` date NOT NULL, + `PI` varchar(160) NOT NULL + ) + DISTRIBUTED BY HASH(`ID`) BUCKETS 2 + properties("replication_num" = "1"); + """ + + sql """ insert into aec_46902 values + (1, "2028-01-01", "2028---___%%%01-__01dfsdTfaTTTTT01:01"), + (2, "2028-01-01", "2028-01-01__wer1q23rads"), + (3, "2028-01-01", "2028__01-01dfsdTfaTTTTT01:01"), + (4, "2028-01-01", "2028-01a01 T 10:10:10.234"), + (5, "2028-01-01", "2028-01-01_"), + (6, "2028-01-01", "2028-01-01}"), + (7, "2028-01-01", "2028-"), + (8, "2028-01-01", "2028-01-"), + (9, "2028-01-01", "2028-01-01T"), + (10, "2028-01-01", "2028-01-01T10:"), + (11, "2028-01-01", "2028-01-01T10:10:"), + (12, "2028-01-01", "2028-01-01T10:10:10."), + (13, "2028-01-01", "2028"), + (14, "2028-01-01", "2028-01"), + (15, "2028-01-01", "2028-01-01"), + (16, "2028-01-01", "2028-01-01 T 10"), + (17, "2028-01-01", "2028-01-01 T 10:10"), + (18, "2028-01-01", "2028-01-01 T 10:10:10"), + (19, "2028-01-01", "2028-01-01 T 10:10:10.234"), + (20, "2028-01-01", "2028-01-01 T 10:10:10.234a"); + """ + + qt_cast_fail1 "select ID, PI, cast(trim(PI) AS datetimev2) from aec_46902 where CD = trim('2028-01-01') order by ID" + qt_cast_fail2 "select ID, PI, cast(trim(PI) AS datetimev2), PI=CD from aec_46902 where CD = trim('2028-01-01') and PI=CD order by ID" + + sql """ DROP TABLE IF EXISTS aec_46902; """ +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org