This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push: new cc1a5fb [Function] Support '%' in date format string (#3037) cc1a5fb is described below commit cc1a5fb8ea973b8344272dd15477be90f0da8336 Author: Mingyu Chen <morningman....@gmail.com> AuthorDate: Thu Mar 5 08:56:02 2020 +0800 [Function] Support '%' in date format string (#3037) eg: select str_to_date('2014-12-21 12%3A34%3A56', '%Y-%m-%d %H%%3A%i%%3A%s'); select unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s'); This also enable us to extract column fields from HDFS file path with contains '%'. --- be/src/runtime/datetime_value.cpp | 7 +++++++ be/test/runtime/datetime_value_test.cpp | 22 +++++++++++++++++++++ .../date-time-functions/date_format.md | 11 ++++++++++- .../date-time-functions/str_to_date.md | 7 +++++++ .../date-time-functions/unix_timestamp.md | 18 ++++++++++++++++- .../Data Manipulation/BROKER LOAD.md | 23 ++++++++++++++++++++++ .../date-time-functions/date_format_EN.md | 9 +++++++++ .../date-time-functions/str_to_date_EN.md | 10 +++++++++- .../date-time-functions/unix_timestamp_EN.md | 16 +++++++++++++++ .../Data Manipulation/BROKER LOAD_EN.md | 23 ++++++++++++++++++++++ 10 files changed, 143 insertions(+), 3 deletions(-) diff --git a/be/src/runtime/datetime_value.cpp b/be/src/runtime/datetime_value.cpp index 6aaeab0..4953490 100644 --- a/be/src/runtime/datetime_value.cpp +++ b/be/src/runtime/datetime_value.cpp @@ -1108,6 +1108,7 @@ bool DateTimeValue::from_date_format_str( bool strict_week_number_year_type = false; int strict_week_number_year = -1; bool usa_time = false; + while (ptr < end && val < val_end) { // Skip space character while (val < val_end && isspace(*val)) { @@ -1354,6 +1355,12 @@ bool DateTimeValue::from_date_format_str( val++; } break; + case '%': // %%, escape the % + if ('%' != *val) { + return false; + } + val++; + break; default: return false; } diff --git a/be/test/runtime/datetime_value_test.cpp b/be/test/runtime/datetime_value_test.cpp index c050dfc..9082572 100644 --- a/be/test/runtime/datetime_value_test.cpp +++ b/be/test/runtime/datetime_value_test.cpp @@ -513,6 +513,22 @@ TEST_F(DateTimeValueTest, from_date_format_str) { format_str.c_str(), format_str.size(), value_str.c_str(), value_str.size())); value.to_string(str); ASSERT_STREQ("1988-02-01 03:04:05", str); + + // escape % + format_str = "%Y-%m-%d %H%%3A%i%%3A%s"; + value_str = "2020-02-26 00%3A00%3A00"; + ASSERT_TRUE(value.from_date_format_str( + format_str.c_str(), format_str.size(), value_str.c_str(), value_str.size())); + value.to_string(str); + ASSERT_STREQ("2020-02-26 00:00:00", str); + + // escape % + format_str = "%Y-%m-%d%%%% %H%%3A%i%%3A%s"; + value_str = "2020-02-26%% 00%3A00%3A00"; + ASSERT_TRUE(value.from_date_format_str( + format_str.c_str(), format_str.size(), value_str.c_str(), value_str.size())); + value.to_string(str); + ASSERT_STREQ("2020-02-26 00:00:00", str); } // Calculate format @@ -539,10 +555,16 @@ TEST_F(DateTimeValueTest, from_date_format_str_invalid) { value_str = "2015 1 1"; ASSERT_FALSE(value.from_date_format_str( format_str.c_str(), format_str.size(), value_str.c_str(), value_str.size())); + format_str = "%x %V %w"; value_str = "2015 1 1"; ASSERT_FALSE(value.from_date_format_str( format_str.c_str(), format_str.size(), value_str.c_str(), value_str.size())); + + format_str = "%Y-%m-%d %H%3A%i%3A%s"; + value_str = "2020-02-26 00%3A00%3A00"; + ASSERT_FALSE(value.from_date_format_str( + format_str.c_str(), format_str.size(), value_str.c_str(), value_str.size())); } // Calculate format TEST_F(DateTimeValueTest, format_str) { diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md index a4afa60..7f8107a 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md @@ -91,7 +91,9 @@ date 参数是合法的日期。format 规定日期/时间的输出格式。 %Y | 年,4 位 -%y | 年,2 位 +%y | 年,2 位 + +%% | 用于表示 % ## example @@ -137,6 +139,13 @@ mysql> select date_format('2006-06-01', '%d'); +------------------------------------------+ | 01 | +------------------------------------------+ + +mysql> select date_format('2006-06-01', '%%%d'); ++--------------------------------------------+ +| date_format('2006-06-01 00:00:00', '%%%d') | ++--------------------------------------------+ +| %01 | ++--------------------------------------------+ ``` ## keyword diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md index dc4840c..630e601 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md @@ -38,6 +38,13 @@ mysql> select str_to_date('2014-12-21 12:34:56', '%Y-%m-%d %H:%i:%s'); | 2014-12-21 12:34:56 | +---------------------------------------------------------+ +mysql> select str_to_date('2014-12-21 12:34%3A56', '%Y-%m-%d %H:%i%%3A%s'); ++--------------------------------------------------------------+ +| str_to_date('2014-12-21 12:34%3A56', '%Y-%m-%d %H:%i%%3A%s') | ++--------------------------------------------------------------+ +| 2014-12-21 12:34:56 | ++--------------------------------------------------------------+ + mysql> select str_to_date('200442 Monday', '%X%V %W'); +-----------------------------------------+ | str_to_date('200442 Monday', '%X%V %W') | diff --git a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md index afe4bd1..7e0f061 100644 --- a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md +++ b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md @@ -21,7 +21,7 @@ under the License. ## description ### Syntax -`INT UNIX_TIMESTAMP(), UNIX_TIMESTAMP(DATETIME date)` +`INT UNIX_TIMESTAMP(), UNIX_TIMESTAMP(DATETIME date), UNIX_TIMESTAMP(DATETIME date, STRING fmt),` 将 Date 或者 Datetime 类型转化为 unix 时间戳。 @@ -31,6 +31,8 @@ under the License. 对于在 1970-01-01 00:00:00 之前或 2038-01-19 03:14:07 之后的时间,该函数将返回 0。 +Format 的格式请参阅 `date_format` 函数的格式说明。 + 该函数受时区影响。 ## example @@ -50,6 +52,20 @@ mysql> select unix_timestamp('2007-11-30 10:30:19'); | 1196389819 | +---------------------------------------+ +mysql> select unix_timestamp('2007-11-30 10:30-19', '%Y-%m-%d %H:%i-%s'); ++---------------------------------------+ +| unix_timestamp('2007-11-30 10:30-19') | ++---------------------------------------+ +| 1196389819 | ++---------------------------------------+ + +mysql> select unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s'); ++---------------------------------------+ +| unix_timestamp('2007-11-30 10:30%3A19') | ++---------------------------------------+ +| 1196389819 | ++---------------------------------------+ + mysql> select unix_timestamp('1969-01-01 00:00:00'); +---------------------------------------+ | unix_timestamp('1969-01-01 00:00:00') | diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md index 08061b2..5e96ed9 100644 --- a/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md +++ b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER LOAD.md @@ -399,6 +399,29 @@ under the License. "fs.s3a.secret.key" = "yyyyyyyyyyyyyyyyyyyy", "fs.s3a.endpoint" = "s3.amazonaws.com" ) + + 12. 提取文件路径中的时间分区字段,并且时间包含 %3A (在 hdfs 路径中,不允许有 ':',所有 ':' 会由 %3A 替换) + + 假设有如下文件: + + /user/data/data_time=2020-02-17 00%3A00%3A00/test.txt + /user/data/data_time=2020-02-18 00%3A00%3A00/test.txt + + 表结构为: + data_time DATETIME, + k2 INT, + k3 INT + + LOAD LABEL example_db.label12 + ( + DATA INFILE("hdfs://host:port/user/data/*/test.txt") + INTO TABLE `tbl12` + COLUMNS TERMINATED BY "," + (k2,k3) + COLUMNS FROM PATH AS (data_time) + SET (data_time=str_to_date(data_time, '%Y-%m-%d %H%%3A%i%%3A%s')) + ) + WITH BROKER "hdfs" ("username"="user", "password"="pass"); ## keyword diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md index 4b04f33..4cd4410 100644 --- a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md @@ -93,6 +93,8 @@ The formats available are: % Y | Year, 2 +%% | Represent % + ## example ``` @@ -137,6 +139,13 @@ mysql> select date_format('2006-06-01', '%d'); +------------------------------------------+ | 01 | +------------------------------------------+ + +mysql> select date_format('2006-06-01', '%%%d'); ++--------------------------------------------+ +| date_format('2006-06-01 00:00:00', '%%%d') | ++--------------------------------------------+ +| %01 | ++--------------------------------------------+ ``` ##keyword DATE_FORMAT,DATE,FORMAT diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md index 5cf860c..7957a64 100644 --- a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md @@ -38,6 +38,13 @@ mysql> select str_to_date('2014-12-21 12:34:56', '%Y-%m-%d %H:%i:%s'); | 2014-12-21 12:34:56 | +---------------------------------------------------------+ +mysql> select str_to_date('2014-12-21 12:34%3A56', '%Y-%m-%d %H:%i%%3A%s'); ++--------------------------------------------------------------+ +| str_to_date('2014-12-21 12:34%3A56', '%Y-%m-%d %H:%i%%3A%s') | ++--------------------------------------------------------------+ +| 2014-12-21 12:34:56 | ++--------------------------------------------------------------+ + mysql> select str_to_date('200442 Monday', '%X%V %W'); +-----------------------------------------+ | str_to_date('200442 Monday', '%X%V %W') | @@ -46,4 +53,5 @@ mysql> select str_to_date('200442 Monday', '%X%V %W'); +-----------------------------------------+ ``` ##keyword -STR_TO_DATE,STR,TO,DATE + + STR_TO_DATE,STR,TO,DATE diff --git a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md index 83c8eda..611ba8a 100644 --- a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md +++ b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md @@ -31,6 +31,8 @@ The parameter needs to be Date or Datetime type. Any date before 1970-01-01 00:00:00 or after 2038-01-19 03:14:07 will return 0. +See `date_format` function to get Format explanation. + This function is affected by time zone. ## example @@ -50,6 +52,20 @@ mysql> select unix_timestamp('2007-11-30 10:30:19'); | 1196389819 | +---------------------------------------+ +mysql> select unix_timestamp('2007-11-30 10:30-19', '%Y-%m-%d %H:%i-%s'); ++---------------------------------------+ +| unix_timestamp('2007-11-30 10:30-19') | ++---------------------------------------+ +| 1196389819 | ++---------------------------------------+ + +mysql> select unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s'); ++---------------------------------------+ +| unix_timestamp('2007-11-30 10:30%3A19') | ++---------------------------------------+ +| 1196389819 | ++---------------------------------------+ + mysql> select unix_timestamp('1969-01-01 00:00:00'); +---------------------------------------+ | unix_timestamp('1969-01-01 00:00:00') | diff --git a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md index af2f487..d4bb70f 100644 --- a/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md +++ b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER LOAD_EN.md @@ -392,6 +392,29 @@ under the License. INTO TABLE `my_table` where k1 > k2 ); + + 11. Extract date partition fields in file paths, and date time include %3A (in hdfs path, all ':' will be replaced by '%3A') + + Assume we have files: + + /user/data/data_time=2020-02-17 00%3A00%3A00/test.txt + /user/data/data_time=2020-02-18 00%3A00%3A00/test.txt + + Table schema is: + data_time DATETIME, + k2 INT, + k3 INT + + LOAD LABEL example_db.label12 + ( + DATA INFILE("hdfs://host:port/user/data/*/test.txt") + INTO TABLE `tbl12` + COLUMNS TERMINATED BY "," + (k2,k3) + COLUMNS FROM PATH AS (data_time) + SET (data_time=str_to_date(data_time, '%Y-%m-%d %H%%3A%i%%3A%s')) + ) + WITH BROKER "hdfs" ("username"="user", "password"="pass"); ## keyword --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org