This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git


The following commit(s) were added to refs/heads/master by this push:
     new cc1a5fb  [Function] Support '%' in date format string (#3037)
cc1a5fb is described below

commit cc1a5fb8ea973b8344272dd15477be90f0da8336
Author: Mingyu Chen <morningman....@gmail.com>
AuthorDate: Thu Mar 5 08:56:02 2020 +0800

    [Function] Support '%' in date format string (#3037)
    
    eg:
    select str_to_date('2014-12-21 12%3A34%3A56', '%Y-%m-%d %H%%3A%i%%3A%s');
    select unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s');
    
    This also enable us to extract column fields from HDFS file path with 
contains '%'.
---
 be/src/runtime/datetime_value.cpp                  |  7 +++++++
 be/test/runtime/datetime_value_test.cpp            | 22 +++++++++++++++++++++
 .../date-time-functions/date_format.md             | 11 ++++++++++-
 .../date-time-functions/str_to_date.md             |  7 +++++++
 .../date-time-functions/unix_timestamp.md          | 18 ++++++++++++++++-
 .../Data Manipulation/BROKER LOAD.md               | 23 ++++++++++++++++++++++
 .../date-time-functions/date_format_EN.md          |  9 +++++++++
 .../date-time-functions/str_to_date_EN.md          | 10 +++++++++-
 .../date-time-functions/unix_timestamp_EN.md       | 16 +++++++++++++++
 .../Data Manipulation/BROKER LOAD_EN.md            | 23 ++++++++++++++++++++++
 10 files changed, 143 insertions(+), 3 deletions(-)

diff --git a/be/src/runtime/datetime_value.cpp 
b/be/src/runtime/datetime_value.cpp
index 6aaeab0..4953490 100644
--- a/be/src/runtime/datetime_value.cpp
+++ b/be/src/runtime/datetime_value.cpp
@@ -1108,6 +1108,7 @@ bool DateTimeValue::from_date_format_str(
     bool strict_week_number_year_type = false;
     int strict_week_number_year = -1;
     bool usa_time = false;
+
     while (ptr < end && val < val_end) {
         // Skip space character
         while (val < val_end && isspace(*val)) {
@@ -1354,6 +1355,12 @@ bool DateTimeValue::from_date_format_str(
                     val++;
                 }
                 break;
+            case '%': // %%, escape the %
+                if ('%' != *val) {
+                    return false;
+                }
+                val++;
+                break;
             default:
                 return false;
             }
diff --git a/be/test/runtime/datetime_value_test.cpp 
b/be/test/runtime/datetime_value_test.cpp
index c050dfc..9082572 100644
--- a/be/test/runtime/datetime_value_test.cpp
+++ b/be/test/runtime/datetime_value_test.cpp
@@ -513,6 +513,22 @@ TEST_F(DateTimeValueTest, from_date_format_str) {
             format_str.c_str(), format_str.size(), value_str.c_str(), 
value_str.size()));
     value.to_string(str);
     ASSERT_STREQ("1988-02-01 03:04:05", str);
+
+    // escape %
+    format_str = "%Y-%m-%d %H%%3A%i%%3A%s";
+    value_str = "2020-02-26 00%3A00%3A00";
+    ASSERT_TRUE(value.from_date_format_str(
+            format_str.c_str(), format_str.size(), value_str.c_str(), 
value_str.size()));
+    value.to_string(str);
+    ASSERT_STREQ("2020-02-26 00:00:00", str);
+
+    // escape %
+    format_str = "%Y-%m-%d%%%% %H%%3A%i%%3A%s";
+    value_str = "2020-02-26%% 00%3A00%3A00";
+    ASSERT_TRUE(value.from_date_format_str(
+            format_str.c_str(), format_str.size(), value_str.c_str(), 
value_str.size()));
+    value.to_string(str);
+    ASSERT_STREQ("2020-02-26 00:00:00", str);
 }
 
 // Calculate format
@@ -539,10 +555,16 @@ TEST_F(DateTimeValueTest, from_date_format_str_invalid) {
     value_str = "2015 1 1";
     ASSERT_FALSE(value.from_date_format_str(
             format_str.c_str(), format_str.size(), value_str.c_str(), 
value_str.size()));
+
     format_str = "%x %V %w";
     value_str = "2015 1 1";
     ASSERT_FALSE(value.from_date_format_str(
             format_str.c_str(), format_str.size(), value_str.c_str(), 
value_str.size()));
+
+    format_str = "%Y-%m-%d %H%3A%i%3A%s";
+    value_str = "2020-02-26 00%3A00%3A00";
+    ASSERT_FALSE(value.from_date_format_str(
+            format_str.c_str(), format_str.size(), value_str.c_str(), 
value_str.size()));
 }
 // Calculate format
 TEST_F(DateTimeValueTest, format_str) {
diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md
 
b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md
index a4afa60..7f8107a 100644
--- 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md
+++ 
b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/date_format.md
@@ -91,7 +91,9 @@ date 参数是合法的日期。format 规定日期/时间的输出格式。
 
 %Y | 年,4 位          
                            
-%y | 年,2 位                                    
+%y | 年,2 位
+
+%% | 用于表示 %
 
 ## example
 
@@ -137,6 +139,13 @@ mysql> select date_format('2006-06-01', '%d');
 +------------------------------------------+
 | 01                                       |
 +------------------------------------------+
+
+mysql> select date_format('2006-06-01', '%%%d');
++--------------------------------------------+
+| date_format('2006-06-01 00:00:00', '%%%d') |
++--------------------------------------------+
+| %01                                        |
++--------------------------------------------+
 ```
 
 ## keyword
diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md
 
b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md
index dc4840c..630e601 100644
--- 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md
+++ 
b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/str_to_date.md
@@ -38,6 +38,13 @@ mysql> select str_to_date('2014-12-21 12:34:56', '%Y-%m-%d 
%H:%i:%s');
 | 2014-12-21 12:34:56                                     |
 +---------------------------------------------------------+
 
+mysql> select str_to_date('2014-12-21 12:34%3A56', '%Y-%m-%d %H:%i%%3A%s');
++--------------------------------------------------------------+
+| str_to_date('2014-12-21 12:34%3A56', '%Y-%m-%d %H:%i%%3A%s') |
++--------------------------------------------------------------+
+| 2014-12-21 12:34:56                                          |
++--------------------------------------------------------------+
+
 mysql> select str_to_date('200442 Monday', '%X%V %W');
 +-----------------------------------------+
 | str_to_date('200442 Monday', '%X%V %W') |
diff --git 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md
 
b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md
index afe4bd1..7e0f061 100644
--- 
a/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md
+++ 
b/docs/documentation/cn/sql-reference/sql-functions/date-time-functions/unix_timestamp.md
@@ -21,7 +21,7 @@ under the License.
 ## description
 ### Syntax
 
-`INT UNIX_TIMESTAMP(), UNIX_TIMESTAMP(DATETIME date)`
+`INT UNIX_TIMESTAMP(), UNIX_TIMESTAMP(DATETIME date), UNIX_TIMESTAMP(DATETIME 
date, STRING fmt),`
 
 将 Date 或者 Datetime 类型转化为 unix 时间戳。
 
@@ -31,6 +31,8 @@ under the License.
 
 对于在 1970-01-01 00:00:00 之前或 2038-01-19 03:14:07 之后的时间,该函数将返回 0。
 
+Format 的格式请参阅 `date_format` 函数的格式说明。
+
 该函数受时区影响。
 
 ## example
@@ -50,6 +52,20 @@ mysql> select unix_timestamp('2007-11-30 10:30:19');
 |                            1196389819 |
 +---------------------------------------+
 
+mysql> select unix_timestamp('2007-11-30 10:30-19', '%Y-%m-%d %H:%i-%s');
++---------------------------------------+
+| unix_timestamp('2007-11-30 10:30-19') |
++---------------------------------------+
+|                            1196389819 |
++---------------------------------------+
+
+mysql> select unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s');
++---------------------------------------+
+| unix_timestamp('2007-11-30 10:30%3A19') |
++---------------------------------------+
+|                            1196389819 |
++---------------------------------------+
+
 mysql> select unix_timestamp('1969-01-01 00:00:00');
 +---------------------------------------+
 | unix_timestamp('1969-01-01 00:00:00') |
diff --git a/docs/documentation/cn/sql-reference/sql-statements/Data 
Manipulation/BROKER LOAD.md 
b/docs/documentation/cn/sql-reference/sql-statements/Data Manipulation/BROKER 
LOAD.md
index 08061b2..5e96ed9 100644
--- a/docs/documentation/cn/sql-reference/sql-statements/Data 
Manipulation/BROKER LOAD.md       
+++ b/docs/documentation/cn/sql-reference/sql-statements/Data 
Manipulation/BROKER LOAD.md       
@@ -399,6 +399,29 @@ under the License.
         "fs.s3a.secret.key" = "yyyyyyyyyyyyyyyyyyyy",
         "fs.s3a.endpoint" = "s3.amazonaws.com"
         )
+
+    12. 提取文件路径中的时间分区字段,并且时间包含 %3A (在 hdfs 路径中,不允许有 ':',所有 ':' 会由 %3A 替换)
+
+        假设有如下文件:
+
+        /user/data/data_time=2020-02-17 00%3A00%3A00/test.txt
+        /user/data/data_time=2020-02-18 00%3A00%3A00/test.txt
+
+        表结构为:
+        data_time DATETIME,
+        k2        INT,
+        k3        INT
+
+        LOAD LABEL example_db.label12
+        (
+         DATA INFILE("hdfs://host:port/user/data/*/test.txt") 
+         INTO TABLE `tbl12`
+         COLUMNS TERMINATED BY ","
+         (k2,k3)
+         COLUMNS FROM PATH AS (data_time)
+         SET (data_time=str_to_date(data_time, '%Y-%m-%d %H%%3A%i%%3A%s'))
+        ) 
+        WITH BROKER "hdfs" ("username"="user", "password"="pass");
          
 ## keyword
 
diff --git 
a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md
 
b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md
index 4b04f33..4cd4410 100644
--- 
a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md
+++ 
b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/date_format_EN.md
@@ -93,6 +93,8 @@ The formats available are:
 
 % Y | Year, 2
 
+%%  | Represent %
+
 ## example
 
 ```
@@ -137,6 +139,13 @@ mysql> select date_format('2006-06-01', '%d');
 +------------------------------------------+
 | 01                                       |
 +------------------------------------------+
+
+mysql> select date_format('2006-06-01', '%%%d');
++--------------------------------------------+
+| date_format('2006-06-01 00:00:00', '%%%d') |
++--------------------------------------------+
+| %01                                        |
++--------------------------------------------+
 ```
 ##keyword
 DATE_FORMAT,DATE,FORMAT
diff --git 
a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md
 
b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md
index 5cf860c..7957a64 100644
--- 
a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md
+++ 
b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/str_to_date_EN.md
@@ -38,6 +38,13 @@ mysql> select str_to_date('2014-12-21 12:34:56', '%Y-%m-%d 
%H:%i:%s');
 | 2014-12-21 12:34:56                                     |
 +---------------------------------------------------------+
 
+mysql> select str_to_date('2014-12-21 12:34%3A56', '%Y-%m-%d %H:%i%%3A%s');
++--------------------------------------------------------------+
+| str_to_date('2014-12-21 12:34%3A56', '%Y-%m-%d %H:%i%%3A%s') |
++--------------------------------------------------------------+
+| 2014-12-21 12:34:56                                          |
++--------------------------------------------------------------+
+
 mysql> select str_to_date('200442 Monday', '%X%V %W');
 +-----------------------------------------+
 | str_to_date('200442 Monday', '%X%V %W') |
@@ -46,4 +53,5 @@ mysql> select str_to_date('200442 Monday', '%X%V %W');
 +-----------------------------------------+
 ```
 ##keyword
-STR_TO_DATE,STR,TO,DATE
+
+    STR_TO_DATE,STR,TO,DATE
diff --git 
a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md
 
b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md
index 83c8eda..611ba8a 100644
--- 
a/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md
+++ 
b/docs/documentation/en/sql-reference/sql-functions/date-time-functions/unix_timestamp_EN.md
@@ -31,6 +31,8 @@ The parameter needs to be Date or Datetime type.
 
 Any date before 1970-01-01 00:00:00 or after 2038-01-19 03:14:07 will return 0.
 
+See `date_format` function to get Format explanation.
+
 This function is affected by time zone.
 
 ## example
@@ -50,6 +52,20 @@ mysql> select unix_timestamp('2007-11-30 10:30:19');
 |                            1196389819 |
 +---------------------------------------+
 
+mysql> select unix_timestamp('2007-11-30 10:30-19', '%Y-%m-%d %H:%i-%s');
++---------------------------------------+
+| unix_timestamp('2007-11-30 10:30-19') |
++---------------------------------------+
+|                            1196389819 |
++---------------------------------------+
+
+mysql> select unix_timestamp('2007-11-30 10:30%3A19', '%Y-%m-%d %H:%i%%3A%s');
++---------------------------------------+
+| unix_timestamp('2007-11-30 10:30%3A19') |
++---------------------------------------+
+|                            1196389819 |
++---------------------------------------+
+
 mysql> select unix_timestamp('1969-01-01 00:00:00');
 +---------------------------------------+
 | unix_timestamp('1969-01-01 00:00:00') |
diff --git a/docs/documentation/en/sql-reference/sql-statements/Data 
Manipulation/BROKER LOAD_EN.md 
b/docs/documentation/en/sql-reference/sql-statements/Data Manipulation/BROKER 
LOAD_EN.md
index af2f487..d4bb70f 100644
--- a/docs/documentation/en/sql-reference/sql-statements/Data 
Manipulation/BROKER LOAD_EN.md    
+++ b/docs/documentation/en/sql-reference/sql-statements/Data 
Manipulation/BROKER LOAD_EN.md    
@@ -392,6 +392,29 @@ under the License.
         INTO TABLE `my_table`
         where k1 > k2
         );
+
+    11. Extract date partition fields in file paths, and date time include %3A 
(in hdfs path, all ':' will be replaced by '%3A')
+
+        Assume we have files:
+
+        /user/data/data_time=2020-02-17 00%3A00%3A00/test.txt
+        /user/data/data_time=2020-02-18 00%3A00%3A00/test.txt
+
+        Table schema is:
+        data_time DATETIME,
+        k2        INT,
+        k3        INT
+
+        LOAD LABEL example_db.label12
+        (
+         DATA INFILE("hdfs://host:port/user/data/*/test.txt") 
+         INTO TABLE `tbl12`
+         COLUMNS TERMINATED BY ","
+         (k2,k3)
+         COLUMNS FROM PATH AS (data_time)
+         SET (data_time=str_to_date(data_time, '%Y-%m-%d %H%%3A%i%%3A%s'))
+        ) 
+        WITH BROKER "hdfs" ("username"="user", "password"="pass");
      
 ## keyword
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to