This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 736d6f3b4c [improvement](timezone) support mixed upper-lower case of timezone names (#21572) 736d6f3b4c is described below commit 736d6f3b4c49e8971129ae06b1d655e0c37d7753 Author: TengJianPing <18241664+jackte...@users.noreply.github.com> AuthorDate: Tue Jul 11 09:37:14 2023 +0800 [improvement](timezone) support mixed upper-lower case of timezone names (#21572) --- be/src/runtime/exec_env_init.cpp | 2 ++ be/src/util/timezone_utils.cpp | 39 ++++++++++++++++++++-- be/src/util/timezone_utils.h | 5 +++ be/test/vec/function/function_time_test.cpp | 3 ++ .../utils/arrow_column_to_doris_column_test.cpp | 1 + .../datetime_functions/test_date_function.out | 15 +++++++++ .../datetime_functions/test_date_function.groovy | 6 ++++ 7 files changed, 69 insertions(+), 2 deletions(-) diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index ccc15e1961..29ac809246 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -118,6 +118,8 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths) { _frontend_client_cache = new FrontendServiceClientCache(config::max_client_cache_size_per_host); _broker_client_cache = new BrokerServiceClientCache(config::max_client_cache_size_per_host); + TimezoneUtils::load_timezone_names(); + ThreadPoolBuilder("SendBatchThreadPool") .set_min_threads(config::send_batch_thread_pool_thread_num) .set_max_threads(config::send_batch_thread_pool_thread_num) diff --git a/be/src/util/timezone_utils.cpp b/be/src/util/timezone_utils.cpp index e4d19946a7..d62e01a994 100644 --- a/be/src/util/timezone_utils.cpp +++ b/be/src/util/timezone_utils.cpp @@ -21,13 +21,44 @@ #include <cctz/time_zone.h> #include <re2/stringpiece.h> +#include <boost/algorithm/string.hpp> +#include <filesystem> +#include <string> + namespace doris { RE2 TimezoneUtils::time_zone_offset_format_reg("^[+-]{1}\\d{2}\\:\\d{2}$"); 
+std::unordered_map<std::string, std::string> TimezoneUtils::timezone_names_map_; +bool TimezoneUtils::inited_ = false; const std::string TimezoneUtils::default_time_zone = "+08:00"; +void TimezoneUtils::load_timezone_names() { + if (inited_) { + return; + } + + inited_ = true; + std::string path; + const char* tzdir = "/usr/share/zoneinfo"; + char* tzdir_env = std::getenv("TZDIR"); + if (tzdir_env && *tzdir_env) { + tzdir = tzdir_env; + } + path += tzdir; + path += '/'; + + auto path_prefix_len = path.size(); + for (auto const& dir_entry : std::filesystem::recursive_directory_iterator {path}) { + if (dir_entry.is_regular_file()) { + auto timezone_full_name = dir_entry.path().string().substr(path_prefix_len); + timezone_names_map_[boost::algorithm::to_lower_copy(timezone_full_name)] = + timezone_full_name; + } + } +} bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz) { + auto timezone_lower = boost::algorithm::to_lower_copy(timezone); re2::StringPiece value; if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), RE2::UNANCHORED, &value, 1)) { @@ -47,12 +78,16 @@ bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_ offset *= positive ? 1 : -1; ctz = cctz::fixed_time_zone(cctz::seconds(offset)); return true; - } else if (timezone == "CST") { + } else if (timezone_lower == "cst") { // Supports offset and region timezone type, "CST" use here is compatibility purposes. 
ctz = cctz::fixed_time_zone(cctz::seconds(8 * 60 * 60)); return true; } else { - return cctz::load_time_zone(timezone, &ctz); + auto it = timezone_names_map_.find(timezone_lower); + if (it == timezone_names_map_.end()) { + return false; + } + return cctz::load_time_zone(it->second, &ctz); } } diff --git a/be/src/util/timezone_utils.h b/be/src/util/timezone_utils.h index b74ce9e4b1..d9e5ee82d8 100644 --- a/be/src/util/timezone_utils.h +++ b/be/src/util/timezone_utils.h @@ -21,6 +21,7 @@ #include <re2/re2.h> #include <string> +#include <unordered_map> namespace cctz { class time_zone; @@ -30,12 +31,16 @@ namespace doris { class TimezoneUtils { public: + static void load_timezone_names(); static bool find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz); public: static const std::string default_time_zone; private: + static bool inited_; + static std::unordered_map<std::string, std::string> timezone_names_map_; + // RE2 obj is thread safe static RE2 time_zone_offset_format_reg; }; diff --git a/be/test/vec/function/function_time_test.cpp b/be/test/vec/function/function_time_test.cpp index b93d66cc4a..3f1a4408b2 100644 --- a/be/test/vec/function/function_time_test.cpp +++ b/be/test/vec/function/function_time_test.cpp @@ -177,6 +177,7 @@ TEST(VTimestampFunctionsTest, second_test) { TEST(VTimestampFunctionsTest, from_unix_test) { std::string func_name = "from_unixtime"; + TimezoneUtils::load_timezone_names(); InputTypeSet input_types = {TypeIndex::Int32}; @@ -538,6 +539,7 @@ TEST(VTimestampFunctionsTest, makedate_test) { } TEST(VTimestampFunctionsTest, convert_tz_test) { + TimezoneUtils::load_timezone_names(); std::string func_name = "convert_tz"; InputTypeSet input_types = {TypeIndex::DateTime, TypeIndex::String, TypeIndex::String}; @@ -1678,6 +1680,7 @@ TEST(VTimestampFunctionsTest, seconds_sub_v2_test) { } TEST(VTimestampFunctionsTest, convert_tz_v2_test) { + TimezoneUtils::load_timezone_names(); std::string func_name = "convert_tz"; InputTypeSet 
input_types = {TypeIndex::DateTimeV2, TypeIndex::String, TypeIndex::String}; diff --git a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp index e88dee6029..32b2ef9874 100644 --- a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp +++ b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp @@ -174,6 +174,7 @@ void test_arrow_to_datetime_column(std::shared_ptr<ArrowType> type, ColumnWithTy template <typename ArrowType, typename ColumnType, bool is_nullable> void test_datetime(std::shared_ptr<ArrowType> type, const std::vector<std::string>& test_cases, size_t num_elements) { + TimezoneUtils::load_timezone_names(); using ArrowCppType = typename arrow::TypeTraits<ArrowType>::CType; size_t counter = 0; auto pt = arrow_type_to_primitive_type(type->id()); diff --git a/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out b/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out index 2837f41d68..2f9cc35e4e 100644 --- a/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out +++ b/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out @@ -5,6 +5,9 @@ -- !sql -- 2019-07-31T22:21:03 +-- !sql -- +2019-07-31T22:21:03 + -- !sql -- 2019-08-01T06:21:03 @@ -14,6 +17,18 @@ -- !sql -- \N +-- !sql -- +2019-07-31T18:18:27 + +-- !sql -- +2019-07-31T18:18:27 + +-- !sql -- +2019-08-01T17:18:27 + +-- !sql -- +2019-08-01T17:18:27 + -- !sql -- \N diff --git a/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy index e31f410127..6debd4d0b8 100644 --- a/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy @@ -37,6 +37,7 @@ 
suite("test_date_function") { sql """ insert into ${tableName} values ("2019-08-01 13:21:03") """ // convert_tz qt_sql """ SELECT convert_tz(test_datetime, 'Asia/Shanghai', 'America/Los_Angeles') result from ${tableName}; """ + qt_sql """ SELECT convert_tz(test_datetime, 'Asia/SHANGHAI', 'america/Los_angeles') result from ${tableName}; """ qt_sql """ SELECT convert_tz(test_datetime, '+08:00', 'America/Los_Angeles') result from ${tableName}; """ qt_sql """ SELECT convert_tz(test_datetime, 'Asia/Shanghai', 'Europe/London') result from ${tableName}; """ @@ -44,6 +45,11 @@ suite("test_date_function") { qt_sql """ SELECT convert_tz(test_datetime, '+08:00', 'America/London') result from ${tableName}; """ + qt_sql """ select convert_tz("2019-08-01 02:18:27", 'Asia/Shanghai', 'UTC'); """ + qt_sql """ select convert_tz("2019-08-01 02:18:27", 'Asia/Shanghai', 'UTc'); """ + qt_sql """ select convert_tz("2019-08-01 02:18:27", 'America/Los_Angeles', 'CST'); """ + qt_sql """ select convert_tz("2019-08-01 02:18:27", 'America/Los_Angeles', 'cSt'); """ + // some invalid date qt_sql """ SELECT convert_tz('2022-2-29 13:21:03', '+08:00', 'America/London') result; """ qt_sql """ SELECT convert_tz('2022-02-29 13:21:03', '+08:00', 'America/London') result; """ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org