This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push: new 02fa51a586 [improvement](timezone) support mixed uppper-lower case of timezone names (#21572) (#21576) 02fa51a586 is described below commit 02fa51a586c58beef05743749941157ffceebadf Author: TengJianPing <18241664+jackte...@users.noreply.github.com> AuthorDate: Tue Jul 11 12:34:42 2023 +0800 [improvement](timezone) support mixed uppper-lower case of timezone names (#21572) (#21576) --- be/src/runtime/exec_env_init.cpp | 3 ++ be/src/util/timezone_utils.cpp | 40 ++++++++++++++++++++-- be/src/util/timezone_utils.h | 10 +++++- be/test/exprs/timestamp_functions_test.cpp | 1 + be/test/vec/function/function_time_test.cpp | 3 ++ .../utils/arrow_column_to_doris_column_test.cpp | 1 + .../datetime_functions/test_date_function.out | 15 ++++++++ .../datetime_functions/test_date_function.groovy | 6 ++++ 8 files changed, 76 insertions(+), 3 deletions(-) diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 4a7ad1d29b..f91233dae0 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -95,6 +95,9 @@ Status ExecEnv::_init(const std::vector<StorePath>& store_paths) { _backend_client_cache = new BackendServiceClientCache(config::max_client_cache_size_per_host); _frontend_client_cache = new FrontendServiceClientCache(config::max_client_cache_size_per_host); _broker_client_cache = new BrokerServiceClientCache(config::max_client_cache_size_per_host); + + TimezoneUtils::load_timezone_names(); + _thread_mgr = new ThreadResourceMgr(); if (config::doris_enable_scanner_thread_pool_per_disk && config::doris_scanner_thread_pool_thread_num >= store_paths.size() && diff --git a/be/src/util/timezone_utils.cpp b/be/src/util/timezone_utils.cpp index b40a61d129..c22c12b560 100644 --- a/be/src/util/timezone_utils.cpp +++ b/be/src/util/timezone_utils.cpp @@ -17,14 +17,46 @@ // #include "util/timezone_utils.h" +#include <cctz/time_zone.h> +#include <boost/algorithm/string.hpp> +#include <filesystem> +#include <string> namespace doris { RE2 TimezoneUtils::time_zone_offset_format_reg("^[+-]{1}\\d{2}\\:\\d{2}$"); +std::unordered_map<std::string, std::string> TimezoneUtils::timezone_names_map_; +bool TimezoneUtils::inited_ = false; const std::string TimezoneUtils::default_time_zone = "+08:00"; +void TimezoneUtils::load_timezone_names() { + if (inited_) { + return; + } + + inited_ = true; + std::string path; + const char* tzdir = "/usr/share/zoneinfo"; + char* tzdir_env = std::getenv("TZDIR"); + if (tzdir_env && *tzdir_env) { + tzdir = tzdir_env; + } + path += tzdir; + path += '/'; + + auto path_prefix_len = path.size(); + for (auto const& dir_entry : std::filesystem::recursive_directory_iterator {path}) { + if (dir_entry.is_regular_file()) { + auto timezone_full_name = dir_entry.path().string().substr(path_prefix_len); + timezone_names_map_[boost::algorithm::to_lower_copy(timezone_full_name)] = + timezone_full_name; + } + } +} + bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz) { + auto timezone_lower = boost::algorithm::to_lower_copy(timezone); re2::StringPiece value; if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), RE2::UNANCHORED, &value, 1)) { @@ -44,12 +76,16 @@ bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_ offset *= positive ? 1 : -1; ctz = cctz::fixed_time_zone(cctz::seconds(offset)); return true; - } else if (timezone == "CST") { + } else if (timezone_lower == "cst") { // Supports offset and region timezone type, "CST" use here is compatibility purposes. ctz = cctz::fixed_time_zone(cctz::seconds(8 * 60 * 60)); return true; } else { - return cctz::load_time_zone(timezone, &ctz); + auto it = timezone_names_map_.find(timezone_lower); + if (it == timezone_names_map_.end()) { + return false; + } + return cctz::load_time_zone(it->second, &ctz); } } diff --git a/be/src/util/timezone_utils.h b/be/src/util/timezone_utils.h index 7fbe0ef9e2..ce2fe3ab80 100644 --- a/be/src/util/timezone_utils.h +++ b/be/src/util/timezone_utils.h @@ -20,18 +20,26 @@ #include <re2/re2.h> -#include "cctz/time_zone.h" +#include <unordered_map> + +namespace cctz { +class time_zone; +} // namespace cctz namespace doris { class TimezoneUtils { public: + static void load_timezone_names(); static bool find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz); public: static const std::string default_time_zone; private: + static bool inited_; + static std::unordered_map<std::string, std::string> timezone_names_map_; + // RE2 obj is thread safe static RE2 time_zone_offset_format_reg; }; diff --git a/be/test/exprs/timestamp_functions_test.cpp b/be/test/exprs/timestamp_functions_test.cpp index 5193ef71e5..697261e966 100644 --- a/be/test/exprs/timestamp_functions_test.cpp +++ b/be/test/exprs/timestamp_functions_test.cpp @@ -40,6 +40,7 @@ public: TimestampFunctionsTest() {} void SetUp() override { + TimezoneUtils::load_timezone_names(); utils = new FunctionUtils(); ctx = utils->get_fn_ctx(); } diff --git a/be/test/vec/function/function_time_test.cpp b/be/test/vec/function/function_time_test.cpp index 8a68a608b8..07d07ca73f 100644 --- a/be/test/vec/function/function_time_test.cpp +++ b/be/test/vec/function/function_time_test.cpp @@ -167,6 +167,7 @@ TEST(VTimestampFunctionsTest, second_test) { TEST(VTimestampFunctionsTest, from_unix_test) { std::string func_name = "from_unixtime"; + TimezoneUtils::load_timezone_names(); InputTypeSet input_types = {TypeIndex::Int32}; @@ -528,6 +529,7 @@ TEST(VTimestampFunctionsTest, makedate_test) { } TEST(VTimestampFunctionsTest, convert_tz_test) { + TimezoneUtils::load_timezone_names(); std::string func_name = "convert_tz"; InputTypeSet input_types = {TypeIndex::DateTime, TypeIndex::String, TypeIndex::String}; @@ -1861,6 +1863,7 @@ TEST(VTimestampFunctionsTest, seconds_sub_v2_test) { } TEST(VTimestampFunctionsTest, convert_tz_v2_test) { + TimezoneUtils::load_timezone_names(); std::string func_name = "convert_tz"; InputTypeSet input_types = {TypeIndex::DateTimeV2, TypeIndex::String, TypeIndex::String}; diff --git a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp index fda70edd56..e4455fd2e0 100644 --- a/be/test/vec/utils/arrow_column_to_doris_column_test.cpp +++ b/be/test/vec/utils/arrow_column_to_doris_column_test.cpp @@ -160,6 +160,7 @@ void test_arrow_to_datetime_column(std::shared_ptr<ArrowType> type, ColumnWithTy template <typename ArrowType, typename ColumnType, bool is_nullable> void test_datetime(std::shared_ptr<ArrowType> type, const std::vector<std::string>& test_cases, size_t num_elements) { + TimezoneUtils::load_timezone_names(); using ArrowCppType = typename arrow::TypeTraits<ArrowType>::CType; size_t counter = 0; auto pt = arrow_type_to_primitive_type(type->id()); diff --git a/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out b/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out index 5ca1401ab5..7cb5586756 100644 --- a/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out +++ b/regression-test/data/query_p0/sql_functions/datetime_functions/test_date_function.out @@ -5,6 +5,9 @@ -- !sql -- 2019-07-31T22:21:03 +-- !sql -- +2019-07-31T22:21:03 + -- !sql -- 2019-08-01T06:21:03 @@ -14,6 +17,18 @@ -- !sql -- \N +-- !sql -- +2019-07-31T18:18:27 + +-- !sql -- +2019-07-31T18:18:27 + +-- !sql -- +2019-08-01T17:18:27 + +-- !sql -- +2019-08-01T17:18:27 + -- !sql -- \N diff --git a/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy index 78f4a5cba4..2454f76275 100644 --- a/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy +++ b/regression-test/suites/query_p0/sql_functions/datetime_functions/test_date_function.groovy @@ -38,6 +38,7 @@ suite("test_date_function") { sql """ insert into ${tableName} values ("2019-08-01 13:21:03") """ // convert_tz qt_sql """ SELECT convert_tz(test_datetime, 'Asia/Shanghai', 'America/Los_Angeles') result from ${tableName}; """ + qt_sql """ SELECT convert_tz(test_datetime, 'Asia/SHANGHAI', 'america/Los_angeles') result from ${tableName}; """ qt_sql """ SELECT convert_tz(test_datetime, '+08:00', 'America/Los_Angeles') result from ${tableName}; """ qt_sql """ SELECT convert_tz(test_datetime, 'Asia/Shanghai', 'Europe/London') result from ${tableName}; """ @@ -45,6 +46,11 @@ suite("test_date_function") { qt_sql """ SELECT convert_tz(test_datetime, '+08:00', 'America/London') result from ${tableName}; """ + qt_sql """ select convert_tz("2019-08-01 02:18:27", 'Asia/Shanghai', 'UTC'); """ + qt_sql """ select convert_tz("2019-08-01 02:18:27", 'Asia/Shanghai', 'UTc'); """ + qt_sql """ select convert_tz("2019-08-01 02:18:27", 'America/Los_Angeles', 'CST'); """ + qt_sql """ select convert_tz("2019-08-01 02:18:27", 'America/Los_Angeles', 'cSt'); """ + // some invalid date qt_sql """ SELECT convert_tz('2022-2-29 13:21:03', '+08:00', 'America/London') result; """ qt_sql """ SELECT convert_tz('2022-02-29 13:21:03', '+08:00', 'America/London') result; """ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org