This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch branch-2.1 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push: new d9339564496 [branch-2.1](timezone) Preload time offset in datetime (#42395) (#42607) d9339564496 is described below commit d9339564496e984cba11352ef6724f3f7b38bd9a Author: zclllhhjj <zhaochan...@selectdb.com> AuthorDate: Sun Nov 10 00:30:28 2024 +0800 [branch-2.1](timezone) Preload time offset in datetime (#42395) (#42607) pick https://github.com/apache/doris/pull/42395 --- be/src/util/timezone_utils.cpp | 50 +++++++++--- be/src/util/timezone_utils.h | 3 + be/test/util/timezone_utils_test.cpp | 144 +++++++++++++++++++++++++++++++++++ 3 files changed, 188 insertions(+), 9 deletions(-) diff --git a/be/src/util/timezone_utils.cpp b/be/src/util/timezone_utils.cpp index 5aef6f8702b..6bb71ac4647 100644 --- a/be/src/util/timezone_utils.cpp +++ b/be/src/util/timezone_utils.cpp @@ -30,6 +30,7 @@ #include <boost/algorithm/string.hpp> #include <boost/algorithm/string/case_conv.hpp> +#include <cstdlib> #include <filesystem> #include <memory> #include <string> @@ -58,6 +59,9 @@ static const char* tzdir = "/usr/share/zoneinfo"; // default value, may change b void TimezoneUtils::clear_timezone_caches() { lower_zone_cache_->clear(); } +int TimezoneUtils::cache_size() { + return lower_zone_cache_->size(); +} static bool parse_save_name_tz(const std::string& tz_name) { cctz::time_zone tz; @@ -106,24 +110,54 @@ void TimezoneUtils::load_timezones_to_cache() { } lower_zone_cache_->erase("lmt"); // local mean time for every timezone - LOG(INFO) << "Read " << lower_zone_cache_->size() << " timezones."; + + load_offsets_to_cache(); + LOG(INFO) << "Preloaded" << lower_zone_cache_->size() << " timezones."; +} + +static std::string to_hour_string(int arg) { + if (arg < 0 && arg > -10) { // -9 to -1 + return std::string {"-0"} + std::to_string(std::abs(arg)); + } else if (arg >= 0 && arg < 10) { //0 to 9 + return std::string {"0"} + std::to_string(arg); + } + return std::to_string(arg); +} + +void 
TimezoneUtils::load_offsets_to_cache() { + for (int hour = -12; hour <= +14; hour++) { + for (int minute = 0; minute <= 30; minute += 30) { + std::string offset_str = (hour >= 0 ? "+" : "") + to_hour_string(hour) + ':' + + (minute == 0 ? "00" : "30"); + cctz::time_zone result; + parse_tz_offset_string(offset_str, result); + lower_zone_cache_->emplace(offset_str, result); + } + } + // -00 for hour is also valid + std::string offset_str = "-00:00"; + cctz::time_zone result; + parse_tz_offset_string(offset_str, result); + lower_zone_cache_->emplace(offset_str, result); + offset_str = "-00:30"; + parse_tz_offset_string(offset_str, result); + lower_zone_cache_->emplace(offset_str, result); } bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, cctz::time_zone& ctz) { - if (auto it = lower_zone_cache_->find(to_lower_copy(timezone)); - it != lower_zone_cache_->end()) { + if (auto it = lower_zone_cache_->find(to_lower_copy(timezone)); it != lower_zone_cache_->end()) + [[likely]] { ctz = it->second; return true; } - // offset format or just illegal - return parse_tz_offset_string(timezone, ctz); + return false; } bool TimezoneUtils::parse_tz_offset_string(const std::string& timezone, cctz::time_zone& ctz) { // like +08:00, which not in timezone_names_map_ re2::StringPiece value; - if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), RE2::UNANCHORED, &value, - 1)) { + if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), RE2::UNANCHORED, &value, 1)) + [[likely]] { bool positive = value[0] != '-'; //Regular expression guarantees hour and minute must be int @@ -139,8 +173,6 @@ bool TimezoneUtils::parse_tz_offset_string(const std::string& timezone, cctz::ti int offset = hour * 60 * 60 + minute * 60; offset *= positive ? 1 : -1; ctz = cctz::fixed_time_zone(cctz::seconds(offset)); - // try to push the result time offset of "+08:00" need lock. now it's harmful for performance. - // maybe we can use rcu of hazard-pointer to opt it. 
return true; } return false; diff --git a/be/src/util/timezone_utils.h b/be/src/util/timezone_utils.h index c8bce44b5ab..3cdb17fc6fd 100644 --- a/be/src/util/timezone_utils.h +++ b/be/src/util/timezone_utils.h @@ -41,6 +41,9 @@ public: private: // for ut only static void clear_timezone_caches(); + static int cache_size(); + + static void load_offsets_to_cache(); static bool parse_tz_offset_string(const std::string& timezone, cctz::time_zone& ctz); }; diff --git a/be/test/util/timezone_utils_test.cpp b/be/test/util/timezone_utils_test.cpp new file mode 100644 index 00000000000..9130e0be633 --- /dev/null +++ b/be/test/util/timezone_utils_test.cpp @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "util/timezone_utils.h" + +#include <gtest/gtest-message.h> +#include <gtest/gtest-test-part.h> + +#include <boost/utility/binary.hpp> +#include <iostream> + +#include "cctz/time_zone.h" +#include "gtest/gtest.h" +#include "gtest/gtest_pred_impl.h" + +namespace doris { + +TEST(TimezoneUtilsTest, ParseOffset) { + const auto tp = cctz::civil_second(2011, 1, 1, 0, 0, + 0); // offset has no DST, every time point is acceptable + cctz::time_zone result; + + TimezoneUtils::parse_tz_offset_string("+14:00", result); + auto cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, 14 * 3600); + + TimezoneUtils::parse_tz_offset_string("+00:00", result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, 0 * 3600); + + TimezoneUtils::parse_tz_offset_string("+00:30", result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, 1800); + + TimezoneUtils::parse_tz_offset_string("+10:30", result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, 10 * 3600 + 1800); + + TimezoneUtils::parse_tz_offset_string("+01:00", result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, 1 * 3600); + + TimezoneUtils::parse_tz_offset_string("-12:00", result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, -12 * 3600); + + TimezoneUtils::parse_tz_offset_string("-09:00", result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, -9 * 3600); + + TimezoneUtils::parse_tz_offset_string("-01:00", result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, -1 * 3600); + + TimezoneUtils::parse_tz_offset_string("-00:00", result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, 0 * 3600); + + TimezoneUtils::parse_tz_offset_string("-00:30", result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, -1800); + + TimezoneUtils::parse_tz_offset_string("-10:30", result); + cl = 
result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, -10 * 3600 - 1800); + + // out of range or illegal format + EXPECT_FALSE(TimezoneUtils::parse_tz_offset_string("+15:00", result)); + EXPECT_FALSE(TimezoneUtils::parse_tz_offset_string("-13:00", result)); + EXPECT_FALSE(TimezoneUtils::parse_tz_offset_string("+9:30", result)); +} + +TEST(TimezoneUtilsTest, LoadOffsets) { + TimezoneUtils::clear_timezone_caches(); + TimezoneUtils::load_offsets_to_cache(); + EXPECT_EQ(TimezoneUtils::cache_size(), (13 + 15) * 2); + + TimezoneUtils::load_timezones_to_cache(); + EXPECT_GE(TimezoneUtils::cache_size(), 100); +} + +TEST(TimezoneUtilsTest, FindTimezone) { + TimezoneUtils::load_timezones_to_cache(); + + std::string tzname; + cctz::time_zone result; + const auto tp = cctz::civil_second(2011, 1, 1, 0, 0, 0); + + tzname = "Asia/Shanghai"; + TimezoneUtils::find_cctz_time_zone(tzname, result); + auto cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, 8 * 3600); + + tzname = "America/Los_Angeles"; + TimezoneUtils::find_cctz_time_zone(tzname, result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, -8 * 3600); + + tzname = "+00:30"; + TimezoneUtils::find_cctz_time_zone(tzname, result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, 1800); + + tzname = "-00:00"; + TimezoneUtils::find_cctz_time_zone(tzname, result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, 0); + + tzname = "+14:00"; + TimezoneUtils::find_cctz_time_zone(tzname, result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, 14 * 3600); + + tzname = "-12:00"; + TimezoneUtils::find_cctz_time_zone(tzname, result); + cl = result.lookup(cctz::convert(tp, result)); + EXPECT_EQ(cl.offset, -12 * 3600); + + // out of range or illegal format + tzname = "+15:00"; + EXPECT_FALSE(TimezoneUtils::find_cctz_time_zone(tzname, result)); + + tzname = "-13:00"; + 
EXPECT_FALSE(TimezoneUtils::find_cctz_time_zone(tzname, result)); + + tzname = "+9:30"; + EXPECT_FALSE(TimezoneUtils::find_cctz_time_zone(tzname, result)); +} + +} // namespace doris --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org