This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new d9339564496 [branch-2.1](timezone) Preload time offset in datetime 
(#42395) (#42607)
d9339564496 is described below

commit d9339564496e984cba11352ef6724f3f7b38bd9a
Author: zclllhhjj <zhaochan...@selectdb.com>
AuthorDate: Sun Nov 10 00:30:28 2024 +0800

    [branch-2.1](timezone) Preload time offset in datetime (#42395) (#42607)
    
    pick https://github.com/apache/doris/pull/42395
---
 be/src/util/timezone_utils.cpp       |  50 +++++++++---
 be/src/util/timezone_utils.h         |   3 +
 be/test/util/timezone_utils_test.cpp | 144 +++++++++++++++++++++++++++++++++++
 3 files changed, 188 insertions(+), 9 deletions(-)

diff --git a/be/src/util/timezone_utils.cpp b/be/src/util/timezone_utils.cpp
index 5aef6f8702b..6bb71ac4647 100644
--- a/be/src/util/timezone_utils.cpp
+++ b/be/src/util/timezone_utils.cpp
@@ -30,6 +30,7 @@
 
 #include <boost/algorithm/string.hpp>
 #include <boost/algorithm/string/case_conv.hpp>
+#include <cstdlib>
 #include <filesystem>
 #include <memory>
 #include <string>
@@ -58,6 +59,9 @@ static const char* tzdir = "/usr/share/zoneinfo"; // default 
value, may change b
 void TimezoneUtils::clear_timezone_caches() {
     lower_zone_cache_->clear();
 }
+int TimezoneUtils::cache_size() { // number of entries currently in lower_zone_cache_; declared under the header's "for ut only" note
+    return lower_zone_cache_->size();
+}
 
 static bool parse_save_name_tz(const std::string& tz_name) {
     cctz::time_zone tz;
@@ -106,24 +110,54 @@ void TimezoneUtils::load_timezones_to_cache() {
     }
 
     lower_zone_cache_->erase("lmt"); // local mean time for every timezone
-    LOG(INFO) << "Read " << lower_zone_cache_->size() << " timezones.";
+
+    load_offsets_to_cache();
+    LOG(INFO) << "Preloaded" << lower_zone_cache_->size() << " timezones.";
+}
+
+static std::string to_hour_string(int arg) { // formats an hour as at least two digits; negatives keep a leading '-', non-negatives get no sign
+    if (arg < 0 && arg > -10) { // -9 to -1: "-0" followed by the single digit
+        return std::string {"-0"} + std::to_string(std::abs(arg));
+    } else if (arg >= 0 && arg < 10) { // 0 to 9: zero-padded to two digits
+        return std::string {"0"} + std::to_string(arg);
+    }
+    return std::to_string(arg); // <= -10 or >= 10: std::to_string already gives two or more digits (with '-' for negatives)
+}
+
+void TimezoneUtils::load_offsets_to_cache() { // inserts fixed-offset zones into lower_zone_cache_, keyed by their "+HH:MM" / "-HH:MM" text
+    for (int hour = -12; hour <= +14; hour++) {
+        for (int minute = 0; minute <= 30; minute += 30) { // only the :00 and :30 variants are generated
+            std::string offset_str = (hour >= 0 ? "+" : "") + 
to_hour_string(hour) + ':' +
+                                     (minute == 0 ? "00" : "30"); // '+' is added here; a '-' comes from to_hour_string
+            cctz::time_zone result;
+            parse_tz_offset_string(offset_str, result); // NOTE(review): the bool result is not checked; result is cached either way
+            lower_zone_cache_->emplace(offset_str, result);
+        }
+    }
+    // -00 for hour is also valid
+    std::string offset_str = "-00:00"; // hour 0 in the loop only yields the "+00:xx" spellings, so "-00:xx" is added explicitly
+    cctz::time_zone result;
+    parse_tz_offset_string(offset_str, result);
+    lower_zone_cache_->emplace(offset_str, result);
+    offset_str = "-00:30"; // same result object is reused for the second "-00" entry
+    parse_tz_offset_string(offset_str, result);
+    lower_zone_cache_->emplace(offset_str, result);
 }
 
 bool TimezoneUtils::find_cctz_time_zone(const std::string& timezone, 
cctz::time_zone& ctz) {
-    if (auto it = lower_zone_cache_->find(to_lower_copy(timezone));
-        it != lower_zone_cache_->end()) {
+    if (auto it = lower_zone_cache_->find(to_lower_copy(timezone)); it != 
lower_zone_cache_->end())
+            [[likely]] { // cache hit on the lower-cased key: hand the stored zone back through ctz
         ctz = it->second;
         return true;
     }
-    // offset format or just illegal
-    return parse_tz_offset_string(timezone, ctz);
+    return false; // cache miss: ctz is not modified and parse_tz_offset_string is no longer tried. NOTE(review): offset strings that were never inserted into the cache (e.g. minutes other than 00/30) now appear to be rejected - confirm this is intended
 }
 
 bool TimezoneUtils::parse_tz_offset_string(const std::string& timezone, 
cctz::time_zone& ctz) {
     // like +08:00, which not in timezone_names_map_
     re2::StringPiece value;
-    if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), 
RE2::UNANCHORED, &value,
-                                          1)) {
+    if (time_zone_offset_format_reg.Match(timezone, 0, timezone.size(), 
RE2::UNANCHORED, &value, 1))
+            [[likely]] {
         bool positive = value[0] != '-';
 
         //Regular expression guarantees hour and minute must be int
@@ -139,8 +173,6 @@ bool TimezoneUtils::parse_tz_offset_string(const 
std::string& timezone, cctz::ti
         int offset = hour * 60 * 60 + minute * 60;
         offset *= positive ? 1 : -1;
         ctz = cctz::fixed_time_zone(cctz::seconds(offset));
-        // try to push the result time offset of "+08:00" need lock. now it's 
harmful for performance.
-        // maybe we can use rcu of hazard-pointer to opt it.
         return true;
     }
     return false;
diff --git a/be/src/util/timezone_utils.h b/be/src/util/timezone_utils.h
index c8bce44b5ab..3cdb17fc6fd 100644
--- a/be/src/util/timezone_utils.h
+++ b/be/src/util/timezone_utils.h
@@ -41,6 +41,9 @@ public:
 private:
     // for ut only
     static void clear_timezone_caches();
+    static int cache_size();
+
+    static void load_offsets_to_cache();
 
     static bool parse_tz_offset_string(const std::string& timezone, 
cctz::time_zone& ctz);
 };
diff --git a/be/test/util/timezone_utils_test.cpp 
b/be/test/util/timezone_utils_test.cpp
new file mode 100644
index 00000000000..9130e0be633
--- /dev/null
+++ b/be/test/util/timezone_utils_test.cpp
@@ -0,0 +1,144 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/timezone_utils.h"
+
+#include <gtest/gtest-message.h>
+#include <gtest/gtest-test-part.h>
+
+#include <boost/utility/binary.hpp>
+#include <iostream>
+
+#include "cctz/time_zone.h"
+#include "gtest/gtest.h"
+#include "gtest/gtest_pred_impl.h"
+
+namespace doris {
+
+TEST(TimezoneUtilsTest, ParseOffset) { // parse_tz_offset_string: accepted strings must give the matching offset (asserted in seconds), rejected ones must return false
+    const auto tp = cctz::civil_second(2011, 1, 1, 0, 0,
+                                       0); // offset has no DST, every time 
point is acceptable
+    cctz::time_zone result;
+
+    TimezoneUtils::parse_tz_offset_string("+14:00", result); // NOTE(review): for the valid inputs the bool result is ignored; only the resulting offset is asserted
+    auto cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, 14 * 3600);
+
+    TimezoneUtils::parse_tz_offset_string("+00:00", result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, 0 * 3600);
+
+    TimezoneUtils::parse_tz_offset_string("+00:30", result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, 1800);
+
+    TimezoneUtils::parse_tz_offset_string("+10:30", result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, 10 * 3600 + 1800);
+
+    TimezoneUtils::parse_tz_offset_string("+01:00", result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, 1 * 3600);
+
+    TimezoneUtils::parse_tz_offset_string("-12:00", result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, -12 * 3600);
+
+    TimezoneUtils::parse_tz_offset_string("-09:00", result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, -9 * 3600);
+
+    TimezoneUtils::parse_tz_offset_string("-01:00", result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, -1 * 3600);
+
+    TimezoneUtils::parse_tz_offset_string("-00:00", result); // "-00:00" is expected to be the same zero offset as "+00:00"
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, 0 * 3600);
+
+    TimezoneUtils::parse_tz_offset_string("-00:30", result); // the sign must apply to the minutes as well: expects -1800
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, -1800);
+
+    TimezoneUtils::parse_tz_offset_string("-10:30", result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, -10 * 3600 - 1800);
+
+    // out of range (+15, -13) or illegal format (single-digit hour): the parse must return false
+    EXPECT_FALSE(TimezoneUtils::parse_tz_offset_string("+15:00", result));
+    EXPECT_FALSE(TimezoneUtils::parse_tz_offset_string("-13:00", result));
+    EXPECT_FALSE(TimezoneUtils::parse_tz_offset_string("+9:30", result));
+}
+
+TEST(TimezoneUtilsTest, LoadOffsets) { // checks the cache size after loading offsets alone, then after the full timezone load
+    TimezoneUtils::clear_timezone_caches(); // start from an empty cache so the exact count below is meaningful
+    TimezoneUtils::load_offsets_to_cache();
+    EXPECT_EQ(TimezoneUtils::cache_size(), (13 + 15) * 2); // 13 '-' hours (-00..-12) + 15 '+' hours (+00..+14), each with :00 and :30
+
+    TimezoneUtils::load_timezones_to_cache();
+    EXPECT_GE(TimezoneUtils::cache_size(), 100); // after the full load the cache is expected to hold at least 100 entries
+}
+
+TEST(TimezoneUtilsTest, FindTimezone) { // find_cctz_time_zone: zone names and cached offset strings resolve; other offset strings return false
+    TimezoneUtils::load_timezones_to_cache(); // fills the cache that find_cctz_time_zone looks names up in
+
+    std::string tzname;
+    cctz::time_zone result;
+    const auto tp = cctz::civil_second(2011, 1, 1, 0, 0, 0);
+
+    tzname = "Asia/Shanghai"; // mixed-case name; find_cctz_time_zone lower-cases it before the lookup
+    TimezoneUtils::find_cctz_time_zone(tzname, result);
+    auto cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, 8 * 3600);
+
+    tzname = "America/Los_Angeles";
+    TimezoneUtils::find_cctz_time_zone(tzname, result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, -8 * 3600);
+
+    tzname = "+00:30"; // offset strings are expected to be found in the cache as well
+    TimezoneUtils::find_cctz_time_zone(tzname, result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, 1800);
+
+    tzname = "-00:00";
+    TimezoneUtils::find_cctz_time_zone(tzname, result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, 0);
+
+    tzname = "+14:00";
+    TimezoneUtils::find_cctz_time_zone(tzname, result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, 14 * 3600);
+
+    tzname = "-12:00";
+    TimezoneUtils::find_cctz_time_zone(tzname, result);
+    cl = result.lookup(cctz::convert(tp, result));
+    EXPECT_EQ(cl.offset, -12 * 3600);
+
+    // out of range or illegal format: the lookup must return false
+    tzname = "+15:00";
+    EXPECT_FALSE(TimezoneUtils::find_cctz_time_zone(tzname, result));
+
+    tzname = "-13:00";
+    EXPECT_FALSE(TimezoneUtils::find_cctz_time_zone(tzname, result));
+
+    tzname = "+9:30";
+    EXPECT_FALSE(TimezoneUtils::find_cctz_time_zone(tzname, result));
+}
+
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to