This is an automated email from the ASF dual-hosted git repository.

lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 08832d9f3a9 [Fix](exec) Fix date dict dead loop. (#25570)
08832d9f3a9 is described below

commit 08832d9f3a94acff981140279bba443b69751e14
Author: Qi Chen <kaka11.c...@gmail.com>
AuthorDate: Tue Oct 24 02:51:43 2023 +0800

    [Fix](exec) Fix date dict dead loop. (#25570)
---
 be/src/util/time_lut.cpp                     |   2 +-
 be/src/vec/exec/format/orc/vorc_reader.cpp   |   2 +-
 be/src/vec/exec/format/orc/vorc_reader.h     |   2 +-
 be/src/vec/exec/format/parquet/decoder.cpp   |   2 +-
 be/src/vec/exec/format/parquet/decoder.h     |   2 +-
 be/src/vec/runtime/vdatetime_value.cpp       |  26 +++--
 be/src/vec/runtime/vdatetime_value.h         |  18 +--
 be/test/vec/runtime/vdatetime_value_test.cpp | 168 +++++++++++++++++++++++++++
 8 files changed, 201 insertions(+), 21 deletions(-)

diff --git a/be/src/util/time_lut.cpp b/be/src/util/time_lut.cpp
index 9be2dec4fc7..616541d411f 100644
--- a/be/src/util/time_lut.cpp
+++ b/be/src/util/time_lut.cpp
@@ -96,7 +96,7 @@ uint8_t calc_weekday(uint64_t day_nr, bool is_sunday_first_day) {
 }
 
 uint32_t calc_daynr(uint16_t year, uint8_t month, uint8_t day) {
-    // date_day_offet_dict range from [1900-01-01, 2039-10-24]
+    // date_day_offset_dict range from [1900-01-01, 2039-12-31]
     if (date_day_offset_dict::can_speed_up_calc_daynr(year) &&
         LIKELY(date_day_offset_dict::get_dict_init())) {
         return date_day_offset_dict::get().daynr(year, month, day);
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index c1228c9d454..212540c0d8e 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -149,7 +149,7 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* 
state,
     TimezoneUtils::find_cctz_time_zone(ctz, _time_zone);
     VecDateTimeValue t;
     t.from_unixtime(0, ctz);
-    _offset_days = t.day() == 31 ? 0 : 1;
+    _offset_days = t.day() == 31 ? -1 : 0; // If 1969-12-31, then returns -1.
     _init_profile();
     _init_system_properties();
     _init_file_description();
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h 
b/be/src/vec/exec/format/orc/vorc_reader.h
index c41b8718d75..feb1b700626 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -500,7 +500,7 @@ private:
     int64_t _range_size;
     const std::string& _ctz;
     const std::vector<std::string>* _column_names;
-    size_t _offset_days = 0;
+    int32_t _offset_days = 0;
     cctz::time_zone _time_zone;
 
     std::list<std::string> _read_cols;
diff --git a/be/src/vec/exec/format/parquet/decoder.cpp 
b/be/src/vec/exec/format/parquet/decoder.cpp
index bf8ef0b2330..0a158176091 100644
--- a/be/src/vec/exec/format/parquet/decoder.cpp
+++ b/be/src/vec/exec/format/parquet/decoder.cpp
@@ -181,7 +181,7 @@ void Decoder::init(FieldSchema* field_schema, 
cctz::time_zone* ctz) {
     if (_decode_params->ctz) {
         VecDateTimeValue t;
         t.from_unixtime(0, *_decode_params->ctz);
-        _decode_params->offset_days = t.day() == 31 ? 0 : 1;
+        _decode_params->offset_days = t.day() == 31 ? -1 : 0; // If 1969-12-31, then returns -1.
     }
 }
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/decoder.h 
b/be/src/vec/exec/format/parquet/decoder.h
index 6c1030818cd..acd9965bad8 100644
--- a/be/src/vec/exec/format/parquet/decoder.h
+++ b/be/src/vec/exec/format/parquet/decoder.h
@@ -71,7 +71,7 @@ struct DecodeParams {
     static const cctz::time_zone utc0;
     // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set the 
time zone
     cctz::time_zone* ctz = nullptr;
-    size_t offset_days = 0;
+    int32_t offset_days = 0;
     int64_t second_mask = 1;
     int64_t scale_to_nano_factor = 1;
     DecimalScaleParams decimal_scale;
diff --git a/be/src/vec/runtime/vdatetime_value.cpp 
b/be/src/vec/runtime/vdatetime_value.cpp
index 201548b1642..3e24c1ffe0a 100644
--- a/be/src/vec/runtime/vdatetime_value.cpp
+++ b/be/src/vec/runtime/vdatetime_value.cpp
@@ -2667,10 +2667,10 @@ template <typename T>
 typename DateV2Value<T>::underlying_value DateV2Value<T>::to_date_int_val() 
const {
     return int_val_;
 }
-
+// [1900-01-01, 2039-12-31]
 static std::array<DateV2Value<DateV2ValueType>, 
date_day_offset_dict::DICT_DAYS>
         DATE_DAY_OFFSET_ITEMS;
-
+// [1900-01-01, 2039-12-31]
 static std::array<std::array<std::array<int, 31>, 12>, 140> 
DATE_DAY_OFFSET_DICT;
 
 static bool DATE_DAY_OFFSET_ITEMS_INIT = false;
@@ -2687,19 +2687,27 @@ bool date_day_offset_dict::get_dict_init() {
 
 date_day_offset_dict::date_day_offset_dict() {
     DateV2Value<DateV2ValueType> d;
+    // Init days before epoch.
     d.set_time(1969, 12, 31, 0, 0, 0, 0);
-    for (int i = 0; i < DAY_AFTER_EPOCH; ++i) {
-        DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH + i] = d;
+    for (int i = 0; i < DAY_BEFORE_EPOCH; ++i) {
+        DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH - i - 1] = d;
         DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() - 
1] =
                 calc_daynr(d.year(), d.month(), d.day());
-        d += 1;
+        d -= 1;
     }
-    d.set_time(1969, 12, 31, 0, 0, 0, 0);
-    for (int i = 0; i <= DAY_BEFORE_EPOCH; ++i) {
-        DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH - i] = d;
+    // Init epoch day.
+    d.set_time(1970, 1, 1, 0, 0, 0, 0);
+    DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH] = d;
+    DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() - 1] =
+            calc_daynr(d.year(), d.month(), d.day());
+    d += 1;
+
+    // Init days after epoch.
+    for (int i = 0; i < DAY_AFTER_EPOCH; ++i) {
+        DATE_DAY_OFFSET_ITEMS[DAY_BEFORE_EPOCH + 1 + i] = d;
         DATE_DAY_OFFSET_DICT[d.year() - START_YEAR][d.month() - 1][d.day() - 
1] =
                 calc_daynr(d.year(), d.month(), d.day());
-        d -= 1;
+        d += 1;
     }
 
     DATE_DAY_OFFSET_ITEMS_INIT = true;
diff --git a/be/src/vec/runtime/vdatetime_value.h 
b/be/src/vec/runtime/vdatetime_value.h
index 97c82f68bbe..b03c09a55d3 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -1516,6 +1516,9 @@ int64_t datetime_diff(const VecDateTimeValue& ts_value1, 
const DateV2Value<T>& t
     return 0;
 }
 
+/**
+ * Date dict table. date range is [1900-01-01, 2039-12-31].
+ */
 class date_day_offset_dict {
 private:
     static date_day_offset_dict instance;
@@ -1526,15 +1529,16 @@ private:
     date_day_offset_dict& operator=(const date_day_offset_dict&) = default;
 
 public:
-    static constexpr int DAY_BEFORE_EPOCH = 25566; // 1900-01-01
-    static constexpr int DAY_AFTER_EPOCH = 25500;  // 2039-10-24
-    static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + DAY_AFTER_EPOCH;
+    static constexpr int DAY_BEFORE_EPOCH = 25567;                           // 1900-01-01
+    static constexpr int DAY_AFTER_EPOCH = 25566;                            // 2039-12-31
+    static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + 1 + DAY_AFTER_EPOCH; // 1 means 1970-01-01
 
-    static constexpr int START_YEAR = 1900;                         // 
1900-01-01
-    static constexpr int END_YEAR = 2039;                           // 
2039-10-24
-    static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR = 719527; // 
1969-12-31
+    static constexpr int START_YEAR = 1900; // 1900-01-01
+    static constexpr int END_YEAR = 2039;   // 2039-12-31
+    static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR =
+            719528; // 1970-01-01 (start from 0000-01-01, 0000-01-01 is day 1, returns 1)
 
-    static bool can_speed_up_calc_daynr(int year) { return year >= START_YEAR && year < END_YEAR; }
+    static bool can_speed_up_calc_daynr(int year) { return year >= START_YEAR && year <= END_YEAR; }
 
     static int get_offset_by_daynr(int daynr) { return daynr - 
DAY_OFFSET_CAL_START_POINT_DAYNR; }
 
diff --git a/be/test/vec/runtime/vdatetime_value_test.cpp 
b/be/test/vec/runtime/vdatetime_value_test.cpp
index 05943dcc6c3..bb396b2ce6f 100644
--- a/be/test/vec/runtime/vdatetime_value_test.cpp
+++ b/be/test/vec/runtime/vdatetime_value_test.cpp
@@ -570,4 +570,172 @@ TEST(VDateTimeValueTest, date_v2_to_string_test) {
     }
 }
 
+TEST(VDateTimeValueTest, date_v2_daynr_test) {
+    {
+        DateV2Value<DateV2ValueType> date_v2;
+        // 1970/01/01
+        EXPECT_TRUE(date_v2.get_date_from_daynr(719528));
+        EXPECT_TRUE(date_v2.year() == 1970);
+        EXPECT_TRUE(date_v2.month() == 1);
+        EXPECT_TRUE(date_v2.day() == 1);
+        EXPECT_TRUE(date_v2.hour() == 0);
+        EXPECT_TRUE(date_v2.minute() == 0);
+        EXPECT_TRUE(date_v2.second() == 0);
+        EXPECT_TRUE(date_v2.microsecond() == 0);
+        EXPECT_TRUE(doris::calc_daynr(1970, 1, 1) == 719528);
+        EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+        EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(1970));
+        
EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(719528));
+    }
+
+    {
+        DateV2Value<DateV2ValueType> date_v2;
+        // 1969/12/31
+        EXPECT_TRUE(date_v2.get_date_from_daynr(719527));
+        EXPECT_TRUE(date_v2.year() == 1969);
+        EXPECT_TRUE(date_v2.month() == 12);
+        EXPECT_TRUE(date_v2.day() == 31);
+        EXPECT_TRUE(date_v2.hour() == 0);
+        EXPECT_TRUE(date_v2.minute() == 0);
+        EXPECT_TRUE(date_v2.second() == 0);
+        EXPECT_TRUE(date_v2.microsecond() == 0);
+        EXPECT_TRUE(doris::calc_daynr(1969, 12, 31) == 719527);
+        EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+        EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(1969));
+        
EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(719527));
+    }
+
+    {
+        DateV2Value<DateV2ValueType> date_v2;
+        // 1900/01/01
+        EXPECT_TRUE(date_v2.get_date_from_daynr(693961));
+        EXPECT_TRUE(date_v2.year() == 1900);
+        EXPECT_TRUE(date_v2.month() == 1);
+        EXPECT_TRUE(date_v2.day() == 1);
+        EXPECT_TRUE(date_v2.hour() == 0);
+        EXPECT_TRUE(date_v2.minute() == 0);
+        EXPECT_TRUE(date_v2.second() == 0);
+        EXPECT_TRUE(date_v2.microsecond() == 0);
+        EXPECT_TRUE(doris::calc_daynr(1900, 1, 1) == 693961);
+        EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+        EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(1900));
+        
EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(693961));
+    }
+
+    {
+        DateV2Value<DateV2ValueType> date_v2;
+        // 1899/12/31
+        EXPECT_TRUE(date_v2.get_date_from_daynr(693960));
+        EXPECT_TRUE(date_v2.year() == 1899);
+        EXPECT_TRUE(date_v2.month() == 12);
+        EXPECT_TRUE(date_v2.day() == 31);
+        EXPECT_TRUE(date_v2.hour() == 0);
+        EXPECT_TRUE(date_v2.minute() == 0);
+        EXPECT_TRUE(date_v2.second() == 0);
+        EXPECT_TRUE(date_v2.microsecond() == 0);
+        EXPECT_TRUE(doris::calc_daynr(1899, 12, 31) == 693960);
+        EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+        
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(1899));
+        
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(693960));
+    }
+
+    {
+        DateV2Value<DateV2ValueType> date_v2;
+        // 2039/12/31
+        EXPECT_TRUE(date_v2.get_date_from_daynr(745094));
+        EXPECT_TRUE(date_v2.year() == 2039);
+        EXPECT_TRUE(date_v2.month() == 12);
+        EXPECT_TRUE(date_v2.day() == 31);
+        EXPECT_TRUE(date_v2.hour() == 0);
+        EXPECT_TRUE(date_v2.minute() == 0);
+        EXPECT_TRUE(date_v2.second() == 0);
+        EXPECT_TRUE(date_v2.microsecond() == 0);
+        EXPECT_TRUE(doris::calc_daynr(2039, 12, 31) == 745094);
+        EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+        EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_calc_daynr(2039));
+        
EXPECT_TRUE(date_day_offset_dict::get().can_speed_up_daynr_to_date(745094));
+    }
+
+    {
+        DateV2Value<DateV2ValueType> date_v2;
+        // 2040/01/01
+        EXPECT_TRUE(date_v2.get_date_from_daynr(745095));
+        EXPECT_TRUE(date_v2.year() == 2040);
+        EXPECT_TRUE(date_v2.month() == 1);
+        EXPECT_TRUE(date_v2.day() == 1);
+        EXPECT_TRUE(date_v2.hour() == 0);
+        EXPECT_TRUE(date_v2.minute() == 0);
+        EXPECT_TRUE(date_v2.second() == 0);
+        EXPECT_TRUE(date_v2.microsecond() == 0);
+        EXPECT_TRUE(doris::calc_daynr(2040, 01, 01) == 745095);
+        EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+        
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(2040));
+        
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(745095));
+    }
+
+    {
+        DateV2Value<DateV2ValueType> date_v2;
+        // 0000/01/01
+        EXPECT_TRUE(date_v2.get_date_from_daynr(1));
+        EXPECT_TRUE(date_v2.year() == 0);
+        EXPECT_TRUE(date_v2.month() == 1);
+        EXPECT_TRUE(date_v2.day() == 1);
+        EXPECT_TRUE(date_v2.hour() == 0);
+        EXPECT_TRUE(date_v2.minute() == 0);
+        EXPECT_TRUE(date_v2.second() == 0);
+        EXPECT_TRUE(date_v2.microsecond() == 0);
+        EXPECT_TRUE(doris::calc_daynr(0, 01, 01) == 1);
+        EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+        EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(0));
+        
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(1));
+    }
+
+    {
+        DateV2Value<DateV2ValueType> date_v2;
+        // Invalid date 0000/00/01
+        EXPECT_TRUE(date_v2.year() == 0);
+        EXPECT_TRUE(date_v2.month() == 0);
+        EXPECT_TRUE(date_v2.day() == 0);
+        EXPECT_TRUE(date_v2.hour() == 0);
+        EXPECT_TRUE(date_v2.minute() == 0);
+        EXPECT_TRUE(date_v2.second() == 0);
+        EXPECT_TRUE(date_v2.microsecond() == 0);
+        EXPECT_TRUE(doris::calc_daynr(0, 0, 1) == 0);
+    }
+
+    {
+        DateV2Value<DateV2ValueType> date_v2;
+        // 9999/12/31
+        EXPECT_TRUE(date_v2.get_date_from_daynr(3652424));
+        EXPECT_TRUE(date_v2.year() == 9999);
+        EXPECT_TRUE(date_v2.month() == 12);
+        EXPECT_TRUE(date_v2.day() == 31);
+        EXPECT_TRUE(date_v2.hour() == 0);
+        EXPECT_TRUE(date_v2.minute() == 0);
+        EXPECT_TRUE(date_v2.second() == 0);
+        EXPECT_TRUE(date_v2.microsecond() == 0);
+        EXPECT_TRUE(doris::calc_daynr(9999, 12, 31) == 3652424);
+        EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+        
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(9999));
+        
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(3652424));
+    }
+
+    {
+        DateV2Value<DateV2ValueType> date_v2;
+        // Invalid date 10000/01/01
+        EXPECT_FALSE(date_v2.get_date_from_daynr(3652425));
+        EXPECT_TRUE(date_v2.year() == 0);
+        EXPECT_TRUE(date_v2.month() == 0);
+        EXPECT_TRUE(date_v2.day() == 0);
+        EXPECT_TRUE(date_v2.hour() == 0);
+        EXPECT_TRUE(date_v2.minute() == 0);
+        EXPECT_TRUE(date_v2.second() == 0);
+        EXPECT_TRUE(date_v2.microsecond() == 0);
+        EXPECT_TRUE(doris::calc_daynr(10000, 01, 01) == 3652425);
+        EXPECT_TRUE(date_day_offset_dict::get().get_dict_init());
+        
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_calc_daynr(10000));
+        
EXPECT_FALSE(date_day_offset_dict::get().can_speed_up_daynr_to_date(3652425));
+    }
+}
+
 } // namespace doris::vectorized


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to