This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit f827714e494b894776344024790bf2fac3c8679a
Author: Qi Chen <kaka11.c...@gmail.com>
AuthorDate: Fri Aug 4 10:52:09 2023 +0800

    [Opt](orc-reader) optimize the performance of date conversion. (#22381)
    
    
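    Optimize the performance of date conversion in the ORC reader by
    replacing the per-value `from_unixtime` call with a lookup in the
    date day-offset dictionary.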
    
    ```
    mysql> select count(l_commitdate) from lineitem;
    +---------------------+
    | count(l_commitdate) |
    +---------------------+
    |           600037902 |
    +---------------------+
    1 row in set (1.28 sec)
    
    mysql> select count(l_commitdate) from lineitem;
    +---------------------+
    | count(l_commitdate) |
    +---------------------+
    |           600037902 |
    +---------------------+
    1 row in set (0.19 sec)
    ```
---
 be/src/vec/exec/format/orc/vorc_reader.cpp |  3 +++
 be/src/vec/exec/format/orc/vorc_reader.h   | 10 ++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 77fd37e913..cdb3b28f4d 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -148,6 +148,9 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* 
state,
           _io_ctx(io_ctx),
           _enable_lazy_mat(enable_lazy_mat) {
     TimezoneUtils::find_cctz_time_zone(ctz, _time_zone);
+    VecDateTimeValue t;
+    t.from_unixtime(0, ctz);
+    _offset_days = t.day() == 31 ? 0 : 1;
     _init_profile();
     _init_system_properties();
     _init_file_description();
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h 
b/be/src/vec/exec/format/orc/vorc_reader.h
index b558f06465..41d919578f 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -406,6 +406,7 @@ private:
         if (data == nullptr) {
             return Status::InternalError("Wrong data type for colum '{}'", 
col_name);
         }
+        auto* __restrict date_day_offset_dict = get_date_day_offset_dict();
         auto& column_data = 
static_cast<ColumnVector<DorisColumnType>&>(*data_column).get_data();
         auto origin_size = column_data.size();
         column_data.resize(origin_size + num_values);
@@ -421,11 +422,15 @@ private:
                         continue;
                     }
                 }
-                int64_t& date_value = data->data[i];
-                v.from_unixtime(date_value * 24 * 60 * 60, _time_zone); // day 
to seconds
+                int64_t date_value = data->data[i] + _offset_days;
+                DCHECK_LT(date_value, 25500);
+                DCHECK_GE(date_value, 0);
                 if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
+                    v.create_from_date_v2(date_day_offset_dict[date_value], 
TIME_DATE);
                     // we should cast to date if using date v1.
                     v.cast_to_date();
+                } else {
+                    v = date_day_offset_dict[date_value];
                 }
             } else { // timestamp
                 if constexpr (is_filter) {
@@ -495,6 +500,7 @@ private:
     int64_t _range_size;
     const std::string& _ctz;
     const std::vector<std::string>* _column_names;
+    size_t _offset_days = 0;
     cctz::time_zone _time_zone;
 
     std::list<std::string> _read_cols;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to