This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 9c0528daf6 [Opt](orc-reader) opt the performance of date conversion. (#22381)
9c0528daf6 is described below

commit 9c0528daf62ccc3d88102e6f6b9d2f341776adb4
Author: Qi Chen <[email protected]>
AuthorDate: Fri Aug 4 10:52:09 2023 +0800

    [Opt](orc-reader) opt the performance of date conversion. (#22381)
    
    
    Optimize the performance of date conversion in the ORC reader. The same query is shown below before and after this change (1.28 sec vs. 0.19 sec).
    
    ```
    mysql> select count(l_commitdate) from lineitem;
    +---------------------+
    | count(l_commitdate) |
    +---------------------+
    |           600037902 |
    +---------------------+
    1 row in set (1.28 sec)
    
    mysql> select count(l_commitdate) from lineitem;
    +---------------------+
    | count(l_commitdate) |
    +---------------------+
    |           600037902 |
    +---------------------+
    1 row in set (0.19 sec)
    ```
---
 be/src/vec/exec/format/orc/vorc_reader.cpp |  3 +++
 be/src/vec/exec/format/orc/vorc_reader.h   | 10 ++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 5c0fa31c66..964db48027 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -148,6 +148,9 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* 
state,
           _io_ctx(io_ctx),
           _enable_lazy_mat(enable_lazy_mat) {
     TimezoneUtils::find_cctz_time_zone(ctz, _time_zone);
+    VecDateTimeValue t;
+    t.from_unixtime(0, ctz);
+    _offset_days = t.day() == 31 ? 0 : 1;
     _init_profile();
     _init_system_properties();
     _init_file_description();
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h 
b/be/src/vec/exec/format/orc/vorc_reader.h
index 2f5498b2cd..9b5c1fe576 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -406,6 +406,7 @@ private:
         if (data == nullptr) {
             return Status::InternalError("Wrong data type for colum '{}'", 
col_name);
         }
+        auto* __restrict date_day_offset_dict = get_date_day_offset_dict();
         auto& column_data = 
static_cast<ColumnVector<DorisColumnType>&>(*data_column).get_data();
         auto origin_size = column_data.size();
         column_data.resize(origin_size + num_values);
@@ -421,11 +422,15 @@ private:
                         continue;
                     }
                 }
-                int64_t& date_value = data->data[i];
-                v.from_unixtime(date_value * 24 * 60 * 60, _time_zone); // day 
to seconds
+                int64_t date_value = data->data[i] + _offset_days;
+                DCHECK_LT(date_value, 25500);
+                DCHECK_GE(date_value, 0);
                 if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
+                    v.create_from_date_v2(date_day_offset_dict[date_value], 
TIME_DATE);
                     // we should cast to date if using date v1.
                     v.cast_to_date();
+                } else {
+                    v = date_day_offset_dict[date_value];
                 }
             } else { // timestamp
                 if constexpr (is_filter) {
@@ -498,6 +503,7 @@ private:
     int64_t _range_size;
     const std::string& _ctz;
     const std::vector<std::string>* _column_names;
+    size_t _offset_days = 0;
     cctz::time_zone _time_zone;
 
     std::list<std::string> _read_cols;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to