This is an automated email from the ASF dual-hosted git repository. kxiao pushed a commit to branch branch-2.0 in repository https://gitbox.apache.org/repos/asf/doris.git
commit f827714e494b894776344024790bf2fac3c8679a Author: Qi Chen <kaka11.c...@gmail.com> AuthorDate: Fri Aug 4 10:52:09 2023 +0800 [Opt](orc-reader) opt the performance of date conversion. (#22381) Opt the performance of date conversion in orc reader. ``` mysql> select count(l_commitdate) from lineitem; +---------------------+ | count(l_commitdate) | +---------------------+ | 600037902 | +---------------------+ 1 row in set (1.28 sec) mysql> select count(l_commitdate) from lineitem; +---------------------+ | count(l_commitdate) | +---------------------+ | 600037902 | +---------------------+ 1 row in set (0.19 sec) ``` --- be/src/vec/exec/format/orc/vorc_reader.cpp | 3 +++ be/src/vec/exec/format/orc/vorc_reader.h | 10 ++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 77fd37e913..cdb3b28f4d 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -148,6 +148,9 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState* state, _io_ctx(io_ctx), _enable_lazy_mat(enable_lazy_mat) { TimezoneUtils::find_cctz_time_zone(ctz, _time_zone); + VecDateTimeValue t; + t.from_unixtime(0, ctz); + _offset_days = t.day() == 31 ? 
0 : 1; _init_profile(); _init_system_properties(); _init_file_description(); diff --git a/be/src/vec/exec/format/orc/vorc_reader.h b/be/src/vec/exec/format/orc/vorc_reader.h index b558f06465..41d919578f 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.h +++ b/be/src/vec/exec/format/orc/vorc_reader.h @@ -406,6 +406,7 @@ private: if (data == nullptr) { return Status::InternalError("Wrong data type for colum '{}'", col_name); } + auto* __restrict date_day_offset_dict = get_date_day_offset_dict(); auto& column_data = static_cast<ColumnVector<DorisColumnType>&>(*data_column).get_data(); auto origin_size = column_data.size(); column_data.resize(origin_size + num_values); @@ -421,11 +422,15 @@ private: continue; } } - int64_t& date_value = data->data[i]; - v.from_unixtime(date_value * 24 * 60 * 60, _time_zone); // day to seconds + int64_t date_value = data->data[i] + _offset_days; + DCHECK_LT(date_value, 25500); + DCHECK_GE(date_value, 0); if constexpr (std::is_same_v<CppType, VecDateTimeValue>) { + v.create_from_date_v2(date_day_offset_dict[date_value], TIME_DATE); // we should cast to date if using date v1. v.cast_to_date(); + } else { + v = date_day_offset_dict[date_value]; } } else { // timestamp if constexpr (is_filter) { @@ -495,6 +500,7 @@ private: int64_t _range_size; const std::string& _ctz; const std::vector<std::string>* _column_names; + size_t _offset_days = 0; cctz::time_zone _time_zone; std::list<std::string> _read_cols; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org