This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9c0528daf6 [Opt](orc-reader) opt the performance of date conversion.
(#22381)
9c0528daf6 is described below
commit 9c0528daf62ccc3d88102e6f6b9d2f341776adb4
Author: Qi Chen <[email protected]>
AuthorDate: Fri Aug 4 10:52:09 2023 +0800
[Opt](orc-reader) opt the performance of date conversion. (#22381)
Optimize the performance of date conversion in the ORC reader.
```
mysql> select count(l_commitdate) from lineitem;
+---------------------+
| count(l_commitdate) |
+---------------------+
| 600037902 |
+---------------------+
1 row in set (1.28 sec)
mysql> select count(l_commitdate) from lineitem;
+---------------------+
| count(l_commitdate) |
+---------------------+
| 600037902 |
+---------------------+
1 row in set (0.19 sec)
```
---
be/src/vec/exec/format/orc/vorc_reader.cpp | 3 +++
be/src/vec/exec/format/orc/vorc_reader.h | 10 ++++++++--
2 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 5c0fa31c66..964db48027 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -148,6 +148,9 @@ OrcReader::OrcReader(RuntimeProfile* profile, RuntimeState*
state,
_io_ctx(io_ctx),
_enable_lazy_mat(enable_lazy_mat) {
TimezoneUtils::find_cctz_time_zone(ctz, _time_zone);
+ VecDateTimeValue t;
+ t.from_unixtime(0, ctz);
+ _offset_days = t.day() == 31 ? 0 : 1;
_init_profile();
_init_system_properties();
_init_file_description();
diff --git a/be/src/vec/exec/format/orc/vorc_reader.h
b/be/src/vec/exec/format/orc/vorc_reader.h
index 2f5498b2cd..9b5c1fe576 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.h
+++ b/be/src/vec/exec/format/orc/vorc_reader.h
@@ -406,6 +406,7 @@ private:
if (data == nullptr) {
return Status::InternalError("Wrong data type for colum '{}'",
col_name);
}
+ auto* __restrict date_day_offset_dict = get_date_day_offset_dict();
auto& column_data =
static_cast<ColumnVector<DorisColumnType>&>(*data_column).get_data();
auto origin_size = column_data.size();
column_data.resize(origin_size + num_values);
@@ -421,11 +422,15 @@ private:
continue;
}
}
- int64_t& date_value = data->data[i];
- v.from_unixtime(date_value * 24 * 60 * 60, _time_zone); // day
to seconds
+ int64_t date_value = data->data[i] + _offset_days;
+ DCHECK_LT(date_value, 25500);
+ DCHECK_GE(date_value, 0);
if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
+ v.create_from_date_v2(date_day_offset_dict[date_value],
TIME_DATE);
// we should cast to date if using date v1.
v.cast_to_date();
+ } else {
+ v = date_day_offset_dict[date_value];
}
} else { // timestamp
if constexpr (is_filter) {
@@ -498,6 +503,7 @@ private:
int64_t _range_size;
const std::string& _ctz;
const std::vector<std::string>* _column_names;
+ size_t _offset_days = 0;
cctz::time_zone _time_zone;
std::list<std::string> _read_cols;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]