This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 9787b7fa816829e88a153acc6a5aaad1a28c9da8
Author: HappenLee <happen...@hotmail.com>
AuthorDate: Tue Aug 1 12:24:00 2023 +0800

    [Opt](exec) opt the performance of date parquet convert by date dict (#22384)
    
    before:
    
    mysql> select count(l_commitdate) from lineitem;
    +---------------------+
    | count(l_commitdate) |
    +---------------------+
    |           600037902 |
    +---------------------+
    1 row in set (0.86 sec)
    after:
    
    mysql> select count(l_commitdate) from lineitem;
    +---------------------+
    | count(l_commitdate) |
    +---------------------+
    |           600037902 |
    +---------------------+
    1 row in set (0.36 sec)
---
 be/src/service/doris_main.cpp                             |  1 +
 be/src/vec/exec/format/parquet/decoder.cpp                |  2 +-
 .../vec/exec/format/parquet/fix_length_dict_decoder.hpp   | 10 +++++++---
 .../vec/exec/format/parquet/fix_length_plain_decoder.cpp  | 10 ++++++++--
 be/src/vec/runtime/vdatetime_value.cpp                    | 15 +++++++++++++++
 be/src/vec/runtime/vdatetime_value.h                      |  3 +++
 be/test/vec/exec/parquet/parquet_thrift_test.cpp          |  1 +
 7 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp
index 57dec22820..6271b34564 100644
--- a/be/src/service/doris_main.cpp
+++ b/be/src/service/doris_main.cpp
@@ -433,6 +433,7 @@ int main(int argc, char** argv) {
     auto exec_env = doris::ExecEnv::GetInstance();
     doris::ExecEnv::init(exec_env, paths);
     doris::TabletSchemaCache::create_global_schema_cache();
+    doris::vectorized::init_date_day_offset_dict();
 
     // init s3 write buffer pool
     doris::io::S3FileBufferPool* s3_buffer_pool = 
doris::io::S3FileBufferPool::GetInstance();
diff --git a/be/src/vec/exec/format/parquet/decoder.cpp 
b/be/src/vec/exec/format/parquet/decoder.cpp
index 539fc04a10..bf8ef0b233 100644
--- a/be/src/vec/exec/format/parquet/decoder.cpp
+++ b/be/src/vec/exec/format/parquet/decoder.cpp
@@ -181,7 +181,7 @@ void Decoder::init(FieldSchema* field_schema, 
cctz::time_zone* ctz) {
     if (_decode_params->ctz) {
         VecDateTimeValue t;
         t.from_unixtime(0, *_decode_params->ctz);
-        _decode_params->offset_days = doris::calc_daynr(t.year(), t.month(), 
t.day());
+        _decode_params->offset_days = t.day() == 31 ? 0 : 1;
     }
 }
 } // namespace doris::vectorized
diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp 
b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
index 817b5e7f96..bb95fb426f 100644
--- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
+++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
@@ -216,7 +216,7 @@ protected:
         size_t data_index = column_data.size();
         column_data.resize(data_index + select_vector.num_values() - 
select_vector.num_filtered());
         size_t dict_index = 0;
-
+        auto* __restrict date_day_offset_dict = get_date_day_offset_dict();
         ColumnSelectVector::DataReadType read_type;
         while (size_t run_length = 
select_vector.get_next_run<has_filter>(&read_type)) {
             switch (read_type) {
@@ -224,11 +224,15 @@ protected:
                 for (size_t i = 0; i < run_length; ++i) {
                     int64_t date_value =
                             _dict_items[_indexes[dict_index++]] + 
_decode_params->offset_days;
-                    auto& v = 
reinterpret_cast<CppType&>(column_data[data_index++]);
-                    v.get_date_from_daynr(date_value);
+                    DCHECK_LT(date_value, 25500);
                     if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
+                        auto& v = 
reinterpret_cast<CppType&>(column_data[data_index++]);
+                        
v.create_from_date_v2(date_day_offset_dict[date_value], TIME_DATE);
                         // we should cast to date if using date v1.
                         v.cast_to_date();
+                    } else {
+                        reinterpret_cast<CppType&>(column_data[data_index++]) =
+                                date_day_offset_dict[date_value];
                     }
                 }
                 break;
diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp 
b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
index 940e70db79..3fe58e6a5d 100644
--- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
+++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp
@@ -248,6 +248,7 @@ Status 
FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column,
     size_t data_index = column_data.size();
     column_data.resize(data_index + select_vector.num_values() - 
select_vector.num_filtered());
     ColumnSelectVector::DataReadType read_type;
+    auto* __restrict date_day_offset_dict = get_date_day_offset_dict();
 
     while (size_t run_length = 
select_vector.get_next_run<has_filter>(&read_type)) {
         switch (read_type) {
@@ -256,11 +257,16 @@ Status 
FixLengthPlainDecoder::_decode_date(MutableColumnPtr& doris_column,
                 char* buf_start = _data->data + _offset;
                 int64_t date_value = 
static_cast<int64_t>(*reinterpret_cast<int32_t*>(buf_start)) +
                                      _decode_params->offset_days;
-                auto& v = 
reinterpret_cast<CppType&>(column_data[data_index++]);
-                v.get_date_from_daynr(date_value);
+                DCHECK_LT(date_value, 25500);
+                DCHECK_GE(date_value, 0);
                 if constexpr (std::is_same_v<CppType, VecDateTimeValue>) {
+                    auto& v = 
reinterpret_cast<CppType&>(column_data[data_index++]);
+                    v.create_from_date_v2(date_day_offset_dict[date_value], 
TIME_DATE);
                     // we should cast to date if using date v1.
                     v.cast_to_date();
+                } else {
+                    reinterpret_cast<CppType&>(column_data[data_index++]) =
+                            date_day_offset_dict[date_value];
                 }
                 _offset += _type_length;
             }
diff --git a/be/src/vec/runtime/vdatetime_value.cpp 
b/be/src/vec/runtime/vdatetime_value.cpp
index 27926d1ee6..9675ab63fa 100644
--- a/be/src/vec/runtime/vdatetime_value.cpp
+++ b/be/src/vec/runtime/vdatetime_value.cpp
@@ -2613,6 +2613,21 @@ typename DateV2Value<T>::underlying_value 
DateV2Value<T>::to_date_int_val() cons
     return int_val_;
 }
 
+static std::array<DateV2Value<DateV2ValueType>, 25500> DATE_DAY_OFFSET_DICT; // file-local lookup table of 25500 precomputed DateV2 values, filled by init_date_day_offset_dict()
+
+void init_date_day_offset_dict() { // fills every slot of DATE_DAY_OFFSET_DICT, in index order, starting from 1969-12-31
+    DateV2Value<DateV2ValueType> d;
+    d.set_time(1969, 12, 31, 0, 0, 0, 0); // slot 0 holds 1969-12-31 00:00:00
+    for (int i = 0; i < DATE_DAY_OFFSET_DICT.size(); ++i) {
+        DATE_DAY_OFFSET_DICT[i] = d;
+        d += 1; // NOTE(review): presumably advances d by one day per slot -- confirm against DateV2Value::operator+=
+    }
+}
+
+DateV2Value<DateV2ValueType>* get_date_day_offset_dict() { // returns a mutable pointer to slot 0 of the table; performs no initialization itself
+    return DATE_DAY_OFFSET_DICT.data();
+}
+
 template <typename T>
 uint32_t DateV2Value<T>::set_date_uint32(uint32_t int_val) {
     union DateV2UInt32Union {
diff --git a/be/src/vec/runtime/vdatetime_value.h 
b/be/src/vec/runtime/vdatetime_value.h
index b33fe1e30e..e23aee6a81 100644
--- a/be/src/vec/runtime/vdatetime_value.h
+++ b/be/src/vec/runtime/vdatetime_value.h
@@ -1462,6 +1462,9 @@ class DataTypeDateTime;
 class DataTypeDateV2;
 class DataTypeDateTimeV2;
 
+[[maybe_unused]] void init_date_day_offset_dict();
+[[maybe_unused]] DateV2Value<DateV2ValueType>* get_date_day_offset_dict();
+
 template <typename T>
 struct DateTraits {};
 
diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp 
b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
index 9b36b55464..bb77b32e58 100644
--- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp
+++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp
@@ -437,6 +437,7 @@ static void read_parquet_data_and_check(const std::string& 
parquet_file,
 }
 
 TEST_F(ParquetThriftReaderTest, type_decoder) {
+    init_date_day_offset_dict();
     
read_parquet_data_and_check("./be/test/exec/test_data/parquet_scanner/type-decoder.parquet",
                                 
"./be/test/exec/test_data/parquet_scanner/type-decoder.txt", 10);
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to