This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new d73f170eeb [optimize](storage)optimize date in storage layer (#8967) d73f170eeb is described below commit d73f170eeb5a9cc7194d5eb55df33eb7ccacbba7 Author: wangbo <wan...@apache.org> AuthorDate: Thu Jun 23 12:29:10 2022 +0800 [optimize](storage)optimize date in storage layer (#8967) * opt date in storage * code style Co-authored-by: Wang Bo <wangb...@meituan.com> --- be/src/olap/comparison_predicate.cpp | 45 ++++++++++++++++++---- be/src/olap/in_list_predicate.h | 32 ++++++++++++++- be/src/olap/rowset/segment_v2/segment_iterator.cpp | 2 +- be/src/olap/schema.cpp | 2 +- be/src/olap/uint24.h | 2 + be/src/vec/columns/column_vector.h | 19 +++++---- be/src/vec/columns/predicate_column.h | 36 +++++++++++++++++ be/src/vec/runtime/vdatetime_value.h | 13 +++++++ 8 files changed, 131 insertions(+), 20 deletions(-) diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp index fc50c354fd..91ef9f7156 100644 --- a/be/src/olap/comparison_predicate.cpp +++ b/be/src/olap/comparison_predicate.cpp @@ -224,6 +224,9 @@ COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true) COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true) COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true) +// todo(wb) for date type we use uint32_t to save it but using Predicate<uint24> to evaluate it. +// This is done for compatibility with Row Version predicate. +// We can use Predicate<uint32_t> for date after Row Version is removed. 
#define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP) \ template <class T> \ void CLASS<T>::evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* flags) const { \ @@ -236,15 +239,40 @@ COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true) auto& null_bitmap = reinterpret_cast<const vectorized::ColumnVector<uint8_t>&>( \ *(nullable_column->get_null_map_column_ptr())) \ .get_data(); \ - for (uint16_t i = 0; i < size; i++) { \ - flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \ + if constexpr (std::is_same_v<T, uint24_t>) { \ + auto& predicate_column = \ + reinterpret_cast<const vectorized::PredicateColumnType<uint32_t>&>( \ + nullable_column->get_nested_column()); \ + uint32_t int32_val = 0; \ + char* int32_val_ptr = (char*)&int32_val; \ + memory_copy(int32_val_ptr, _value.get_data(), sizeof(uint24_t)); \ + auto& data_array_uint32_t = predicate_column.get_data(); \ + for (uint16_t i = 0; i < size; i++) { \ + flags[i] = (data_array_uint32_t[i] OP int32_val) && (!null_bitmap[i]); \ + } \ + } else { \ + for (uint16_t i = 0; i < size; i++) { \ + flags[i] = (data_array[i] OP _value) && (!null_bitmap[i]); \ + } \ } \ } else { \ - auto& predicate_column = \ - reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \ - auto& data_array = predicate_column.get_data(); \ - for (uint16_t i = 0; i < size; i++) { \ - flags[i] = data_array[i] OP _value; \ + if constexpr (std::is_same_v<T, uint24_t>) { \ + auto& predicate_column = \ + reinterpret_cast<vectorized::PredicateColumnType<uint32_t>&>(column); \ + uint32_t int32_val = 0; \ + char* int32_val_ptr = (char*)&int32_val; \ + memory_copy(int32_val_ptr, _value.get_data(), sizeof(uint24_t)); \ + auto& data_array = predicate_column.get_data(); \ + for (uint16_t i = 0; i < size; i++) { \ + flags[i] = data_array[i] OP int32_val; \ + } \ + } else { \ + auto& predicate_column = \ + reinterpret_cast<vectorized::PredicateColumnType<T>&>(column); \ + auto& data_array = predicate_column.get_data(); \ + 
for (uint16_t i = 0; i < size; i++) { \ + flags[i] = data_array[i] OP _value; \ + } \ } \ } \ if (_opposite) { \ @@ -502,6 +530,7 @@ COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=) template CLASS<decimal12_t>::CLASS(uint32_t column_id, const decimal12_t& value, \ bool opposite); \ template CLASS<uint24_t>::CLASS(uint32_t column_id, const uint24_t& value, bool opposite); \ + template CLASS<uint32_t>::CLASS(uint32_t column_id, const uint32_t& value, bool opposite); \ template CLASS<uint64_t>::CLASS(uint32_t column_id, const uint64_t& value, bool opposite); \ template CLASS<bool>::CLASS(uint32_t column_id, const bool& value, bool opposite); @@ -663,6 +692,8 @@ COMPARISON_PRED_COLUMN_EVALUATE_DECLARATION(GreaterEqualPredicate) bool* flags) const; \ template void CLASS<uint24_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \ bool* flags) const; \ + template void CLASS<uint32_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \ + bool* flags) const; \ template void CLASS<uint64_t>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \ bool* flags) const; \ template void CLASS<bool>::evaluate_vec(vectorized::IColumn& column, uint16_t size, \ diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index e39686abd4..04ec211568 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -290,7 +290,37 @@ private: uint16_t* sel, uint16_t size) const { uint16_t new_size = 0; - if (column->is_column_dictionary()) { + if constexpr (std::is_same_v<T, uint24_t>) { + auto* nested_col_ptr = + vectorized::check_and_get_column<vectorized::PredicateColumnType<uint32_t>>( + column); + auto& data_array = nested_col_ptr->get_data(); + + uint24_t tmp_uint24_value; + for (uint16_t i = 0; i < size; i++) { + uint16_t idx = sel[i]; + if constexpr (is_nullable) { + if ((*null_map)[idx]) { + if constexpr (is_opposite) { + sel[new_size++] = idx; + } + continue; + } + } + + memcpy((char*)(&tmp_uint24_value), 
(char*)(&(data_array[idx])), sizeof(uint24_t)); + if constexpr (!is_opposite) { + if (_operator(_values.find(tmp_uint24_value), _values.end())) { + sel[new_size++] = idx; + } + } else { + if (!_operator(_values.find(tmp_uint24_value), _values.end())) { + sel[new_size++] = idx; + } + } + } + + } else if (column->is_column_dictionary()) { if constexpr (std::is_same_v<T, StringValue>) { auto* nested_col_ptr = vectorized::check_and_get_column< vectorized::ColumnDictionary<vectorized::Int32>>(column); diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 14e5b92935..b3945e11ec 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -661,7 +661,7 @@ void SegmentIterator::_vec_init_lazy_materialization() { predicate->type() == PredicateType::IN_LIST || predicate->type() == PredicateType::NOT_IN_LIST || predicate->type() == PredicateType::IS_NULL || - predicate->type() == PredicateType::IS_NOT_NULL || type == OLAP_FIELD_TYPE_DATE || + predicate->type() == PredicateType::IS_NOT_NULL || type == OLAP_FIELD_TYPE_DECIMAL) { short_cir_pred_col_id_set.insert(cid); _short_cir_eval_predicate.push_back(predicate); diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp index 8c218ab8ed..a5e7896147 100644 --- a/be/src/olap/schema.cpp +++ b/be/src/olap/schema.cpp @@ -151,7 +151,7 @@ vectorized::IColumn::MutablePtr Schema::get_predicate_column_ptr(FieldType type) return doris::vectorized::PredicateColumnType<doris::vectorized::Int128>::create(); case OLAP_FIELD_TYPE_DATE: - return doris::vectorized::PredicateColumnType<uint24_t>::create(); + return doris::vectorized::PredicateColumnType<uint32_t>::create(); case OLAP_FIELD_TYPE_DATETIME: return doris::vectorized::PredicateColumnType<uint64_t>::create(); diff --git a/be/src/olap/uint24.h b/be/src/olap/uint24.h index 1605d893c9..f56ca7ddc6 100644 --- a/be/src/olap/uint24.h +++ 
b/be/src/olap/uint24.h @@ -140,6 +140,8 @@ public: return std::string(buf); } + const uint8_t* get_data() const { return data; } + private: uint8_t data[3]; } __attribute__((packed)); diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index e57ffe4a9c..1d42455c76 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -169,17 +169,16 @@ public: } void insert_date_column(const char* data_ptr, size_t num) { - size_t value_size = sizeof(uint24_t); + size_t input_value_size = sizeof(uint24_t); + for (int i = 0; i < num; i++) { - const char* cur_ptr = data_ptr + value_size * i; - uint64_t value = 0; - value = *(unsigned char*)(cur_ptr + 2); - value <<= 8; - value |= *(unsigned char*)(cur_ptr + 1); - value <<= 8; - value |= *(unsigned char*)(cur_ptr); - vectorized::VecDateTimeValue date = VecDateTimeValue::create_from_olap_date(value); - this->insert_data(reinterpret_cast<char*>(&date), 0); + uint64_t val = 0; + memcpy((char*)(&val), data_ptr, input_value_size); + data_ptr += input_value_size; + + VecDateTimeValue date; + date.set_olap_date(val); + data.push_back_without_reserve(unaligned_load<Int64>(reinterpret_cast<char*>(&date))); } } diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h index eec3f1def7..27b1b7e5e0 100644 --- a/be/src/vec/columns/predicate_column.h +++ b/be/src/vec/columns/predicate_column.h @@ -63,6 +63,20 @@ private: } } + void insert_date32_to_res_column(const uint16_t* sel, size_t sel_size, + vectorized::ColumnVector<Int64>* res_ptr) { + res_ptr->reserve(sel_size); + auto& res_data = res_ptr->get_data(); + + for (size_t i = 0; i < sel_size; i++) { + uint64_t val = data[sel[i]]; + VecDateTimeValue date; + date.set_olap_date(val); + res_data.push_back_without_reserve( + unaligned_load<Int64>(reinterpret_cast<char*>(&date))); + } + } + void insert_datetime_to_res_column(const uint16_t* sel, size_t sel_size, vectorized::ColumnVector<Int64>* 
res_ptr) { for (size_t i = 0; i < sel_size; i++) { @@ -205,6 +219,21 @@ public: } } + void insert_many_date(const char* data_ptr, size_t num) { + size_t intput_type_size = sizeof(uint24_t); + size_t res_type_size = sizeof(uint32_t); + char* input_data_ptr = const_cast<char*>(data_ptr); + + char* res_ptr = (char*)data.get_end_ptr(); + memset(res_ptr, 0, res_type_size * num); + for (int i = 0; i < num; i++) { + memcpy(res_ptr, input_data_ptr, intput_type_size); + res_ptr += res_type_size; + input_data_ptr += intput_type_size; + } + data.set_end_ptr(res_ptr); + } + void insert_many_fix_len_data(const char* data_ptr, size_t num) override { if constexpr (std::is_same_v<T, decimal12_t>) { insert_many_in_copy_way(data_ptr, num); @@ -212,6 +241,10 @@ public: insert_many_in_copy_way(data_ptr, num); } else if constexpr (std::is_same_v<T, StringValue>) { // here is unreachable, just for compilation to be able to pass + } else if constexpr (std::is_same_v< + T, + uint32_t>) { // todo(wb) a trick type judge here,need refactor + insert_many_date(data_ptr, num); } else { insert_many_default_type(data_ptr, num); } @@ -405,6 +438,9 @@ public: } else if constexpr (std::is_same_v<T, uint24_t>) { insert_date_to_res_column(sel, sel_size, reinterpret_cast<vectorized::ColumnVector<Int64>*>(col_ptr)); + } else if constexpr (std::is_same_v<T, uint32_t>) { // a trick type judge, need refactor it. 
+ insert_date32_to_res_column( + sel, sel_size, reinterpret_cast<vectorized::ColumnVector<Int64>*>(col_ptr)); } else if constexpr (std::is_same_v<T, doris::vectorized::Int128>) { insert_default_value_res_column( sel, sel_size, diff --git a/be/src/vec/runtime/vdatetime_value.h b/be/src/vec/runtime/vdatetime_value.h index 126b36f5b4..4c1f4cc137 100644 --- a/be/src/vec/runtime/vdatetime_value.h +++ b/be/src/vec/runtime/vdatetime_value.h @@ -223,6 +223,19 @@ public: return check_range_and_set_time(year, month, day, hour, minute, second, _type); } + //note(wb) not check in this method + void inline set_olap_date(uint64_t olap_date_val) { + _neg = 0; + _type = TIME_DATE; + + _day = olap_date_val & 0x1f; + _month = (olap_date_val >> 5) & 0x0f; + _year = olap_date_val >> 9; + _hour = 0; + _minute = 0; + _second = 0; + } + uint64_t to_olap_date() const { uint64_t val; val = _year; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org For additional commands, e-mail: commits-help@doris.apache.org