This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.1-lts in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push: new 3e0a2d9a0f [fix](predicate) support vectorized evaluate_or and evaluate_and of in predicate (#14603) 3e0a2d9a0f is described below commit 3e0a2d9a0f287bb23d460bf5d3260acbe151d02d Author: TengJianPing <18241664+jackte...@users.noreply.github.com> AuthorDate: Sat Nov 26 11:45:57 2022 +0800 [fix](predicate) support vectorized evaluate_or and evaluate_and of in predicate (#14603) cherry-pick #13587 --- be/src/olap/in_list_predicate.cpp | 383 +++++++++++++++++------- be/src/olap/in_list_predicate.h | 17 +- be/src/olap/reader.cpp | 40 +-- be/test/olap/in_list_predicate_test.cpp | 42 +-- be/test/olap/rowset/segment_v2/segment_test.cpp | 4 +- 5 files changed, 331 insertions(+), 155 deletions(-) diff --git a/be/src/olap/in_list_predicate.cpp b/be/src/olap/in_list_predicate.cpp index 9b78a8705f..3046842bae 100644 --- a/be/src/olap/in_list_predicate.cpp +++ b/be/src/olap/in_list_predicate.cpp @@ -26,17 +26,17 @@ namespace doris { -#define IN_LIST_PRED_CONSTRUCTOR(CLASS) \ - template <class T> \ - CLASS<T>::CLASS(uint32_t column_id, phmap::flat_hash_set<T>&& values, bool opposite) \ +#define IN_LIST_PRED_CONSTRUCTOR(CLASS) \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + CLASS<PRIMITIVE_TYPE, T>::CLASS(uint32_t column_id, phmap::flat_hash_set<T>&& values, bool opposite) \ : ColumnPredicate(column_id, opposite), _values(std::move(values)) {} IN_LIST_PRED_CONSTRUCTOR(InListPredicate) IN_LIST_PRED_CONSTRUCTOR(NotInListPredicate) #define IN_LIST_PRED_EVALUATE(CLASS, OP) \ - template <class T> \ - void CLASS<T>::evaluate(VectorizedRowBatch* batch) const { \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + void CLASS<PRIMITIVE_TYPE, T>::evaluate(VectorizedRowBatch* batch) const { \ uint16_t n = batch->size(); \ if (n == 0) { \ return; \ @@ -88,9 +88,9 @@ IN_LIST_PRED_CONSTRUCTOR(NotInListPredicate) IN_LIST_PRED_EVALUATE(InListPredicate, !=) IN_LIST_PRED_EVALUATE(NotInListPredicate, ==) -#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \ - template <class T> \ - void CLASS<T>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \ +#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP) \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + void CLASS<PRIMITIVE_TYPE, T>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const { \ uint16_t new_size = 0; \ if (block->is_nullable()) { \ for (uint16_t i = 0; i < *size; ++i) { \ @@ -119,9 +119,9 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(InListPredicate, !=) IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==) // todo(zeno) define interface in IColumn to simplify code -#define IN_LIST_PRED_COLUMN_EVALUATE(CLASS, OP) \ - template <class T> \ - void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \ +#define IN_LIST_PRED_COLUMN_EVALUATE(CLASS, OP) \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + void CLASS<PRIMITIVE_TYPE, T>::evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const { \ uint16_t new_size = 0; \ if (column.is_nullable()) { \ auto* nullable_col = \ @@ -188,9 +188,157 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==) IN_LIST_PRED_COLUMN_EVALUATE(InListPredicate, !=) IN_LIST_PRED_COLUMN_EVALUATE(NotInListPredicate, ==) -#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP) \ - template <class T> \ - void CLASS<T>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ +#define IN_LIST_EVALUATE_OPERATOR(CLASS, PT) \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + template <typename LeftT, typename RightT> \ + bool CLASS<PRIMITIVE_TYPE, T>::_operator(const LeftT& lhs, const RightT& rhs) const { \ + if constexpr (PRIMITIVE_TYPE == TYPE_BOOLEAN) { \ + DCHECK(_values.size() == 2); \ + return PredicateType::PT == PredicateType::IN_LIST; \ + } else if constexpr (PredicateType::PT == PredicateType::IN_LIST) { \ + return lhs != rhs; \ + } \ + return lhs == rhs; \ + } + +IN_LIST_EVALUATE_OPERATOR(InListPredicate, IN_LIST) +IN_LIST_EVALUATE_OPERATOR(NotInListPredicate, NOT_IN_LIST) + +#define IN_LIST_BASE_EVALUATE_BIT(CLASS, PT) \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + template <bool is_nullable, bool is_opposite, bool is_and> \ + void CLASS<PRIMITIVE_TYPE, T>::_base_evaluate_bit(const vectorized::IColumn* column, \ + const vectorized::PaddedPODArray<vectorized::UInt8>* null_map, \ + const uint16_t* sel, uint16_t size, bool* flags) const { \ + if (column->is_column_dictionary()) { \ + if constexpr (std::is_same_v<T, StringValue>) { \ + auto* nested_col_ptr = vectorized::check_and_get_column< \ + vectorized::ColumnDictionary<vectorized::Int32>>(column); \ + auto& data_array = nested_col_ptr->get_data(); \ + auto dict_codes = nested_col_ptr->find_codes(_values); \ + \ + for (uint16_t i = 0; i < size; i++) { \ + if (is_and ^ flags[i]) { \ + continue; \ + } \ + \ + uint16_t idx = sel[i]; \ + if constexpr (is_nullable) { \ + if ((*null_map)[idx]) { \ + if (is_and ^ is_opposite) { \ + flags[i] = !is_and; \ + } \ + continue; \ + } \ + } \ + \ + const auto& cell_value = data_array[idx]; \ + if constexpr (is_opposite != (PredicateType::PT == PredicateType::IN_LIST)) { \ + if (is_and ^ (dict_codes.find(cell_value) != dict_codes.end())) { \ + flags[i] = !is_and; \ + } \ + } else { \ + if (is_and ^ (dict_codes.find(cell_value) == dict_codes.end())) { \ + flags[i] = !is_and; \ + } \ + } \ + } \ + } else { \ + LOG(FATAL) << "column_dictionary must use StringValue predicate."; \ + } \ + } else { \ + auto* nested_col_ptr = \ + vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>( \ + column); \ + auto& data_array = nested_col_ptr->get_data(); \ + \ + for (uint16_t i = 0; i < size; i++) { \ + if (is_and ^ flags[i]) { \ + continue; \ + } \ + uint16_t idx = sel[i]; \ + if constexpr (is_nullable) { \ + if ((*null_map)[idx]) { \ + if (is_and ^ is_opposite) { \ + flags[i] = !is_and; \ + } \ + continue; \ + } \ + } \ + \ + if constexpr (!is_opposite) { \ + if (is_and ^ \ + _operator(_values.find(reinterpret_cast<const T&>(data_array[idx])), \ + _values.end())) { \ + flags[i] = !is_and; \ + } \ + } else { \ + if (is_and ^ \ + !_operator(_values.find(reinterpret_cast<const T&>(data_array[idx])), \ + _values.end())) { \ + flags[i] = !is_and; \ + } \ + } \ + } \ + } \ + } + +IN_LIST_BASE_EVALUATE_BIT(InListPredicate, IN_LIST) +IN_LIST_BASE_EVALUATE_BIT(NotInListPredicate, NOT_IN_LIST) + +#define IN_LIST_EVALUATE_BIT(CLASS) \ +template <PrimitiveType PRIMITIVE_TYPE, class T> \ +template <bool is_and> \ +void CLASS<PRIMITIVE_TYPE, T>::_evaluate_bit(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size, \ + bool* flags) const { \ + if (column.is_nullable()) { \ + auto* nullable_col = \ + vectorized::check_and_get_column<vectorized::ColumnNullable>(column); \ + auto& null_bitmap = reinterpret_cast<const vectorized::ColumnUInt8&>( \ + nullable_col->get_null_map_column()) \ + .get_data(); \ + auto& nested_col = nullable_col->get_nested_column(); \ + \ + if (_opposite) { \ + return _base_evaluate_bit<true, true, is_and>(&nested_col, &null_bitmap, sel, size, \ + flags); \ + } else { \ + return _base_evaluate_bit<true, false, is_and>(&nested_col, &null_bitmap, sel, size, \ + flags); \ + } \ + } else { \ + if (_opposite) { \ + return _base_evaluate_bit<false, true, is_and>(&column, nullptr, sel, size, flags); \ + } else { \ + return _base_evaluate_bit<false, false, is_and>(&column, nullptr, sel, size, flags); \ + } \ + } \ +} + +IN_LIST_EVALUATE_BIT(InListPredicate) +IN_LIST_EVALUATE_BIT(NotInListPredicate) + +#define IN_LIST_PRED_COLUMN_EVALUATE_OR(CLASS, OP) \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + void CLASS<PRIMITIVE_TYPE, T>::evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const { \ + _evaluate_bit<false>(column, sel, size, flags); \ + } + +IN_LIST_PRED_COLUMN_EVALUATE_OR(InListPredicate, !=) +IN_LIST_PRED_COLUMN_EVALUATE_OR(NotInListPredicate, ==) + +#define IN_LIST_PRED_COLUMN_EVALUATE_AND(CLASS, OP) \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + void CLASS<PRIMITIVE_TYPE, T>::evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const { \ + _evaluate_bit<true>(column, sel, size, flags); \ + } + +IN_LIST_PRED_COLUMN_EVALUATE_AND(InListPredicate, !=) +IN_LIST_PRED_COLUMN_EVALUATE_AND(NotInListPredicate, ==) + +#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP) \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + void CLASS<PRIMITIVE_TYPE, T>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ const { \ if (block->is_nullable()) { \ for (uint16_t i = 0; i < size; ++i) { \ @@ -217,9 +365,9 @@ IN_LIST_PRED_COLUMN_EVALUATE(NotInListPredicate, ==) IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(InListPredicate, !=) IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(NotInListPredicate, ==) -#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP) \ - template <class T> \ - void CLASS<T>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ +#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP) \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + void CLASS<PRIMITIVE_TYPE, T>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) \ const { \ if (block->is_nullable()) { \ for (uint16_t i = 0; i < size; ++i) { \ @@ -247,8 +395,8 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(InListPredicate, !=) IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==) #define IN_LIST_PRED_BITMAP_EVALUATE(CLASS, OP) \ - template <class T> \ - Status CLASS<T>::evaluate(const Schema& schema, \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ + Status CLASS<PRIMITIVE_TYPE, T>::evaluate(const Schema& schema, \ const std::vector<BitmapIndexIterator*>& iterators, \ uint32_t num_rows, roaring::Roaring* result) const { \ BitmapIndexIterator* iterator = iterators[_column_id]; \ @@ -283,109 +431,126 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==) IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=) IN_LIST_PRED_BITMAP_EVALUATE(NotInListPredicate, -=) -#define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS) \ - template CLASS<int8_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int8_t>&& values, \ - bool opposite); \ - template CLASS<int16_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int16_t>&& values, \ - bool opposite); \ - template CLASS<int32_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int32_t>&& values, \ - bool opposite); \ - template CLASS<int64_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int64_t>&& values, \ - bool opposite); \ - template CLASS<int128_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int128_t>&& values, \ - bool opposite); \ - template CLASS<float>::CLASS(uint32_t column_id, phmap::flat_hash_set<float>&& values, \ - bool opposite); \ - template CLASS<double>::CLASS(uint32_t column_id, phmap::flat_hash_set<double>&& values, \ - bool opposite); \ - template CLASS<decimal12_t>::CLASS(uint32_t column_id, \ - phmap::flat_hash_set<decimal12_t>&& values, bool opposite); \ - template CLASS<StringValue>::CLASS(uint32_t column_id, \ - phmap::flat_hash_set<StringValue>&& values, bool opposite); \ - template CLASS<uint24_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<uint24_t>&& values, \ - bool opposite); \ - template CLASS<uint64_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<uint64_t>&& values, \ - bool opposite); +#define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS) \ + template CLASS<TYPE_TINYINT, int8_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int8_t>&& values, \ + bool opposite); \ + template CLASS<TYPE_SMALLINT, int16_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int16_t>&& values, \ + bool opposite); \ + template CLASS<TYPE_INT, int32_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int32_t>&& values, \ + bool opposite); \ + template CLASS<TYPE_BIGINT, int64_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int64_t>&& values, \ + bool opposite); \ + template CLASS<TYPE_LARGEINT, int128_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<int128_t>&& values, \ + bool opposite); \ + template CLASS<TYPE_FLOAT, float>::CLASS(uint32_t column_id, phmap::flat_hash_set<float>&& values, \ + bool opposite); \ + template CLASS<TYPE_DOUBLE, double>::CLASS(uint32_t column_id, phmap::flat_hash_set<double>&& values, \ + bool opposite); \ + template CLASS<TYPE_DECIMALV2, decimal12_t>::CLASS(uint32_t column_id, \ + phmap::flat_hash_set<decimal12_t>&& values, bool opposite); \ + template CLASS<TYPE_CHAR, StringValue>::CLASS(uint32_t column_id, \ + phmap::flat_hash_set<StringValue>&& values, bool opposite); \ + template CLASS<TYPE_STRING, StringValue>::CLASS(uint32_t column_id, \ + phmap::flat_hash_set<StringValue>&& values, bool opposite); \ + template CLASS<TYPE_DATE, uint24_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<uint24_t>&& values, \ + bool opposite); \ + template CLASS<TYPE_DATETIME, uint64_t>::CLASS(uint32_t column_id, phmap::flat_hash_set<uint64_t>&& values, \ + bool opposite); IN_LIST_PRED_CONSTRUCTOR_DECLARATION(InListPredicate) IN_LIST_PRED_CONSTRUCTOR_DECLARATION(NotInListPredicate) -#define IN_LIST_PRED_EVALUATE_DECLARATION(CLASS) \ - template void CLASS<int8_t>::evaluate(VectorizedRowBatch* batch) const; \ - template void CLASS<int16_t>::evaluate(VectorizedRowBatch* batch) const; \ - template void CLASS<int32_t>::evaluate(VectorizedRowBatch* batch) const; \ - template void CLASS<int64_t>::evaluate(VectorizedRowBatch* batch) const; \ - template void CLASS<int128_t>::evaluate(VectorizedRowBatch* batch) const; \ - template void CLASS<float>::evaluate(VectorizedRowBatch* batch) const; \ - template void CLASS<double>::evaluate(VectorizedRowBatch* batch) const; \ - template void CLASS<decimal12_t>::evaluate(VectorizedRowBatch* batch) const; \ - template void CLASS<StringValue>::evaluate(VectorizedRowBatch* batch) const; \ - template void CLASS<uint24_t>::evaluate(VectorizedRowBatch* batch) const; \ - template void CLASS<uint64_t>::evaluate(VectorizedRowBatch* batch) const; +#define IN_LIST_PRED_EVALUATE_DECLARATION(CLASS) \ + template void CLASS<TYPE_TINYINT, int8_t>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_SMALLINT, int16_t>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_INT, int32_t>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_BIGINT, int64_t>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_LARGEINT, int128_t>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_FLOAT, float>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_DOUBLE, double>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_DECIMALV2, decimal12_t>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_CHAR, StringValue>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_STRING, StringValue>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_DATE, uint24_t>::evaluate(VectorizedRowBatch* batch) const; \ + template void CLASS<TYPE_DATETIME, uint64_t>::evaluate(VectorizedRowBatch* batch) const; IN_LIST_PRED_EVALUATE_DECLARATION(InListPredicate) IN_LIST_PRED_EVALUATE_DECLARATION(NotInListPredicate) -#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(CLASS) \ - template void CLASS<int8_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ - const; \ - template void CLASS<int16_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ - const; \ - template void CLASS<int32_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ - const; \ - template void CLASS<int64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ - const; \ - template void CLASS<int128_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ - const; \ - template void CLASS<float>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ - template void CLASS<double>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ - const; \ - template void CLASS<decimal12_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ - const; \ - template void CLASS<StringValue>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ - const; \ - template void CLASS<uint24_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ - const; \ - template void CLASS<uint64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ +#define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(CLASS) \ + template void CLASS<TYPE_TINYINT, int8_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ + const; \ + template void CLASS<TYPE_SMALLINT, int16_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ + const; \ + template void CLASS<TYPE_INT, int32_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ + const; \ + template void CLASS<TYPE_BIGINT, int64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ + const; \ + template void CLASS<TYPE_LARGEINT, int128_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ + const; \ + template void CLASS<TYPE_FLOAT, float>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const; \ + template void CLASS<TYPE_DOUBLE, double>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ + const; \ + template void CLASS<TYPE_DECIMALV2, decimal12_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ + const; \ + template void CLASS<TYPE_CHAR, StringValue>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ + const; \ + template void CLASS<TYPE_STRING, StringValue>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ + const; \ + template void CLASS<TYPE_DATE, uint24_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ + const; \ + template void CLASS<TYPE_DATETIME, uint64_t>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) \ const; IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(InListPredicate) IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate) -#define IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(CLASS) \ - template Status CLASS<int8_t>::evaluate(const Schema& schema, \ - const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; \ - template Status CLASS<int16_t>::evaluate(const Schema& schema, \ - const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; \ - template Status CLASS<int32_t>::evaluate(const Schema& schema, \ - const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; \ - template Status CLASS<int64_t>::evaluate(const Schema& schema, \ - const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; \ - template Status CLASS<int128_t>::evaluate(const Schema& schema, \ - const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; \ - template Status CLASS<float>::evaluate(const Schema& schema, \ - const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; \ - template Status CLASS<double>::evaluate(const Schema& schema, \ - const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; \ - template Status CLASS<decimal12_t>::evaluate( \ - const Schema& schema, const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; \ - template Status CLASS<StringValue>::evaluate( \ - const Schema& schema, const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; \ - template Status CLASS<uint24_t>::evaluate(const Schema& schema, \ - const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; \ - template Status CLASS<uint64_t>::evaluate(const Schema& schema, \ - const std::vector<BitmapIndexIterator*>& iterators, \ - uint32_t num_rows, roaring::Roaring* bitmap) const; +#define IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(CLASS) \ + template Status CLASS<TYPE_TINYINT, int8_t>::evaluate( \ + const Schema& schema, \ + const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_SMALLINT, int16_t>::evaluate( \ + const Schema& schema, \ + const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_INT, int32_t>::evaluate( \ + const Schema& schema, \ + const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_BIGINT, int64_t>::evaluate( \ + const Schema& schema, \ + const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_LARGEINT, int128_t>::evaluate( \ + const Schema& schema, \ + const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_FLOAT, float>::evaluate( \ + const Schema& schema, \ + const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_DOUBLE, double>::evaluate( \ + const Schema& schema, \ + const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_DECIMALV2, decimal12_t>::evaluate( \ + const Schema& schema, const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_CHAR, StringValue>::evaluate( \ + const Schema& schema, const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_STRING, StringValue>::evaluate( \ + const Schema& schema, const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_DATE, uint24_t>::evaluate( \ + const Schema& schema, \ + const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; \ + template Status CLASS<TYPE_DATETIME, uint64_t>::evaluate( \ + const Schema& schema, \ + const std::vector<BitmapIndexIterator*>& iterators, \ + uint32_t num_rows, roaring::Roaring* bitmap) const; IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(InListPredicate) IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(NotInListPredicate) diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index 33682b6824..110398f938 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -27,6 +27,7 @@ #include "olap/column_predicate.h" #include "uint24.h" #include "util/murmur_hash3.h" +#include "runtime/primitive_type.h" namespace std { // for string value @@ -79,7 +80,7 @@ class VectorizedRowBatch; // todo(wb) support evaluate_and,evaluate_or #define IN_LIST_PRED_CLASS_DEFINE(CLASS, PT) \ - template <class T> \ + template <PrimitiveType PRIMITIVE_TYPE, class T> \ class CLASS : public ColumnPredicate { \ public: \ CLASS(uint32_t column_id, phmap::flat_hash_set<T>&& values, bool is_opposite = false); \ @@ -94,8 +95,18 @@ class VectorizedRowBatch; const std::vector<BitmapIndexIterator*>& iterators, \ uint32_t num_rows, roaring::Roaring* bitmap) const override; \ void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) const override; \ - void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \ - void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override {} \ + void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override; \ + void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t size, bool* flags) const override; \ + private: \ + template <typename LeftT, typename RightT> \ + bool _operator(const LeftT& lhs, const RightT& rhs) const; \ + template <bool is_and> \ + void _evaluate_bit(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size, \ + bool* flags) const; \ + template <bool is_nullable, bool is_opposite, bool is_and> \ + void _base_evaluate_bit(const vectorized::IColumn* column, \ + const vectorized::PaddedPODArray<vectorized::UInt8>* null_map, \ + const uint16_t* sel, uint16_t size, bool* flags) const; \ private: \ phmap::flat_hash_set<T> _values; \ }; diff --git a/be/src/olap/reader.cpp b/be/src/olap/reader.cpp index 9b0b7ceec0..96f13a9bf6 100644 --- a/be/src/olap/reader.cpp +++ b/be/src/olap/reader.cpp @@ -608,9 +608,9 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition, values.insert(value); } if (condition.condition_op == "*=") { - predicate = new InListPredicate<int8_t>(index, std::move(values), opposite); + predicate = new InListPredicate<TYPE_TINYINT, int8_t>(index, std::move(values), opposite); } else { - predicate = new NotInListPredicate<int8_t>(index, std::move(values), opposite); + predicate = new NotInListPredicate<TYPE_TINYINT, int8_t>(index, std::move(values), opposite); } break; } @@ -622,9 +622,9 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition, values.insert(value); } if (condition.condition_op == "*=") { - predicate = new InListPredicate<int16_t>(index, std::move(values), opposite); + predicate = new InListPredicate<TYPE_SMALLINT, int16_t>(index, std::move(values), opposite); } else { - predicate = new NotInListPredicate<int16_t>(index, std::move(values), opposite); + predicate = new NotInListPredicate<TYPE_SMALLINT, int16_t>(index, std::move(values), opposite); } break; } @@ -636,9 +636,9 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition, values.insert(value); } if (condition.condition_op == "*=") { - predicate = new InListPredicate<int32_t>(index, std::move(values), opposite); + predicate = new InListPredicate<TYPE_INT, int32_t>(index, std::move(values), opposite); } else { - predicate = new NotInListPredicate<int32_t>(index, std::move(values), opposite); + predicate = new NotInListPredicate<TYPE_INT, int32_t>(index, std::move(values), opposite); } break; } @@ -650,9 +650,9 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition, values.insert(value); } if (condition.condition_op == "*=") { - predicate = new InListPredicate<int64_t>(index, std::move(values), opposite); + predicate = new InListPredicate<TYPE_BIGINT, int64_t>(index, std::move(values), opposite); } else { - predicate = new NotInListPredicate<int64_t>(index, std::move(values), opposite); + predicate = new NotInListPredicate<TYPE_BIGINT, int64_t>(index, std::move(values), opposite); } break; } @@ -666,9 +666,9 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition, values.insert(value); } if (condition.condition_op == "*=") { - predicate = new InListPredicate<int128_t>(index, std::move(values), opposite); + predicate = new InListPredicate<TYPE_LARGEINT, int128_t>(index, std::move(values), opposite); } else { - predicate = new NotInListPredicate<int128_t>(index, std::move(values), opposite); + predicate = new NotInListPredicate<TYPE_LARGEINT, int128_t>(index, std::move(values), opposite); } break; } @@ -680,9 +680,9 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition, values.insert(value); } if (condition.condition_op == "*=") { - predicate = new InListPredicate<decimal12_t>(index, std::move(values), opposite); + predicate = new InListPredicate<TYPE_DECIMALV2, decimal12_t>(index, std::move(values), opposite); } else { - predicate = new NotInListPredicate<decimal12_t>(index, std::move(values), opposite); + predicate = new NotInListPredicate<TYPE_DECIMALV2, decimal12_t>(index, std::move(values), opposite); } break; } @@ -699,9 +699,9 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition, values.insert(value); } if (condition.condition_op == "*=") { - predicate = new InListPredicate<StringValue>(index, std::move(values), opposite); + predicate = new InListPredicate<TYPE_CHAR, StringValue>(index, std::move(values), opposite); } else { - predicate = new NotInListPredicate<StringValue>(index, std::move(values), opposite); + predicate = new NotInListPredicate<TYPE_CHAR, StringValue>(index, std::move(values), opposite); } break; } @@ -718,9 +718,9 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition, values.insert(value); } if (condition.condition_op == "*=") { - predicate = new InListPredicate<StringValue>(index, std::move(values), opposite); + predicate = new InListPredicate<TYPE_STRING, StringValue>(index, std::move(values), opposite); } else { - predicate = new NotInListPredicate<StringValue>(index, std::move(values), opposite); + predicate = new NotInListPredicate<TYPE_STRING, StringValue>(index, std::move(values), opposite); } break; } @@ -731,9 +731,9 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition, values.insert(value); } if (condition.condition_op == "*=") { - predicate = new InListPredicate<uint24_t>(index, std::move(values), opposite); + predicate = new InListPredicate<TYPE_DATE, uint24_t>(index, std::move(values), opposite); } else { - predicate = new NotInListPredicate<uint24_t>(index, std::move(values), opposite); + predicate = new NotInListPredicate<TYPE_DATE, uint24_t>(index, std::move(values), opposite); } break; } @@ -744,9 +744,9 @@ ColumnPredicate* TabletReader::_parse_to_predicate(const TCondition& condition, values.insert(value); } if (condition.condition_op == "*=") { - predicate = new InListPredicate<uint64_t>(index, std::move(values), opposite); + predicate = new InListPredicate<TYPE_DATETIME, uint64_t>(index, std::move(values), opposite); } else { - predicate = new NotInListPredicate<uint64_t>(index, std::move(values), opposite); + predicate = new NotInListPredicate<TYPE_DATETIME, uint64_t>(index, std::move(values), opposite); } break; } diff --git a/be/test/olap/in_list_predicate_test.cpp b/be/test/olap/in_list_predicate_test.cpp index 285e6ea375..9d1539d27d 100644 --- a/be/test/olap/in_list_predicate_test.cpp +++ b/be/test/olap/in_list_predicate_test.cpp @@ -136,7 +136,7 @@ public: std::unique_ptr<Schema> _schema; }; -#define TEST_IN_LIST_PREDICATE(TYPE, TYPE_NAME, FIELD_TYPE) \ +#define TEST_IN_LIST_PREDICATE(PRIMITIVE_TYPE, TYPE, TYPE_NAME, FIELD_TYPE) \ TEST_F(TestInListPredicate, TYPE_NAME##_COLUMN) { \ TabletSchema tablet_schema; \ SetTabletSchema(std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, "REPLACE", 1, false, true, \ @@ -161,7 +161,7 @@ public: values.insert(4); \ values.insert(5); \ values.insert(6); \ - ColumnPredicate* pred = new InListPredicate<TYPE>(0, std::move(values)); \ + ColumnPredicate* pred = new InListPredicate<PRIMITIVE_TYPE, TYPE>(0, std::move(values)); \ pred->evaluate(_vectorized_batch); \ EXPECT_EQ(_vectorized_batch->size(), 3); \ uint16_t* sel = _vectorized_batch->selected(); \ @@ -190,13 +190,13 @@ public: delete pred; \ } -TEST_IN_LIST_PREDICATE(int8_t, TINYINT, "TINYINT") -TEST_IN_LIST_PREDICATE(int16_t, SMALLINT, "SMALLINT") -TEST_IN_LIST_PREDICATE(int32_t, INT, "INT") -TEST_IN_LIST_PREDICATE(int64_t, BIGINT, "BIGINT") -TEST_IN_LIST_PREDICATE(int128_t, LARGEINT, "LARGEINT") +TEST_IN_LIST_PREDICATE(TYPE_TINYINT, int8_t, TINYINT, "TINYINT") +TEST_IN_LIST_PREDICATE(TYPE_SMALLINT, int16_t, SMALLINT, "SMALLINT") +TEST_IN_LIST_PREDICATE(TYPE_INT, int32_t, INT, "INT") +TEST_IN_LIST_PREDICATE(TYPE_BIGINT, int64_t, BIGINT, "BIGINT") +TEST_IN_LIST_PREDICATE(TYPE_LARGEINT, int128_t, LARGEINT, "LARGEINT") -#define TEST_IN_LIST_PREDICATE_V2(TYPE, TYPE_NAME, FIELD_TYPE) \ +#define TEST_IN_LIST_PREDICATE_V2(PRIMITIVE_TYPE, TYPE, TYPE_NAME, FIELD_TYPE) \ TEST_F(TestInListPredicate, TYPE_NAME##_COLUMN_V2) { \ TabletSchema tablet_schema; \ SetTabletSchema(std::string("TYPE_NAME##_COLUMN"), FIELD_TYPE, "REPLACE", 1, false, true, \ @@ -208,7 +208,7 @@ TEST_IN_LIST_PREDICATE(int128_t, LARGEINT, "LARGEINT") values.insert(4); \ values.insert(5); \ values.insert(6); \ - ColumnPredicate* pred = new InListPredicate<TYPE>(0, std::move(values)); \ + ColumnPredicate* pred = new InListPredicate<PRIMITIVE_TYPE, TYPE>(0, std::move(values)); \ uint16_t sel[10]; \ for (int i = 0; i < 10; ++i) { \ sel[i] = i; \ @@ -255,11 +255,11 @@ TEST_IN_LIST_PREDICATE(int128_t, LARGEINT, "LARGEINT") delete pred; \ } -TEST_IN_LIST_PREDICATE_V2(int8_t, TINYINT, "TINYINT") -TEST_IN_LIST_PREDICATE_V2(int16_t, SMALLINT, "SMALLINT") -TEST_IN_LIST_PREDICATE_V2(int32_t, INT, "INT") -TEST_IN_LIST_PREDICATE_V2(int64_t, BIGINT, "BIGINT") -TEST_IN_LIST_PREDICATE_V2(int128_t, LARGEINT, "LARGEINT") +TEST_IN_LIST_PREDICATE_V2(TYPE_TINYINT, int8_t, TINYINT, "TINYINT") +TEST_IN_LIST_PREDICATE_V2(TYPE_SMALLINT, int16_t, SMALLINT, "SMALLINT") +TEST_IN_LIST_PREDICATE_V2(TYPE_INT, int32_t, INT, "INT") +TEST_IN_LIST_PREDICATE_V2(TYPE_BIGINT, int64_t, BIGINT, "BIGINT") +TEST_IN_LIST_PREDICATE_V2(TYPE_LARGEINT, int128_t, LARGEINT, "LARGEINT") TEST_F(TestInListPredicate, FLOAT_COLUMN) { TabletSchema tablet_schema; @@ -273,7 +273,7 @@ TEST_F(TestInListPredicate, FLOAT_COLUMN) { values.insert(4.1); values.insert(5.1); values.insert(6.1); - ColumnPredicate* pred = new InListPredicate<float>(0, std::move(values)); + ColumnPredicate* pred = new InListPredicate<TYPE_FLOAT, float>(0, std::move(values)); // for VectorizedBatch no null InitVectorizedBatch(&tablet_schema, return_columns, size); @@ -358,7 +358,7 @@ TEST_F(TestInListPredicate, DOUBLE_COLUMN) { values.insert(5.1); values.insert(6.1); - ColumnPredicate* pred = new InListPredicate<double>(0, std::move(values)); + ColumnPredicate* pred = new InListPredicate<TYPE_DOUBLE, double>(0, std::move(values)); // for VectorizedBatch no null InitVectorizedBatch(&tablet_schema, return_columns, size); @@ -447,7 +447,7 @@ TEST_F(TestInListPredicate, DECIMAL_COLUMN) { decimal12_t value3 = {6, 6}; values.insert(value3); - ColumnPredicate* pred = new InListPredicate<decimal12_t>(0, std::move(values)); + ColumnPredicate* pred = new InListPredicate<TYPE_DECIMALV2, decimal12_t>(0, std::move(values)); // for VectorizedBatch no null InitVectorizedBatch(&tablet_schema, return_columns, size); @@ -550,7 +550,7 @@ TEST_F(TestInListPredicate, CHAR_COLUMN) { value3.len = 5; values.insert(value3); - ColumnPredicate* pred = new InListPredicate<StringValue>(0, std::move(values)); + ColumnPredicate* pred = new InListPredicate<TYPE_CHAR, StringValue>(0, std::move(values)); // for VectorizedBatch no null InitVectorizedBatch(&tablet_schema, return_columns, size); @@ -678,7 +678,7 @@ TEST_F(TestInListPredicate, VARCHAR_COLUMN) { value3.len = 3; values.insert(value3); - ColumnPredicate* pred = new InListPredicate<StringValue>(0, std::move(values)); + ColumnPredicate* pred = new InListPredicate<TYPE_STRING, StringValue>(0, std::move(values)); // for VectorizedBatch no null InitVectorizedBatch(&tablet_schema, return_columns, size); @@ -793,7 +793,7 @@ TEST_F(TestInListPredicate, DATE_COLUMN) { uint24_t value3 = datetime::timestamp_from_date("2017-09-11"); values.insert(value3); - ColumnPredicate* pred = new InListPredicate<uint24_t>(0, std::move(values)); + ColumnPredicate* pred = new InListPredicate<TYPE_DATE, uint24_t>(0, std::move(values)); // for VectorizedBatch no nulls InitVectorizedBatch(&tablet_schema, return_columns, size); @@ -903,7 +903,7 @@ TEST_F(TestInListPredicate, DATETIME_COLUMN) { uint64_t value3 = datetime::timestamp_from_datetime("2017-09-11 01:01:00"); values.insert(value3); - ColumnPredicate* pred = new InListPredicate<uint64_t>(0, std::move(values)); + ColumnPredicate* pred = new InListPredicate<TYPE_DATETIME, uint64_t>(0, std::move(values)); // for VectorizedBatch no nulls InitVectorizedBatch(&tablet_schema, return_columns, size); diff --git a/be/test/olap/rowset/segment_v2/segment_test.cpp b/be/test/olap/rowset/segment_v2/segment_test.cpp index fcc1778106..7ca79874b9 100644 --- a/be/test/olap/rowset/segment_v2/segment_test.cpp +++ b/be/test/olap/rowset/segment_v2/segment_test.cpp @@ -1105,7 +1105,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) { values.insert(20); values.insert(1); std::unique_ptr<ColumnPredicate> predicate( - new InListPredicate<int32_t>(0, std::move(values))); + new InListPredicate<TYPE_INT, int32_t>(0, std::move(values))); column_predicates.emplace_back(predicate.get()); StorageReadOptions read_opts; @@ -1128,7 +1128,7 @@ TEST_F(SegmentReaderWriterTest, TestBitmapPredicate) { values.insert(10); values.insert(20); std::unique_ptr<ColumnPredicate> predicate( - new NotInListPredicate<int32_t>(0, std::move(values))); + new NotInListPredicate<TYPE_INT, int32_t>(0, std::move(values))); column_predicates.emplace_back(predicate.get()); StorageReadOptions read_opts; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org