This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-unstable in repository https://gitbox.apache.org/repos/asf/doris.git
commit d85c1e98b472bd915f68c66e9e407b4c17e6d3ee Author: luozenglin <37725793+luozeng...@users.noreply.github.com> AuthorDate: Thu Dec 1 10:41:09 2022 +0800 [Enhancement](bitmapfilter) Support bitmap filter to apply zone_map index to filter pages (#14635) --- be/src/exprs/bitmapfilter_predicate.h | 35 ++++++---- be/src/exprs/runtime_filter.cpp | 21 ++++-- be/src/exprs/runtime_filter.h | 3 + be/src/olap/bitmap_filter_predicate.h | 45 +++++++------ be/src/olap/olap_common.h | 2 + be/src/olap/rowset/segment_v2/segment_iterator.cpp | 1 + be/src/util/bitmap_value.h | 76 +++++++++++++++++++++- be/src/vec/exec/scan/new_olap_scan_node.cpp | 1 + be/src/vec/exec/scan/new_olap_scan_node.h | 1 + be/src/vec/exec/scan/new_olap_scanner.cpp | 2 + be/src/vec/runtime/shared_hash_table_controller.h | 2 +- 11 files changed, 151 insertions(+), 38 deletions(-) diff --git a/be/src/exprs/bitmapfilter_predicate.h b/be/src/exprs/bitmapfilter_predicate.h index c9a2efc621..dbc9447034 100644 --- a/be/src/exprs/bitmapfilter_predicate.h +++ b/be/src/exprs/bitmapfilter_predicate.h @@ -17,7 +17,8 @@ #pragma once -#include "common/object_pool.h" +#include <algorithm> + #include "gutil/integral_types.h" #include "runtime/define_primitive_type.h" #include "runtime/primitive_type.h" @@ -30,14 +31,15 @@ class BitmapFilterFuncBase { public: virtual void insert(const void* data) = 0; virtual void insert_many(const std::vector<const BitmapValue*> bitmaps) = 0; - virtual bool find(uint64 data) const = 0; - virtual bool is_empty() = 0; + virtual bool empty() = 0; virtual Status assign(BitmapValue* bitmap_value) = 0; virtual void light_copy(BitmapFilterFuncBase* other) { _not_in = other->_not_in; }; virtual uint16_t find_fixed_len_olap_engine(const char* data, const uint8* nullmap, uint16_t* offsets, int number) = 0; virtual void find_batch(const char* data, const uint8* nullmap, int number, uint8* results) const = 0; + virtual size_t size() const = 0; + bool is_not_in() const { return _not_in; } void set_not_in(bool not_in) { _not_in = not_in; } virtual ~BitmapFilterFuncBase() = default; @@ -51,7 +53,7 @@ class BitmapFilterFunc : public BitmapFilterFuncBase { public: using CppType = typename PrimitiveTypeTraits<type>::CppType; - BitmapFilterFunc() : _bitmap_value(std::make_shared<BitmapValue>()), _empty(true) {} + BitmapFilterFunc() : _bitmap_value(std::make_shared<BitmapValue>()) {} ~BitmapFilterFunc() override = default; @@ -59,15 +61,13 @@ public: void insert_many(const std::vector<const BitmapValue*> bitmaps) override; - bool find(uint64 data) const override { return _not_in ^ _bitmap_value->contains(data); } - uint16_t find_fixed_len_olap_engine(const char* data, const uint8* nullmap, uint16_t* offsets, int number) override; void find_batch(const char* data, const uint8* nullmap, int number, uint8* results) const override; - bool is_empty() override { return _empty; } + bool empty() override { return _bitmap_value->empty(); } Status assign(BitmapValue* bitmap_value) override { *_bitmap_value = *bitmap_value; @@ -76,9 +76,25 @@ public: void light_copy(BitmapFilterFuncBase* bloomfilter_func) override; + size_t size() const override { return _bitmap_value->cardinality(); } + + uint64_t max() { return _bitmap_value->max(nullptr); } + + uint64_t min() { return _bitmap_value->min(nullptr); } + + bool contains_any(CppType left, CppType right) { + if (right < 0) { + return false; + } + return _bitmap_value->contains_any(std::max(left, (CppType)0), right); + } + + std::shared_ptr<BitmapValue> get_inner_bitmap() { return _bitmap_value; } + private: std::shared_ptr<BitmapValue> _bitmap_value; - bool _empty; + + bool find(CppType data) const { return _not_in ^ (data >= 0 && _bitmap_value->contains(data)); } }; template <PrimitiveType type> @@ -88,7 +104,6 @@ void BitmapFilterFunc<type>::insert(const void* data) { } *_bitmap_value |= *reinterpret_cast<const BitmapValue*>(data); - _empty = false; } template <PrimitiveType type> @@ -97,7 +112,6 @@ void BitmapFilterFunc<type>::insert_many(const std::vector<const BitmapValue*> b return; } _bitmap_value->fastunion(bitmaps); - _empty = false; } template <PrimitiveType type> @@ -136,7 +150,6 @@ template <PrimitiveType type> void BitmapFilterFunc<type>::light_copy(BitmapFilterFuncBase* bitmapfilter_func) { BitmapFilterFuncBase::light_copy(bitmapfilter_func); auto other_func = reinterpret_cast<BitmapFilterFunc*>(bitmapfilter_func); - _empty = other_func->_empty; _bitmap_value = other_func->_bitmap_value; } diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 39760c7c3d..d584f06d17 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -17,6 +17,8 @@ #include "runtime_filter.h" +#include <memory> + #include "common/object_pool.h" #include "common/status.h" #include "exprs/binary_predicate.h" @@ -457,8 +459,8 @@ public: return Status::OK(); } case RuntimeFilterType::BITMAP_FILTER: { - _context._bitmap_filter_func.reset(create_bitmap_filter(_column_return_type)); - _context._bitmap_filter_func->set_not_in(params->bitmap_filter_not_in); + _context.bitmap_filter_func.reset(create_bitmap_filter(_column_return_type)); + _context.bitmap_filter_func->set_not_in(params->bitmap_filter_not_in); return Status::OK(); } default: @@ -524,7 +526,7 @@ public: break; } case RuntimeFilterType::BITMAP_FILTER: { - _context._bitmap_filter_func->insert(data); + _context.bitmap_filter_func->insert(data); break; } default: @@ -602,7 +604,7 @@ public: for (int index : rows) { bitmaps.push_back(&(col->get_data()[index])); } - _context._bitmap_filter_func->insert_many(bitmaps); + _context.bitmap_filter_func->insert_many(bitmaps); } RuntimeFilterType get_real_type() { @@ -1086,6 +1088,10 @@ public: size_t get_in_filter_size() const { return _context.hybrid_set->size(); } + std::shared_ptr<BitmapFilterFuncBase> get_bitmap_filter() const { + return _context.bitmap_filter_func; + } + friend class IRuntimeFilter; private: @@ -1260,6 +1266,11 @@ void IRuntimeFilter::signal() { if (_wrapper->get_real_type() == RuntimeFilterType::IN_FILTER) { _profile->add_info_string("InFilterSize", std::to_string(_wrapper->get_in_filter_size())); } + if (_wrapper->get_real_type() == RuntimeFilterType::BITMAP_FILTER) { + auto bitmap_filter = _wrapper->get_bitmap_filter(); + _profile->add_info_string("BitmapSize", std::to_string(bitmap_filter->size())); + _profile->add_info_string("IsNotIn", bitmap_filter->is_not_in() ? "true" : "false"); + } } BloomFilterFuncBase* IRuntimeFilter::get_bloomfilter() const { @@ -1935,7 +1946,7 @@ Status RuntimePredicateWrapper::get_push_vexprs(std::vector<doris::vectorized::V node.__set_vector_opcode(to_in_opcode(_column_return_type)); node.__set_is_nullable(false); auto bitmap_pred = _pool->add(new vectorized::VBitmapPredicate(node)); - bitmap_pred->set_filter(_context._bitmap_filter_func); + bitmap_pred->set_filter(_context.bitmap_filter_func); auto cloned_vexpr = vprob_expr->root()->clone(_pool); bitmap_pred->add_child(cloned_vexpr); auto wrapper = _pool->add(new vectorized::VRuntimeFilterWrapper(node, bitmap_pred)); diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index 09afe6c640..5cd08aca46 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -43,6 +43,7 @@ class PMinMaxFilter; class HashJoinNode; class RuntimeProfile; class BloomFilterFuncBase; +class BitmapFilterFuncBase; namespace vectorized { class VExpr; @@ -252,6 +253,8 @@ public: void ready_for_publish(); + std::shared_ptr<BitmapFilterFuncBase> get_bitmap_filter() const; + static bool enable_use_batch(int be_exec_version, PrimitiveType type) { return be_exec_version > 0 && (is_int_or_bool(type) || is_float_or_double(type)); } diff --git a/be/src/olap/bitmap_filter_predicate.h b/be/src/olap/bitmap_filter_predicate.h index be3dc23cc9..053cdb748d 100644 --- a/be/src/olap/bitmap_filter_predicate.h +++ b/be/src/olap/bitmap_filter_predicate.h @@ -17,9 +17,14 @@ #pragma once +#include <cstdint> + #include "exprs/bitmapfilter_predicate.h" +#include "exprs/cast_functions.h" #include "exprs/runtime_filter.h" #include "olap/column_predicate.h" +#include "olap/wrapper_field.h" +#include "util/bitmap_value.h" #include "vec/columns/column_dictionary.h" #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" @@ -51,6 +56,26 @@ public: void evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t size, bool* flags) const override {}; + bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const override { + if (_specific_filter->is_not_in()) { + return true; + } + + CppType max_value; + if (statistic.second->is_null()) { + // no non-null values + return false; + } else { + max_value = *reinterpret_cast<const CppType*>(statistic.second->cell_ptr()); + } + + CppType min_value = + statistic.first->is_null() /* contains null values */ + ? 0 + : *reinterpret_cast<const CppType*>(statistic.first->cell_ptr()); + return _specific_filter->contains_any(min_value, max_value); + } + Status evaluate(BitmapIndexIterator*, uint32_t, roaring::Roaring*) const override { return Status::OK(); } @@ -84,25 +109,7 @@ private: }; template <PrimitiveType T> -void BitmapFilterColumnPredicate<T>::evaluate(ColumnBlock* block, uint16_t* sel, - uint16_t* size) const { - uint16_t new_size = 0; - if (block->is_nullable()) { - for (uint16_t i = 0; i < *size; ++i) { - uint16_t idx = sel[i]; - sel[new_size] = idx; - new_size += (!block->cell(idx).is_null() && - _specific_filter->find(*block->cell(idx).cell_ptr())); - } - } else { - for (uint16_t i = 0; i < *size; ++i) { - uint16_t idx = sel[i]; - sel[new_size] = idx; - new_size += _specific_filter->find(*block->cell(idx).cell_ptr()); - } - } - *size = new_size; -} +void BitmapFilterColumnPredicate<T>::evaluate(ColumnBlock*, uint16_t*, uint16_t*) const {} template <PrimitiveType T> uint16_t BitmapFilterColumnPredicate<T>::evaluate(const vectorized::IColumn& column, uint16_t* sel, diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 2ca2c37982..41f9c2132a 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -286,6 +286,7 @@ struct OlapReaderStatistics { // block_load_ns // block_init_ns // block_init_seek_ns + // block_conditions_filtered_ns // first_read_ns // block_first_read_seek_ns // lazy_read_ns @@ -322,6 +323,7 @@ struct OlapReaderStatistics { int64_t rows_del_by_bitmap = 0; // the number of rows filtered by various column indexes. int64_t rows_conditions_filtered = 0; + int64_t block_conditions_filtered_ns = 0; int64_t index_load_ns = 0; diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp index 1caa62f638..5f5419073a 100644 --- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp +++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp @@ -282,6 +282,7 @@ Status SegmentIterator::_prepare_seek(const StorageReadOptions::KeyRange& key_ra } Status SegmentIterator::_get_row_ranges_by_column_conditions() { + SCOPED_RAW_TIMER(&_opts.stats->block_conditions_filtered_ns); if (_row_bitmap.isEmpty()) { return Status::OK(); } diff --git a/be/src/util/bitmap_value.h b/be/src/util/bitmap_value.h index e8f10a4f39..2717f18895 100644 --- a/be/src/util/bitmap_value.h +++ b/be/src/util/bitmap_value.h @@ -21,6 +21,7 @@ #include <algorithm> #include <cstdarg> +#include <cstdint> #include <cstdio> #include <limits> #include <map> @@ -32,6 +33,7 @@ #include <utility> #include "common/logging.h" +#include "gutil/integral_types.h" #include "udf/udf.h" #include "util/coding.h" #include "vec/common/pod_array.h" @@ -1430,6 +1432,9 @@ public: return false; } + // true if contains a value that belongs to the range [left, right]. + bool contains_any(uint64_t left, uint64_t right) const; + uint64_t cardinality() const { switch (_type) { case EMPTY: @@ -1677,6 +1682,16 @@ public: } } + uint64_t max(bool* empty) const { + return min_or_max(empty, [&]() { return _bitmap.maximum(); }); + } + + uint64_t min(bool* empty) const { + return min_or_max(empty, [&]() { return _bitmap.minimum(); }); + } + + bool empty() const { return _type == EMPTY; } + /** * Return new set with specified range (not include the range_end) */ @@ -1823,6 +1838,7 @@ public: b_iterator begin() const; b_iterator end() const; + b_iterator lower_bound(uint64_t val) const; private: void _convert_to_smaller_type() { @@ -1839,6 +1855,25 @@ private: } } + uint64_t min_or_max(bool* empty, std::function<uint64_t()> func) const { + bool is_empty = false; + uint64_t result = 0; + switch (_type) { + case SINGLE: + result = _sv; + break; + case BITMAP: + result = func(); + break; + default: + is_empty = true; + } + if (empty) { + *empty = is_empty; + } + return result; + } + enum BitmapDataType { EMPTY = 0, SINGLE = 1, // single element @@ -1876,7 +1911,9 @@ public: } BitmapValueIterator(const BitmapValueIterator& other) - : _bitmap(other._bitmap), _iter(other._iter), _sv(other._sv), _end(other._end) {} + : _bitmap(other._bitmap), _sv(other._sv), _end(other._end) { + _iter = other._iter ? new detail::Roaring64MapSetBitForwardIterator(*other._iter) : nullptr; + } ~BitmapValueIterator() { if (_iter != nullptr) { @@ -1930,7 +1967,9 @@ public: } bool operator==(const BitmapValueIterator& other) const { - if (_end && other._end) return true; + if (_end && other._end) { + return true; + } switch (_bitmap._type) { case BitmapValue::BitmapDataType::EMPTY: @@ -1947,6 +1986,27 @@ public: bool operator!=(const BitmapValueIterator& other) const { return !(*this == other); } + /** + * Move the iterator to the first value >= `val`. + */ + BitmapValueIterator& move(uint64_t val) { + switch (_bitmap._type) { + case BitmapValue::BitmapDataType::SINGLE: + if (_sv < val) { + _end = true; + } + break; + case BitmapValue::BitmapDataType::BITMAP: + if (!_iter->move(val)) { + _end = true; + } + break; + default: + break; + } + return *this; + } + private: const BitmapValue& _bitmap; detail::Roaring64MapSetBitForwardIterator* _iter = nullptr; @@ -1962,4 +2022,16 @@ inline BitmapValueIterator BitmapValue::end() const { return BitmapValueIterator(*this, true); } +inline BitmapValueIterator BitmapValue::lower_bound(uint64_t val) const { + return BitmapValueIterator(*this).move(val); +} + +inline bool BitmapValue::contains_any(uint64_t left, uint64_t right) const { + if (left > right) { + return false; + } + auto it = lower_bound(left); + return it != end() && *it <= right; +} + } // namespace doris diff --git a/be/src/vec/exec/scan/new_olap_scan_node.cpp b/be/src/vec/exec/scan/new_olap_scan_node.cpp index 46cb029d38..ad978de41f 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.cpp +++ b/be/src/vec/exec/scan/new_olap_scan_node.cpp @@ -66,6 +66,7 @@ Status NewOlapScanNode::_init_profile() { _block_init_timer = ADD_TIMER(_segment_profile, "BlockInitTime"); _block_init_seek_timer = ADD_TIMER(_segment_profile, "BlockInitSeekTime"); _block_init_seek_counter = ADD_COUNTER(_segment_profile, "BlockInitSeekCount", TUnit::UNIT); + _block_conditions_filtered_timer = ADD_TIMER(_segment_profile, "BlockConditionsFilteredTime"); _rows_vec_cond_counter = ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT); _vec_cond_timer = ADD_TIMER(_segment_profile, "VectorPredEvalTime"); diff --git a/be/src/vec/exec/scan/new_olap_scan_node.h b/be/src/vec/exec/scan/new_olap_scan_node.h index f55e715659..dc882aa6a9 100644 --- a/be/src/vec/exec/scan/new_olap_scan_node.h +++ b/be/src/vec/exec/scan/new_olap_scan_node.h @@ -95,6 +95,7 @@ private: RuntimeProfile::Counter* _block_init_timer = nullptr; RuntimeProfile::Counter* _block_init_seek_timer = nullptr; RuntimeProfile::Counter* _block_init_seek_counter = nullptr; + RuntimeProfile::Counter* _block_conditions_filtered_timer = nullptr; RuntimeProfile::Counter* _first_read_timer = nullptr; RuntimeProfile::Counter* _first_read_seek_timer = nullptr; RuntimeProfile::Counter* _first_read_seek_counter = nullptr; diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 3ddccdf460..224affa4bc 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -386,6 +386,8 @@ void NewOlapScanner::_update_counters_before_close() { COUNTER_UPDATE(olap_parent->_block_init_timer, stats.block_init_ns); COUNTER_UPDATE(olap_parent->_block_init_seek_timer, stats.block_init_seek_ns); COUNTER_UPDATE(olap_parent->_block_init_seek_counter, stats.block_init_seek_num); + COUNTER_UPDATE(olap_parent->_block_conditions_filtered_timer, + stats.block_conditions_filtered_ns); COUNTER_UPDATE(olap_parent->_first_read_timer, stats.first_read_ns); COUNTER_UPDATE(olap_parent->_first_read_seek_timer, stats.block_first_read_seek_ns); COUNTER_UPDATE(olap_parent->_first_read_seek_counter, stats.block_first_read_seek_num); diff --git a/be/src/vec/runtime/shared_hash_table_controller.h b/be/src/vec/runtime/shared_hash_table_controller.h index 0a308f8aea..e2c54f533d 100644 --- a/be/src/vec/runtime/shared_hash_table_controller.h +++ b/be/src/vec/runtime/shared_hash_table_controller.h @@ -42,7 +42,7 @@ struct SharedRuntimeFilterContext { std::shared_ptr<MinMaxFuncBase> minmax_func; std::shared_ptr<HybridSetBase> hybrid_set; std::shared_ptr<BloomFilterFuncBase> bloom_filter_func; - std::shared_ptr<BitmapFilterFuncBase> _bitmap_filter_func; + std::shared_ptr<BitmapFilterFuncBase> bitmap_filter_func; }; struct SharedHashTableContext { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org