This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch dev-1.1.2 in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/dev-1.1.2 by this push: new 5a63846aa8 [dev-1.1.2](cherry-pick) fix column dict incorrect result #11694 5a63846aa8 is described below commit 5a63846aa8e8298b1bbb763c94ac2bdf0009c45b Author: yiguolei <yiguo...@gmail.com> AuthorDate: Fri Aug 12 11:09:44 2022 +0800 [dev-1.1.2](cherry-pick) fix column dict incorrect result #11694 --- be/src/vec/columns/column_dictionary.h | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 97052cfb7c..a854408c20 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -22,7 +22,10 @@ #include <algorithm> #include "gutil/hash/string_hash.h" +#include "olap/column_predicate.h" +#include "olap/comparison_predicate.h" #include "olap/decimal12.h" +#include "olap/in_list_predicate.h" #include "olap/uint24.h" #include "runtime/string_value.h" #include "util/slice.h" @@ -32,11 +35,8 @@ #include "vec/columns/column_string.h" #include "vec/columns/column_vector.h" #include "vec/columns/predicate_column.h" -#include "vec/core/types.h" #include "vec/common/typeid_cast.h" -#include "olap/column_predicate.h" -#include "olap/comparison_predicate.h" -#include "olap/in_list_predicate.h" +#include "vec/core/types.h" namespace doris::vectorized { @@ -55,6 +55,7 @@ namespace doris::vectorized { template <typename T> class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> { static_assert(IsNumber<T>); + private: friend class COWHelper<IColumn, ColumnDictionary>; @@ -255,9 +256,7 @@ public: _dict.generate_hash_values_for_runtime_filter(_type); } - uint32_t get_hash_value(uint32_t idx) const { - return _dict.get_hash_value(_codes[idx]); - } + uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); } phmap::flat_hash_set<int32_t> find_codes( const phmap::flat_hash_set<StringValue>& values) const { @@ -269,6 +268,9 @@ public: bool is_dict_code_converted() const { return _dict_code_converted; } MutableColumnPtr convert_to_predicate_column_if_dictionary() override { + if (is_dict_sorted() && !is_dict_code_converted()) { + convert_dict_codes_if_necessary(); + } auto res = vectorized::PredicateColumnType<StringValue>::create(); res->reserve(_reserve_size); for (size_t i = 0; i < _codes.size(); ++i) { @@ -334,9 +336,7 @@ public: } } - inline uint32_t get_hash_value(T code) const { - return _hash_values[code]; - } + inline uint32_t get_hash_value(T code) const { return _hash_values[code]; } // For > , code takes upper_bound - 1; For >= , code takes upper_bound // For < , code takes upper_bound; For <=, code takes upper_bound - 1 @@ -384,9 +384,7 @@ public: _hash_values.clear(); } - void clear_hash_values() { - _hash_values.clear(); - } + void clear_hash_values() { _hash_values.clear(); } void sort() { size_t dict_size = _dict_data.size(); @@ -414,7 +412,7 @@ public: phmap::flat_hash_map<T, T> _code_convert_map; // hash value of origin string , used for bloom filter // It's a trade-off of space for performance - // But in TPC-DS 1GB q60,we see no significant improvement. + // But in TPC-DS 1GB q60,we see no significant improvement. // This may because the magnitude of the data is not large enough(in q60, only about 80k rows data is filtered for largest table) // So we may need more test here. HashValueContainer _hash_values; --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org