This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch dev-1.1.2
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/dev-1.1.2 by this push:
     new 5a63846aa8 [dev-1.1.2](cherry-pick) fix column dict incorrect result #11694
5a63846aa8 is described below

commit 5a63846aa8e8298b1bbb763c94ac2bdf0009c45b
Author: yiguolei <yiguo...@gmail.com>
AuthorDate: Fri Aug 12 11:09:44 2022 +0800

    [dev-1.1.2](cherry-pick) fix column dict incorrect result #11694
---
 be/src/vec/columns/column_dictionary.h | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h
index 97052cfb7c..a854408c20 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -22,7 +22,10 @@
 #include <algorithm>
 
 #include "gutil/hash/string_hash.h"
+#include "olap/column_predicate.h"
+#include "olap/comparison_predicate.h"
 #include "olap/decimal12.h"
+#include "olap/in_list_predicate.h"
 #include "olap/uint24.h"
 #include "runtime/string_value.h"
 #include "util/slice.h"
@@ -32,11 +35,8 @@
 #include "vec/columns/column_string.h"
 #include "vec/columns/column_vector.h"
 #include "vec/columns/predicate_column.h"
-#include "vec/core/types.h"
 #include "vec/common/typeid_cast.h"
-#include "olap/column_predicate.h"
-#include "olap/comparison_predicate.h"
-#include "olap/in_list_predicate.h"
+#include "vec/core/types.h"
 
 namespace doris::vectorized {
 
@@ -55,6 +55,7 @@ namespace doris::vectorized {
 template <typename T>
 class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
     static_assert(IsNumber<T>);
+
 private:
     friend class COWHelper<IColumn, ColumnDictionary>;
 
@@ -255,9 +256,7 @@ public:
         _dict.generate_hash_values_for_runtime_filter(_type);
     }
 
-    uint32_t get_hash_value(uint32_t idx) const {
-        return _dict.get_hash_value(_codes[idx]);
-    }
+    uint32_t get_hash_value(uint32_t idx) const { return _dict.get_hash_value(_codes[idx]); }
 
     phmap::flat_hash_set<int32_t> find_codes(
             const phmap::flat_hash_set<StringValue>& values) const {
@@ -269,6 +268,9 @@ public:
     bool is_dict_code_converted() const { return _dict_code_converted; }
 
     MutableColumnPtr convert_to_predicate_column_if_dictionary() override {
+        if (is_dict_sorted() && !is_dict_code_converted()) {
+            convert_dict_codes_if_necessary();
+        }
         auto res = vectorized::PredicateColumnType<StringValue>::create();
         res->reserve(_reserve_size);
         for (size_t i = 0; i < _codes.size(); ++i) {
@@ -334,9 +336,7 @@ public:
             }
         }
 
-        inline uint32_t get_hash_value(T code) const {
-            return _hash_values[code];
-        }
+        inline uint32_t get_hash_value(T code) const { return _hash_values[code]; }
 
         // For > , code takes upper_bound - 1; For >= , code takes upper_bound
         // For < , code takes upper_bound; For <=, code takes upper_bound - 1
@@ -384,9 +384,7 @@ public:
             _hash_values.clear();
         }
 
-        void clear_hash_values() {
-            _hash_values.clear();
-        }
+        void clear_hash_values() { _hash_values.clear(); }
 
         void sort() {
             size_t dict_size = _dict_data.size();
@@ -414,7 +412,7 @@ public:
         phmap::flat_hash_map<T, T> _code_convert_map;
         // hash value of origin string , used for bloom filter
         // It's a trade-off of space for performance
-        // But in TPC-DS 1GB q60,we see no significant improvement. 
+        // But in TPC-DS 1GB q60,we see no significant improvement.
         // This may because the magnitude of the data is not large enough(in q60, only about 80k rows data is filtered for largest table)
         // So we may need more test here.
         HashValueContainer _hash_values;


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to