This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch dev-1.0.1
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git

commit dfc51faa3974b4c89ccd835dd2d98af8dcb00ff9
Author: ZenoYang <cookie...@qq.com>
AuthorDate: Tue Mar 29 19:11:54 2022 +0800

    [refactor][optimize](storage) Code optimization and refactoring for low-cardinality columns in storage layer (#8627)
    
    * Optimize predicate calculation and refactor
---
 be/src/olap/bloom_filter_predicate.h               |  26 +--
 be/src/olap/column_predicate.h                     |  23 ++-
 be/src/olap/comparison_predicate.cpp               | 190 ++++++++++++-------
 be/src/olap/comparison_predicate.h                 |  26 +--
 be/src/olap/in_list_predicate.cpp                  | 111 +++++++----
 be/src/olap/in_list_predicate.h                    |  17 +-
 be/src/olap/null_predicate.cpp                     |   4 +
 be/src/olap/null_predicate.h                       |   2 +
 be/src/olap/rowset/segment_v2/binary_dict_page.cpp |  13 +-
 be/src/olap/rowset/segment_v2/segment_iterator.cpp |  27 ++-
 be/src/runtime/string_value.h                      |   6 +
 be/src/vec/columns/column.h                        |   8 +-
 be/src/vec/columns/column_dictionary.h             | 211 ++++++++++-----------
 be/src/vec/columns/column_nullable.h               |  11 ++
 14 files changed, 388 insertions(+), 287 deletions(-)

diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h
index 3b49cb0..fa65293 100644
--- a/be/src/olap/bloom_filter_predicate.h
+++ b/be/src/olap/bloom_filter_predicate.h
@@ -37,10 +37,10 @@ namespace doris {
 class VectorizedRowBatch;
 
 // only use in runtime filter and segment v2
-template <PrimitiveType type>
+template <PrimitiveType T>
 class BloomFilterColumnPredicate : public ColumnPredicate {
 public:
-    using SpecificFilter = BloomFilterFunc<type, CurrentBloomFilterAdaptor>;
+    using SpecificFilter = BloomFilterFunc<T, CurrentBloomFilterAdaptor>;
 
     BloomFilterColumnPredicate(uint32_t column_id,
                                const std::shared_ptr<IBloomFilterFuncBase>& 
filter)
@@ -49,6 +49,8 @@ public:
               _specific_filter(static_cast<SpecificFilter*>(_filter.get())) {}
     ~BloomFilterColumnPredicate() override = default;
 
+    PredicateType type() const override { return PredicateType::BF; }
+
     void evaluate(VectorizedRowBatch* batch) const override;
 
     void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const 
override;
@@ -65,16 +67,14 @@ public:
 
     void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* size) 
const override;
 
-    bool is_bloom_filter_predicate() override { return true; }
-
 private:
     std::shared_ptr<IBloomFilterFuncBase> _filter;
     SpecificFilter* _specific_filter; // owned by _filter
 };
 
 // bloom filter column predicate do not support in segment v1
-template <PrimitiveType type>
-void BloomFilterColumnPredicate<type>::evaluate(VectorizedRowBatch* batch) 
const {
+template <PrimitiveType T>
+void BloomFilterColumnPredicate<T>::evaluate(VectorizedRowBatch* batch) const {
     uint16_t n = batch->size();
     uint16_t* sel = batch->selected();
     if (!batch->selected_in_use()) {
@@ -84,8 +84,8 @@ void 
BloomFilterColumnPredicate<type>::evaluate(VectorizedRowBatch* batch) const
     }
 }
 
-template <PrimitiveType type>
-void BloomFilterColumnPredicate<type>::evaluate(ColumnBlock* block, uint16_t* 
sel,
+template <PrimitiveType T>
+void BloomFilterColumnPredicate<T>::evaluate(ColumnBlock* block, uint16_t* sel,
                                                 uint16_t* size) const {
     uint16_t new_size = 0;
     if (block->is_nullable()) {
@@ -107,16 +107,16 @@ void 
BloomFilterColumnPredicate<type>::evaluate(ColumnBlock* block, uint16_t* se
     *size = new_size;
 }
 
-template <PrimitiveType type>
-void BloomFilterColumnPredicate<type>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,
+template <PrimitiveType T>
+void BloomFilterColumnPredicate<T>::evaluate(vectorized::IColumn& column, 
uint16_t* sel,
                                                 uint16_t* size) const {
     uint16_t new_size = 0;
-    using T = typename PredicatePrimitiveTypeTraits<type>::PredicateFieldType;
+    using FT = typename PredicatePrimitiveTypeTraits<T>::PredicateFieldType;
 
     if (column.is_nullable()) {
         auto* nullable_col = 
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);
         auto& null_map_data = nullable_col->get_null_map_column().get_data();
-        auto* pred_col = 
vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(
+        auto* pred_col = 
vectorized::check_and_get_column<vectorized::PredicateColumnType<FT>>(
                 nullable_col->get_nested_column());
         auto& pred_col_data = pred_col->get_data();
         for (uint16_t i = 0; i < *size; i++) {
@@ -127,7 +127,7 @@ void 
BloomFilterColumnPredicate<type>::evaluate(vectorized::IColumn& column, uin
         }
     } else {
         auto* pred_col =
-                
vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(column);
+                
vectorized::check_and_get_column<vectorized::PredicateColumnType<FT>>(column);
         auto& pred_col_data = pred_col->get_data();
         for (uint16_t i = 0; i < *size; i++) {
             uint16_t idx = sel[i];
diff --git a/be/src/olap/column_predicate.h b/be/src/olap/column_predicate.h
index 342d8fc..45f8fcf 100644
--- a/be/src/olap/column_predicate.h
+++ b/be/src/olap/column_predicate.h
@@ -33,6 +33,21 @@ class VectorizedRowBatch;
 class Schema;
 class RowBlockV2;
 
+enum class PredicateType {
+    UNKNOWN = 0,
+    EQ = 1,
+    NE = 2,
+    LT = 3,
+    LE = 4,
+    GT = 5,
+    GE = 6,
+    IN_LIST = 7,
+    NO_IN_LIST = 8,
+    IS_NULL = 9,
+    NOT_IS_NULL = 10,
+    BF = 11, // BloomFilter
+};
+
 class ColumnPredicate {
 public:
     explicit ColumnPredicate(uint32_t column_id, bool opposite = false)
@@ -40,6 +55,8 @@ public:
 
     virtual ~ColumnPredicate() = default;
 
+    virtual PredicateType type() const = 0;
+
     //evaluate predicate on VectorizedRowBatch
     virtual void evaluate(VectorizedRowBatch* batch) const = 0;
 
@@ -69,11 +86,7 @@ public:
     virtual void evaluate_vec(vectorized::IColumn& column, uint16_t size, 
bool* flags) const {};
     uint32_t column_id() const { return _column_id; }
 
-    virtual bool is_in_predicate() { return false; }
-
-    virtual bool is_bloom_filter_predicate() { return false; }
-
-    virtual bool is_range_comparison_predicate() { return false; }
+    virtual void set_dict_code_if_necessary(vectorized::IColumn& column) { }
 
 protected:
     uint32_t _column_id;
diff --git a/be/src/olap/comparison_predicate.cpp b/be/src/olap/comparison_predicate.cpp
index d74dd10..ef6ee3a 100644
--- a/be/src/olap/comparison_predicate.cpp
+++ b/be/src/olap/comparison_predicate.cpp
@@ -28,9 +28,9 @@
 
 namespace doris {
 
-#define COMPARISON_PRED_CONSTRUCTOR(CLASS)                                   \
-    template <class type>                                                    \
-    CLASS<type>::CLASS(uint32_t column_id, const type& value, bool opposite) \
+#define COMPARISON_PRED_CONSTRUCTOR(CLASS) \
+    template <class T>                     \
+    CLASS<T>::CLASS(uint32_t column_id, const T& value, bool opposite) \
             : ColumnPredicate(column_id, opposite), _value(value) {}
 
 COMPARISON_PRED_CONSTRUCTOR(EqualPredicate)
@@ -56,15 +56,15 @@ COMPARISON_PRED_CONSTRUCTOR_STRING(GreaterPredicate)
 COMPARISON_PRED_CONSTRUCTOR_STRING(GreaterEqualPredicate)
 
 #define COMPARISON_PRED_EVALUATE(CLASS, OP)                                    
       \
-    template <class type>                                                      
       \
-    void CLASS<type>::evaluate(VectorizedRowBatch* batch) const {              
       \
+    template <class T>                                                         
       \
+    void CLASS<T>::evaluate(VectorizedRowBatch* batch) const {                 
       \
         uint16_t n = batch->size();                                            
       \
         if (n == 0) {                                                          
       \
             return;                                                            
       \
         }                                                                      
       \
         uint16_t* sel = batch->selected();                                     
       \
-        const type* col_vector =                                               
       \
-                reinterpret_cast<const 
type*>(batch->column(_column_id)->col_data()); \
+        const T* col_vector =                                                  
       \
+                reinterpret_cast<const 
T*>(batch->column(_column_id)->col_data());    \
         uint16_t new_size = 0;                                                 
       \
         if (batch->column(_column_id)->no_nulls()) {                           
       \
             if (batch->selected_in_use()) {                                    
       \
@@ -114,15 +114,15 @@ COMPARISON_PRED_EVALUATE(GreaterPredicate, >)
 COMPARISON_PRED_EVALUATE(GreaterEqualPredicate, >=)
 
 #define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP)                       
           \
-    template <class type>                                                      
           \
-    void CLASS<type>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* 
size) const { \
+    template <class T>                                                         
           \
+    void CLASS<T>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) 
const {    \
         uint16_t new_size = 0;                                                 
           \
         if (block->is_nullable()) {                                            
           \
             for (uint16_t i = 0; i < *size; ++i) {                             
           \
                 uint16_t idx = sel[i];                                         
           \
                 sel[new_size] = idx;                                           
           \
-                const type* cell_value =                                       
           \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());       \
+                const T* cell_value =                                          
           \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());          \
                 auto result = (!block->cell(idx).is_null() && (*cell_value OP 
_value));   \
                 new_size += _opposite ? !result : result;                      
           \
             }                                                                  
           \
@@ -130,8 +130,8 @@ COMPARISON_PRED_EVALUATE(GreaterEqualPredicate, >=)
             for (uint16_t i = 0; i < *size; ++i) {                             
           \
                 uint16_t idx = sel[i];                                         
           \
                 sel[new_size] = idx;                                           
           \
-                const type* cell_value =                                       
           \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());       \
+                const T* cell_value =                                          
           \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());          \
                 auto result = (*cell_value OP _value);                         
           \
                 new_size += _opposite ? !result : result;                      
           \
             }                                                                  
           \
@@ -147,9 +147,9 @@ COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
 
 // todo(zeno) define interface in IColumn to simplify code
-#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP, IS_RANGE)                   
                    \
-    template <class type>                                                      
                    \
-    void CLASS<type>::evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t* size) const { \
+#define COMPARISON_PRED_COLUMN_EVALUATE(CLASS, OP)                             
                    \
+    template <class T>                                                         
                    \
+    void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t* size) const {    \
         uint16_t new_size = 0;                                                 
                    \
         if (column.is_nullable()) {                                            
                    \
             auto* nullable_col =                                               
                    \
@@ -159,60 +159,54 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
                                         .get_data();                           
                    \
             auto& nested_col = nullable_col->get_nested_column();              
                    \
             if (nested_col.is_column_dictionary()) {                           
                    \
-                if constexpr (std::is_same_v<type, StringValue>) {             
                    \
+                if constexpr (std::is_same_v<T, StringValue>) {                
                    \
                     auto* nested_col_ptr = vectorized::check_and_get_column<   
                    \
                             
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);          \
-                    auto code = IS_RANGE ? 
nested_col_ptr->find_bound_code(_value, 0 OP 1, 1 OP 1) \
-                                         : nested_col_ptr->find_code(_value);  
                    \
                     auto& data_array = nested_col_ptr->get_data();             
                    \
                     for (uint16_t i = 0; i < *size; i++) {                     
                    \
                         uint16_t idx = sel[i];                                 
                    \
                         sel[new_size] = idx;                                   
                    \
-                        const auto& cell_value =                               
                    \
-                                reinterpret_cast<const 
vectorized::Int32&>(data_array[idx]);       \
-                        bool ret = !null_bitmap[idx] && (cell_value OP code);  
                    \
+                        const auto& cell_value = data_array[idx];              
                    \
+                        bool ret = !null_bitmap[idx] && (cell_value OP 
_dict_code);                \
                         new_size += _opposite ? !ret : ret;                    
                    \
                     }                                                          
                    \
                 }                                                              
                    \
             } else {                                                           
                    \
                 auto* nested_col_ptr =                                         
                    \
-                        
vectorized::check_and_get_column<vectorized::PredicateColumnType<type>>(   \
+                        
vectorized::check_and_get_column<vectorized::PredicateColumnType<T>>(      \
                                 nested_col);                                   
                    \
                 auto& data_array = nested_col_ptr->get_data();                 
                    \
                 for (uint16_t i = 0; i < *size; i++) {                         
                    \
                     uint16_t idx = sel[i];                                     
                    \
                     sel[new_size] = idx;                                       
                    \
-                    const type& cell_value = reinterpret_cast<const 
type&>(data_array[idx]);       \
+                    const auto& cell_value = reinterpret_cast<const 
T&>(data_array[idx]);          \
                     bool ret = !null_bitmap[idx] && (cell_value OP _value);    
                    \
                     new_size += _opposite ? !ret : ret;                        
                    \
                 }                                                              
                    \
             }                                                                  
                    \
             *size = new_size;                                                  
                    \
         } else if (column.is_column_dictionary()) {                            
                    \
-            if constexpr (std::is_same_v<type, StringValue>) {                 
                    \
+            if constexpr (std::is_same_v<T, StringValue>) {                    
                    \
                 auto& dict_col =                                               
                    \
                         
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(column);\
                 auto& data_array = dict_col.get_data();                        
                    \
-                auto code = IS_RANGE ? dict_col.find_bound_code(_value, 0 OP 
1, 1 OP 1)            \
-                                     : dict_col.find_code(_value);             
                    \
                 for (uint16_t i = 0; i < *size; ++i) {                         
                    \
                     uint16_t idx = sel[i];                                     
                    \
                     sel[new_size] = idx;                                       
                    \
-                    const auto& cell_value =                                   
                    \
-                            reinterpret_cast<const 
vectorized::Int32&>(data_array[idx]);           \
-                    bool ret = cell_value OP code;                             
                    \
+                    const auto& cell_value = data_array[idx];                  
                    \
+                    bool ret = cell_value OP _dict_code;                       
                    \
                     new_size += _opposite ? !ret : ret;                        
                    \
                 }                                                              
                    \
                 *size = new_size;                                              
                    \
             }                                                                  
                    \
         } else {                                                               
                    \
             auto& pred_column_ref =                                            
                    \
-                    
reinterpret_cast<vectorized::PredicateColumnType<type>&>(column);              \
+                    
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column);                 \
             auto& data_array = pred_column_ref.get_data();                     
                    \
             for (uint16_t i = 0; i < *size; i++) {                             
                    \
                 uint16_t idx = sel[i];                                         
                    \
                 sel[new_size] = idx;                                           
                    \
-                const type& cell_value = reinterpret_cast<const 
type&>(data_array[idx]);           \
+                const auto& cell_value = reinterpret_cast<const 
T&>(data_array[idx]);              \
                 auto ret = cell_value OP _value;                               
                    \
                 new_size += _opposite ? !ret : ret;                            
                    \
             }                                                                  
                    \
@@ -221,21 +215,21 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE(GreaterEqualPredicate, >=)
     }
 
 
-COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==, false)
-COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=, false)
-COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <, true)
-COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=, true)
-COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >, true)
-COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, true)
+COMPARISON_PRED_COLUMN_EVALUATE(EqualPredicate, ==)
+COMPARISON_PRED_COLUMN_EVALUATE(NotEqualPredicate, !=)
+COMPARISON_PRED_COLUMN_EVALUATE(LessPredicate, <)
+COMPARISON_PRED_COLUMN_EVALUATE(LessEqualPredicate, <=)
+COMPARISON_PRED_COLUMN_EVALUATE(GreaterPredicate, >)
+COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=)
 
 #define COMPARISON_PRED_COLUMN_EVALUATE_VEC(CLASS, OP)                         
                \
-    template <class type>                                                      
                \
-    void CLASS<type>::evaluate_vec(vectorized::IColumn& column, uint16_t size, 
bool* flags)    \
+    template <class T>                                                         
                \
+    void CLASS<T>::evaluate_vec(vectorized::IColumn& column, uint16_t size, 
bool* flags)       \
             const {                                                            
                \
         if (column.is_nullable()) {                                            
                \
             auto* nullable_column =                                            
                \
                     
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);      \
-            auto& data_array = reinterpret_cast<const 
vectorized::PredicateColumnType<type>&>( \
+            auto& data_array = reinterpret_cast<const 
vectorized::PredicateColumnType<T>&>(    \
                                        nullable_column->get_nested_column())   
                \
                                        .get_data();                            
                \
             auto& null_bitmap = reinterpret_cast<const 
vectorized::ColumnVector<uint8_t>&>(    \
@@ -246,7 +240,7 @@ COMPARISON_PRED_COLUMN_EVALUATE(GreaterEqualPredicate, >=, 
true)
             }                                                                  
                \
         } else {                                                               
                \
             auto& predicate_column =                                           
                \
-                    
reinterpret_cast<vectorized::PredicateColumnType<type>&>(column);          \
+                    
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column);             \
             auto& data_array = predicate_column.get_data();                    
                \
             for (uint16_t i = 0; i < size; i++) {                              
                \
                 flags[i] = data_array[i] OP _value;                            
                \
@@ -267,15 +261,15 @@ COMPARISON_PRED_COLUMN_EVALUATE_VEC(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_EVALUATE_VEC(GreaterEqualPredicate, >=)
 
 #define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP)                    
                  \
-    template <class type>                                                      
                  \
-    void CLASS<type>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t 
size, bool* flags) \
+    template <class T>                                                         
                  \
+    void CLASS<T>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t 
size, bool* flags)    \
             const {                                                            
                  \
         if (block->is_nullable()) {                                            
                  \
             for (uint16_t i = 0; i < size; ++i) {                              
                  \
                 if (flags[i]) continue;                                        
                  \
                 uint16_t idx = sel[i];                                         
                  \
-                const type* cell_value =                                       
                  \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());              \
+                const T* cell_value =                                          
                  \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());                 \
                 auto result = (!block->cell(idx).is_null() && (*cell_value OP 
_value));          \
                 flags[i] |= _opposite ? !result : result;                      
                  \
             }                                                                  
                  \
@@ -283,8 +277,8 @@ COMPARISON_PRED_COLUMN_EVALUATE_VEC(GreaterEqualPredicate, 
>=)
             for (uint16_t i = 0; i < size; ++i) {                              
                  \
                 if (flags[i]) continue;                                        
                  \
                 uint16_t idx = sel[i];                                         
                  \
-                const type* cell_value =                                       
                  \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());              \
+                const T* cell_value =                                          
                  \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());                 \
                 auto result = (*cell_value OP _value);                         
                  \
                 flags[i] |= _opposite ? !result : result;                      
                  \
             }                                                                  
                  \
@@ -299,11 +293,11 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterEqualPredicate, >=)
 
 #define COMPARISON_PRED_COLUMN_EVALUATE_OR(CLASS, OP)                          
                                                                      \
-    template <class type>                                                      
                                                                      \
-    void CLASS<type>::evaluate_or(vectorized::IColumn& column, uint16_t* sel, 
uint16_t size, bool* flags) const {                                    \
+    template <class T>                                                         
                                                                      \
+    void CLASS<T>::evaluate_or(vectorized::IColumn& column, uint16_t* sel, 
uint16_t size, bool* flags) const {                                       \
         if (column.is_nullable()) {                                            
                                                                      \
             auto* nullable_column = 
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);           
                                 \
-            auto& data_array = reinterpret_cast<const 
vectorized::PredicateColumnType<type>&>(nullable_column->get_nested_column()).get_data();
      \
+            auto& data_array = reinterpret_cast<const 
vectorized::PredicateColumnType<T>&>(nullable_column->get_nested_column()).get_data();
         \
             auto& null_bitmap = reinterpret_cast<const 
vectorized::ColumnVector<uint8_t>&>(*(nullable_column->get_null_map_column_ptr())).get_data();\
             for (uint16_t i = 0; i < size; i++) {                              
                                                                      \
                 if (flags[i]) continue;                                        
                                                                      \
@@ -312,7 +306,7 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_OR(GreaterEqualPredicate, >=)
                 flags[i] |= _opposite ? !ret : ret;                            
                                                                      \
             }                                                                  
                                                                      \
         } else {                                                               
                                                                      \
-            auto& predicate_column = 
reinterpret_cast<vectorized::PredicateColumnType<type>&>(column);               
                                \
+            auto& predicate_column = 
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column);                  
                                \
             auto& data_array = predicate_column.get_data();                    
                                                                      \
             for (uint16_t i = 0; i < size; ++i) {                              
                                                                      \
                 if (flags[i]) continue;                                        
                                                                      \
@@ -331,15 +325,15 @@ COMPARISON_PRED_COLUMN_EVALUATE_OR(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_EVALUATE_OR(GreaterEqualPredicate, >=)
 
 #define COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP)                   
                   \
-    template <class type>                                                      
                   \
-    void CLASS<type>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t 
size, bool* flags) \
+    template <class T>                                                         
                   \
+    void CLASS<T>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t 
size, bool* flags)    \
             const {                                                            
                   \
         if (block->is_nullable()) {                                            
                   \
             for (uint16_t i = 0; i < size; ++i) {                              
                   \
                 if (!flags[i]) continue;                                       
                   \
                 uint16_t idx = sel[i];                                         
                   \
-                const type* cell_value =                                       
                   \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());               \
+                const T* cell_value =                                          
                   \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());                  \
                 auto result = (!block->cell(idx).is_null() && (*cell_value OP 
_value));           \
                 flags[i] &= _opposite ? !result : result;                      
                   \
             }                                                                  
                   \
@@ -347,8 +341,8 @@ COMPARISON_PRED_COLUMN_EVALUATE_OR(GreaterEqualPredicate, 
>=)
             for (uint16_t i = 0; i < size; ++i) {                              
                   \
                 if (!flags[i]) continue;                                       
                   \
                 uint16_t idx = sel[i];                                         
                   \
-                const type* cell_value =                                       
                   \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());               \
+                const T* cell_value =                                          
                   \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());                  \
                 auto result = (*cell_value OP _value);                         
                   \
                 flags[i] &= _opposite ? !result : result;                      
                   \
             }                                                                  
                   \
@@ -363,11 +357,11 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterPredicate, >)
 COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterEqualPredicate, >=)
 
 #define COMPARISON_PRED_COLUMN_EVALUATE_AND(CLASS, OP)                         
                                                                      \
-    template <class type>                                                      
                                                                      \
-    void CLASS<type>::evaluate_and(vectorized::IColumn& column, uint16_t* sel, 
uint16_t size, bool* flags) const {                                   \
+    template <class T>                                                         
                                                                      \
+    void CLASS<T>::evaluate_and(vectorized::IColumn& column, uint16_t* sel, 
uint16_t size, bool* flags) const {                                      \
         if (column.is_nullable()) {                                            
                                                                      \
             auto* nullable_column = 
vectorized::check_and_get_column<vectorized::ColumnNullable>(column);           
                                 \
-            auto& data_array = reinterpret_cast<const 
vectorized::PredicateColumnType<type>&>(nullable_column->get_nested_column()).get_data();
      \
+            auto& data_array = reinterpret_cast<const 
vectorized::PredicateColumnType<T>&>(nullable_column->get_nested_column()).get_data();
         \
             auto& null_bitmap = reinterpret_cast<const 
vectorized::ColumnVector<uint8_t>&>(*(nullable_column->get_null_map_column_ptr())).get_data();\
             for (uint16_t i = 0; i < size; i++) {                              
                                                                      \
                 if (!flags[i]) continue;                                       
                                                                      \
@@ -376,7 +370,7 @@ 
COMPARISON_PRED_COLUMN_BLOCK_EVALUATE_AND(GreaterEqualPredicate, >=)
                 flags[i] &= _opposite ? !ret : ret;                            
                                                                      \
             }                                                                  
                                                                      \
         } else {                                                               
                                                                      \
-            auto& predicate_column = 
reinterpret_cast<vectorized::PredicateColumnType<type>&>(column);               
                                \
+            auto& predicate_column = 
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column);                  
                                \
             auto& data_array = predicate_column.get_data();                    
                                                                      \
             for (uint16_t i = 0; i < size; ++i) {                              
                                                                      \
                 if (!flags[i]) continue;                                       
                                                                      \
@@ -477,8 +471,8 @@ COMPARISON_PRED_COLUMN_EVALUATE_AND(GreaterEqualPredicate, 
>=)
     BITMAP_COMPARE_##CLASS(s, exact_match, seeked_ordinal, iterator, bitmap, 
roaring)
 
 #define COMPARISON_PRED_BITMAP_EVALUATE(CLASS, OP)                             
           \
-    template <class type>                                                      
           \
-    Status CLASS<type>::evaluate(const Schema& schema,                         
           \
+    template <class T>                                                         
           \
+    Status CLASS<T>::evaluate(const Schema& schema,                            
           \
                                  const std::vector<BitmapIndexIterator*>& 
iterators,      \
                                  uint32_t num_rows, roaring::Roaring* bitmap) 
const {     \
         BitmapIndexIterator* iterator = iterators[_column_id];                 
           \
@@ -508,6 +502,64 @@ COMPARISON_PRED_BITMAP_EVALUATE(LessEqualPredicate, <=)
 COMPARISON_PRED_BITMAP_EVALUATE(GreaterPredicate, >)
 COMPARISON_PRED_BITMAP_EVALUATE(GreaterEqualPredicate, >=)
 
+
+#define COMPARISON_PRED_SET_DICT_CODE(CLASS)                                   
                \
+    template <class T>                                                         
                \
+    void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) {   
                \
+        if (_dict_code_inited) {                                               
                \
+            return;                                                            
                \
+        }                                                                      
                \
+        if constexpr (std::is_same_v<T, StringValue>) {                        
                \
+            auto* col_ptr = column.get_ptr().get();                            
                \
+            if (column.is_nullable()) {                                        
                \
+                auto nullable_col =                                            
                \
+                        
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr);                \
+                col_ptr = nullable_col->get_nested_column_ptr().get();         
                \
+            }                                                                  
                \
+            if (col_ptr->is_column_dictionary()) {                             
                \
+                auto& dict_col =                                               
                \
+                        
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(    \
+                                *col_ptr);                                     
                \
+                auto code = dict_col.find_code(_value);                        
                \
+                _dict_code = code;                                             
                \
+                _dict_code_inited = true;                                      
                \
+            }                                                                  
                \
+        }                                                                      
                \
+    }
+
+COMPARISON_PRED_SET_DICT_CODE(EqualPredicate)
+COMPARISON_PRED_SET_DICT_CODE(NotEqualPredicate)
+
+#define RAMGE_COMPARISON_PRED_SET_DICT_CODE(CLASS, OP)                         
                \
+    template <class T>                                                         
                \
+    void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) {   
                \
+        if (_dict_code_inited) {                                               
                \
+            return;                                                            
                \
+        }                                                                      
                \
+        if constexpr (std::is_same_v<T, StringValue>) {                        
                \
+            auto* col_ptr = column.get_ptr().get();                            
                \
+            if (column.is_nullable()) {                                        
                \
+                auto nullable_col =                                            
                \
+                        
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr);                \
+                col_ptr = nullable_col->get_nested_column_ptr().get();         
                \
+            }                                                                  
                \
+                                                                               
                \
+            if (col_ptr->is_column_dictionary()) {                             
                \
+                auto& dict_col =                                               
                \
+                        
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(    \
+                                *col_ptr);                                     
                \
+                auto code = dict_col.find_code_by_bound(_value, 0 OP 1, 1 OP 
1);               \
+                _dict_code = code;                                             
                \
+                _dict_code_inited = true;                                      
                \
+            }                                                                  
                \
+        }                                                                      
                \
+    }
+
+RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessPredicate, <)
+RAMGE_COMPARISON_PRED_SET_DICT_CODE(LessEqualPredicate, <=)
+RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterPredicate, >)
+RAMGE_COMPARISON_PRED_SET_DICT_CODE(GreaterEqualPredicate, >=)
+
 #define COMPARISON_PRED_CONSTRUCTOR_DECLARATION(CLASS)                         
                \
     template CLASS<int8_t>::CLASS(uint32_t column_id, const int8_t& value, 
bool opposite);     \
     template CLASS<int16_t>::CLASS(uint32_t column_id, const int16_t& value, 
bool opposite);   \
@@ -692,4 +744,14 @@ 
COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(LessEqualPredicate)
 COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterPredicate)
 COMPARISON_PRED_COLUMN_EVALUATE_VEC_DECLARATION(GreaterEqualPredicate)
 
+#define COMPARISON_PRED_SET_DICT_CODE_DECLARATION(CLASS) \
+template void 
CLASS<StringValue>::set_dict_code_if_necessary(vectorized::IColumn& column);
+
+COMPARISON_PRED_SET_DICT_CODE_DECLARATION(EqualPredicate)
+COMPARISON_PRED_SET_DICT_CODE_DECLARATION(NotEqualPredicate)
+COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessPredicate)
+COMPARISON_PRED_SET_DICT_CODE_DECLARATION(LessEqualPredicate)
+COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterPredicate)
+COMPARISON_PRED_SET_DICT_CODE_DECLARATION(GreaterEqualPredicate)
+
 } //namespace doris
diff --git a/be/src/olap/comparison_predicate.h 
b/be/src/olap/comparison_predicate.h
index e363675..3df31c3 100644
--- a/be/src/olap/comparison_predicate.h
+++ b/be/src/olap/comparison_predicate.h
@@ -26,11 +26,12 @@ namespace doris {
 
 class VectorizedRowBatch;
 
-#define COMPARISON_PRED_CLASS_DEFINE(CLASS, IS_RANGE)                          
                    \
-    template <class type>                                                      
                    \
+#define COMPARISON_PRED_CLASS_DEFINE(CLASS, PT)                                
                    \
+    template <class T>                                                         
                    \
     class CLASS : public ColumnPredicate {                                     
                    \
     public:                                                                    
                    \
-        CLASS(uint32_t column_id, const type& value, bool opposite = false);   
                    \
+        CLASS(uint32_t column_id, const T& value, bool opposite = false);      
                    \
+        PredicateType type() const override { return PredicateType::PT; }      
                    \
         virtual void evaluate(VectorizedRowBatch* batch) const override;       
                    \
         void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const 
override;           \
         void evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size,     
                    \
@@ -46,18 +47,19 @@ class VectorizedRowBatch;
         void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size,                \
                          bool* flags) const override;                          
                    \
         void evaluate_vec(vectorized::IColumn& column, uint16_t size, bool* 
flags) const override; \
-        bool is_range_comparison_predicate() override { return IS_RANGE; }     
                    \
-                                                                               
                    \
+        void set_dict_code_if_necessary(vectorized::IColumn& column) override; 
                    \
     private:                                                                   
                    \
-        type _value;                                                           
                    \
+        T _value;                                                              
                    \
+        bool _dict_code_inited = false;                                        
                    \
+        int32_t _dict_code;                                                    
                    \
     };
 
-COMPARISON_PRED_CLASS_DEFINE(EqualPredicate, false)
-COMPARISON_PRED_CLASS_DEFINE(NotEqualPredicate, false)
-COMPARISON_PRED_CLASS_DEFINE(LessPredicate, true)
-COMPARISON_PRED_CLASS_DEFINE(LessEqualPredicate, true)
-COMPARISON_PRED_CLASS_DEFINE(GreaterPredicate, true)
-COMPARISON_PRED_CLASS_DEFINE(GreaterEqualPredicate, true)
+COMPARISON_PRED_CLASS_DEFINE(EqualPredicate, EQ)
+COMPARISON_PRED_CLASS_DEFINE(NotEqualPredicate, NE)
+COMPARISON_PRED_CLASS_DEFINE(LessPredicate, LT)
+COMPARISON_PRED_CLASS_DEFINE(LessEqualPredicate, LE)
+COMPARISON_PRED_CLASS_DEFINE(GreaterPredicate, GT)
+COMPARISON_PRED_CLASS_DEFINE(GreaterEqualPredicate, GE)
 
 } //namespace doris
 
diff --git a/be/src/olap/in_list_predicate.cpp 
b/be/src/olap/in_list_predicate.cpp
index 3fdac7d..21214f2 100644
--- a/be/src/olap/in_list_predicate.cpp
+++ b/be/src/olap/in_list_predicate.cpp
@@ -27,23 +27,23 @@
 namespace doris {
 
 #define IN_LIST_PRED_CONSTRUCTOR(CLASS)                                        
                \
-    template <class type>                                                      
                \
-    CLASS<type>::CLASS(uint32_t column_id, phmap::flat_hash_set<type>&& 
values, bool opposite) \
+    template <class T>                                                         
                \
+    CLASS<T>::CLASS(uint32_t column_id, phmap::flat_hash_set<T>&& values, bool 
opposite)       \
             : ColumnPredicate(column_id, opposite), _values(std::move(values)) 
{}
 
 IN_LIST_PRED_CONSTRUCTOR(InListPredicate)
 IN_LIST_PRED_CONSTRUCTOR(NotInListPredicate)
 
 #define IN_LIST_PRED_EVALUATE(CLASS, OP)                                       
                \
-    template <class type>                                                      
                \
-    void CLASS<type>::evaluate(VectorizedRowBatch* batch) const {              
                \
+    template <class T>                                                         
                \
+    void CLASS<T>::evaluate(VectorizedRowBatch* batch) const {                 
                \
         uint16_t n = batch->size();                                            
                \
         if (n == 0) {                                                          
                \
             return;                                                            
                \
         }                                                                      
                \
         uint16_t* sel = batch->selected();                                     
                \
-        const type* col_vector =                                               
                \
-                reinterpret_cast<const 
type*>(batch->column(_column_id)->col_data());          \
+        const T* col_vector =                                                  
                \
+                reinterpret_cast<const 
T*>(batch->column(_column_id)->col_data());             \
         uint16_t new_size = 0;                                                 
                \
         if (batch->column(_column_id)->no_nulls()) {                           
                \
             if (batch->selected_in_use()) {                                    
                \
@@ -89,15 +89,15 @@ IN_LIST_PRED_EVALUATE(InListPredicate, !=)
 IN_LIST_PRED_EVALUATE(NotInListPredicate, ==)
 
 #define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(CLASS, OP)                          
           \
-    template <class type>                                                      
           \
-    void CLASS<type>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* 
size) const { \
+    template <class T>                                                         
           \
+    void CLASS<T>::evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) 
const {    \
         uint16_t new_size = 0;                                                 
           \
         if (block->is_nullable()) {                                            
           \
             for (uint16_t i = 0; i < *size; ++i) {                             
           \
                 uint16_t idx = sel[i];                                         
           \
                 sel[new_size] = idx;                                           
           \
-                const type* cell_value =                                       
           \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());       \
+                const T* cell_value =                                          
           \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());          \
                 auto result = (!block->cell(idx).is_null() && 
_values.find(*cell_value)   \
                                                                       OP 
_values.end());  \
                 new_size += _opposite ? !result : result;                      
           \
@@ -106,8 +106,8 @@ IN_LIST_PRED_EVALUATE(NotInListPredicate, ==)
             for (uint16_t i = 0; i < *size; ++i) {                             
           \
                 uint16_t idx = sel[i];                                         
           \
                 sel[new_size] = idx;                                           
           \
-                const type* cell_value =                                       
           \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());       \
+                const T* cell_value =                                          
           \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());          \
                 auto result = (_values.find(*cell_value) OP _values.end());    
           \
                 new_size += _opposite ? !result : result;                      
           \
             }                                                                  
           \
@@ -120,8 +120,8 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
 
 // todo(zeno) define interface in IColumn to simplify code
 #define IN_LIST_PRED_COLUMN_EVALUATE(CLASS, OP)                                
                    \
-    template <class type>                                                      
                    \
-    void CLASS<type>::evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t* size) const { \
+    template <class T>                                                         
                    \
+    void CLASS<T>::evaluate(vectorized::IColumn& column, uint16_t* sel, 
uint16_t* size) const {    \
         uint16_t new_size = 0;                                                 
                    \
         if (column.is_nullable()) {                                            
                    \
             auto* nullable_col =                                               
                    \
@@ -130,57 +130,53 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE(NotInListPredicate, ==)
                                         
nullable_col->get_null_map_column()).get_data();           \
             auto& nested_col = nullable_col->get_nested_column();              
                    \
             if (nested_col.is_column_dictionary()) {                           
                    \
-                if constexpr (std::is_same_v<type, StringValue>) {             
                    \
+                if constexpr (std::is_same_v<T, StringValue>) {                
                    \
                     auto* nested_col_ptr = vectorized::check_and_get_column<   
                    \
                             
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);          \
-                    auto code_set = nested_col_ptr->find_codes(_values);       
                    \
                     auto& data_array = nested_col_ptr->get_data();             
                    \
                     for (uint16_t i = 0; i < *size; i++) {                     
                    \
                         uint16_t idx = sel[i];                                 
                    \
                         sel[new_size] = idx;                                   
                    \
-                        const auto& cell_value =                               
                    \
-                                reinterpret_cast<const 
vectorized::Int32&>(data_array[idx]);       \
+                        const auto& cell_value = data_array[idx];              
                    \
                         bool ret = !null_bitmap[idx]                           
                    \
-                                   && (code_set.find(cell_value) OP 
code_set.end());               \
+                                   && (_dict_codes.find(cell_value) OP 
_dict_codes.end());         \
                         new_size += _opposite ? !ret : ret;                    
                    \
                     }                                                          
                    \
                 }                                                              
                    \
             } else {                                                           
                    \
                 auto* nested_col_ptr = vectorized::check_and_get_column<       
                    \
-                        vectorized::PredicateColumnType<type>>(nested_col);    
                    \
+                        vectorized::PredicateColumnType<T>>(nested_col);       
                    \
                 auto& data_array = nested_col_ptr->get_data();                 
                    \
                 for (uint16_t i = 0; i < *size; i++) {                         
                    \
                     uint16_t idx = sel[i];                                     
                    \
                     sel[new_size] = idx;                                       
                    \
-                    const type& cell_value = reinterpret_cast<const 
type&>(data_array[idx]);       \
+                    const auto& cell_value = reinterpret_cast<const 
T&>(data_array[idx]);          \
                     bool ret = !null_bitmap[idx] && (_values.find(cell_value) 
OP _values.end());   \
                     new_size += _opposite ? !ret : ret;                        
                    \
                 }                                                              
                    \
             }                                                                  
                    \
             *size = new_size;                                                  
                    \
         } else if (column.is_column_dictionary()) {                            
                    \
-            if constexpr (std::is_same_v<type, StringValue>) {                 
                    \
+            if constexpr (std::is_same_v<T, StringValue>) {                    
                    \
                 auto& dict_col =                                               
                    \
                         
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(        \
                                 column);                                       
                    \
                 auto& data_array = dict_col.get_data();                        
                    \
-                auto code_set = dict_col.find_codes(_values);                  
                    \
                 for (uint16_t i = 0; i < *size; i++) {                         
                    \
                     uint16_t idx = sel[i];                                     
                    \
                     sel[new_size] = idx;                                       
                    \
-                    const auto& cell_value =                                   
                    \
-                            reinterpret_cast<const 
vectorized::Int32&>(data_array[idx]);           \
-                    auto result = (code_set.find(cell_value) OP 
code_set.end());                   \
+                    const auto& cell_value = data_array[idx];                  
                    \
+                    auto result = (_dict_codes.find(cell_value) OP 
_dict_codes.end());             \
                     new_size += _opposite ? !result : result;                  
                    \
                 }                                                              
                    \
             }                                                                  
                    \
         } else {                                                               
                    \
-            auto& number_column = 
reinterpret_cast<vectorized::PredicateColumnType<type>&>(column);\
+            auto& number_column = 
reinterpret_cast<vectorized::PredicateColumnType<T>&>(column);   \
             auto& data_array = number_column.get_data();                       
                    \
             for (uint16_t i = 0; i < *size; i++) {                             
                    \
                 uint16_t idx = sel[i];                                         
                    \
                 sel[new_size] = idx;                                           
                    \
-                const type& cell_value = reinterpret_cast<const 
type&>(data_array[idx]);           \
+                const auto& cell_value = reinterpret_cast<const 
T&>(data_array[idx]);              \
                 auto result = (_values.find(cell_value) OP _values.end());     
                    \
                 new_size += _opposite ? !result : result;                      
                    \
             }                                                                  
                    \
@@ -192,15 +188,15 @@ IN_LIST_PRED_COLUMN_EVALUATE(InListPredicate, !=)
 IN_LIST_PRED_COLUMN_EVALUATE(NotInListPredicate, ==)
 
 #define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(CLASS, OP)                       
                  \
-    template <class type>                                                      
                  \
-    void CLASS<type>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t 
size, bool* flags) \
+    template <class T>                                                         
                  \
+    void CLASS<T>::evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t 
size, bool* flags)    \
             const {                                                            
                  \
         if (block->is_nullable()) {                                            
                  \
             for (uint16_t i = 0; i < size; ++i) {                              
                  \
                 if (flags[i]) continue;                                        
                  \
                 uint16_t idx = sel[i];                                         
                  \
-                const type* cell_value =                                       
                  \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());              \
+                const T* cell_value =                                          
                  \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());                 \
                 auto result = (!block->cell(idx).is_null() && 
_values.find(*cell_value)          \
                                                                       OP 
_values.end());         \
                 flags[i] |= _opposite ? !result : result;                      
                  \
@@ -209,8 +205,8 @@ IN_LIST_PRED_COLUMN_EVALUATE(NotInListPredicate, ==)
             for (uint16_t i = 0; i < size; ++i) {                              
                  \
                 if (flags[i]) continue;                                        
                  \
                 uint16_t idx = sel[i];                                         
                  \
-                const type* cell_value =                                       
                  \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());              \
+                const T* cell_value =                                          
                  \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());                 \
                 auto result = (_values.find(*cell_value) OP _values.end());    
                  \
                 flags[i] |= _opposite ? !result : result;                      
                  \
             }                                                                  
                  \
@@ -221,15 +217,15 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(InListPredicate, !=)
 IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(NotInListPredicate, ==)
 
 #define IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(CLASS, OP)                      
                   \
-    template <class type>                                                      
                   \
-    void CLASS<type>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t 
size, bool* flags) \
+    template <class T>                                                         
                   \
+    void CLASS<T>::evaluate_and(ColumnBlock* block, uint16_t* sel, uint16_t 
size, bool* flags)    \
             const {                                                            
                   \
         if (block->is_nullable()) {                                            
                   \
             for (uint16_t i = 0; i < size; ++i) {                              
                   \
                 if (!flags[i]) continue;                                       
                   \
                 uint16_t idx = sel[i];                                         
                   \
-                const type* cell_value =                                       
                   \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());               \
+                const T* cell_value =                                          
                   \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());                  \
                 auto result = (!block->cell(idx).is_null() && 
_values.find(*cell_value)           \
                                                                       OP 
_values.end());          \
                 flags[i] &= _opposite ? !result : result;                      
                   \
@@ -238,8 +234,8 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_OR(NotInListPredicate, 
==)
             for (uint16_t i = 0; i < size; ++i) {                              
                   \
                 if (!flags[i]) continue;                                       
                   \
                 uint16_t idx = sel[i];                                         
                   \
-                const type* cell_value =                                       
                   \
-                        reinterpret_cast<const 
type*>(block->cell(idx).cell_ptr());               \
+                const T* cell_value =                                          
                   \
+                        reinterpret_cast<const 
T*>(block->cell(idx).cell_ptr());                  \
                 auto result = (_values.find(*cell_value) OP _values.end());    
                   \
                 flags[i] &= _opposite ? !result : result;                      
                   \
             }                                                                  
                   \
@@ -250,8 +246,8 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(InListPredicate, !=)
 IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, ==)
 
 #define IN_LIST_PRED_BITMAP_EVALUATE(CLASS, OP)                                
       \
-    template <class type>                                                      
       \
-    Status CLASS<type>::evaluate(const Schema& schema,                         
       \
+    template <class T>                                                         
       \
+    Status CLASS<T>::evaluate(const Schema& schema,                            
       \
                                  const std::vector<BitmapIndexIterator*>& 
iterators,  \
                                  uint32_t num_rows, roaring::Roaring* result) 
const { \
         BitmapIndexIterator* iterator = iterators[_column_id];                 
       \
@@ -286,6 +282,33 @@ IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_AND(NotInListPredicate, 
==)
 IN_LIST_PRED_BITMAP_EVALUATE(InListPredicate, &=)
 IN_LIST_PRED_BITMAP_EVALUATE(NotInListPredicate, -=)
 
+#define IN_LIST_PRED_SET_DICT_CODE(CLASS)                                      
                \
+    template <class T>                                                         
                \
+    void CLASS<T>::set_dict_code_if_necessary(vectorized::IColumn& column) {   
                \
+        if (_dict_code_inited) {                                               
                \
+            return;                                                            
                \
+        }                                                                      
                \
+        if constexpr (std::is_same_v<T, StringValue>) {                        
                \
+            auto* col_ptr = column.get_ptr().get();                            
                \
+            if (column.is_nullable()) {                                        
                \
+                auto nullable_col =                                            
                \
+                        
reinterpret_cast<vectorized::ColumnNullable*>(col_ptr);                \
+                col_ptr = nullable_col->get_nested_column_ptr().get();         
                \
+            }                                                                  
                \
+            if (col_ptr->is_column_dictionary()) {                             
                \
+                auto& dict_col =                                               
                \
+                        
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(    \
+                                *col_ptr);                                     
                \
+                auto code_set = dict_col.find_codes(_values);                  
                \
+                _dict_codes = std::move(code_set);                             
                \
+                _dict_code_inited = true;                                      
                \
+            }                                                                  
                \
+        }                                                                      
                \
+    }
+
+IN_LIST_PRED_SET_DICT_CODE(InListPredicate)
+IN_LIST_PRED_SET_DICT_CODE(NotInListPredicate)
+
 #define IN_LIST_PRED_CONSTRUCTOR_DECLARATION(CLASS)                            
                    \
     template CLASS<int8_t>::CLASS(uint32_t column_id, 
phmap::flat_hash_set<int8_t>&& values,       \
                                   bool opposite);                              
                    \
@@ -393,4 +416,8 @@ 
IN_LIST_PRED_COLUMN_BLOCK_EVALUATE_DECLARATION(NotInListPredicate)
 IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(InListPredicate)
 IN_LIST_PRED_BITMAP_EVALUATE_DECLARATION(NotInListPredicate)
 
+template void 
InListPredicate<StringValue>::set_dict_code_if_necessary(vectorized::IColumn& 
column);
+template void NotInListPredicate<StringValue>::set_dict_code_if_necessary(
+        vectorized::IColumn& column);
+
 } //namespace doris
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 7cd237b..089ee84 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -78,11 +78,12 @@ class VectorizedRowBatch;
 
 // todo(wb) support evaluate_and,evaluate_or
 
-#define IN_LIST_PRED_CLASS_DEFINE(CLASS)                                       
                   \
-    template <class type>                                                      
                   \
+#define IN_LIST_PRED_CLASS_DEFINE(CLASS, PT)                                   
                   \
+    template <class T>                                                         
                   \
     class CLASS : public ColumnPredicate {                                     
                   \
     public:                                                                    
                   \
-        CLASS(uint32_t column_id, phmap::flat_hash_set<type>&& values, bool 
is_opposite = false); \
+        CLASS(uint32_t column_id, phmap::flat_hash_set<T>&& values, bool 
is_opposite = false);    \
+        PredicateType type() const override { return PredicateType::PT; }      
                   \
         virtual void evaluate(VectorizedRowBatch* batch) const override;       
                   \
         void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const 
override;          \
         void evaluate_or(ColumnBlock* block, uint16_t* sel, uint16_t size,     
                   \
@@ -95,13 +96,15 @@ class VectorizedRowBatch;
         void evaluate(vectorized::IColumn& column, uint16_t* sel, uint16_t* 
size) const override; \
         void evaluate_and(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size, bool* flags) const override {} \
         void evaluate_or(vectorized::IColumn& column, uint16_t* sel, uint16_t 
size, bool* flags) const override {} \
-        bool is_in_predicate() override { return true; }                       
                                                                   \
+        void set_dict_code_if_necessary(vectorized::IColumn& column) override; 
                   \
     private:                                                                   
                   \
-        phmap::flat_hash_set<type> _values;                                    
                   \
+        phmap::flat_hash_set<T> _values;                                       
                   \
+        bool _dict_code_inited = false;                                        
                   \
+        phmap::flat_hash_set<int32_t> _dict_codes;                             
                   \
     };
 
-IN_LIST_PRED_CLASS_DEFINE(InListPredicate)
-IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate)
+IN_LIST_PRED_CLASS_DEFINE(InListPredicate, IN_LIST)
+IN_LIST_PRED_CLASS_DEFINE(NotInListPredicate, NO_IN_LIST)
 
 } //namespace doris
 
diff --git a/be/src/olap/null_predicate.cpp b/be/src/olap/null_predicate.cpp
index da3eb29..43cfbca 100644
--- a/be/src/olap/null_predicate.cpp
+++ b/be/src/olap/null_predicate.cpp
@@ -29,6 +29,10 @@ namespace doris {
 NullPredicate::NullPredicate(uint32_t column_id, bool is_null, bool opposite)
         : ColumnPredicate(column_id), _is_null(opposite != is_null) {}
 
+PredicateType NullPredicate::type() const {
+    return _is_null ? PredicateType::IS_NULL : PredicateType::NOT_IS_NULL;
+}
+
 void NullPredicate::evaluate(VectorizedRowBatch* batch) const {
     uint16_t n = batch->size();
     if (n == 0) {
diff --git a/be/src/olap/null_predicate.h b/be/src/olap/null_predicate.h
index 681e60b..7b90ffb 100644
--- a/be/src/olap/null_predicate.h
+++ b/be/src/olap/null_predicate.h
@@ -32,6 +32,8 @@ class NullPredicate : public ColumnPredicate {
 public:
     NullPredicate(uint32_t column_id, bool is_null, bool opposite = false);
 
+    virtual PredicateType type() const override;
+
     virtual void evaluate(VectorizedRowBatch* batch) const override;
 
     void evaluate(ColumnBlock* block, uint16_t* sel, uint16_t* size) const 
override;
diff --git a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp 
b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
index f199000..3c6e623 100644
--- a/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_dict_page.cpp
@@ -241,18 +241,7 @@ void BinaryDictPageDecoder::set_dict_decoder(PageDecoder* 
dict_decoder, StringRe
 
 Status BinaryDictPageDecoder::next_batch(size_t* n, 
vectorized::MutableColumnPtr &dst) {
     if (_encoding_type == PLAIN_ENCODING) {
-        // todo(zeno) Handle convert in ColumnDictionary,
-        //  add interface like convert_to_predicate_column_if_necessary
-        auto* col_ptr = dst.get();
-        if (dst->is_nullable()) {
-            auto nullable_col = 
reinterpret_cast<vectorized::ColumnNullable*>(dst.get());
-            col_ptr = nullable_col->get_nested_column_ptr().get();
-        }
-
-        if (col_ptr->is_column_dictionary()) {
-            auto* dict_col_ptr = 
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>*>(col_ptr);
-            col_ptr = 
(*std::move(dict_col_ptr->convert_to_predicate_column())).assume_mutable();
-        }
+        dst = 
(*(std::move(dst->convert_to_predicate_column_if_dictionary()))).assume_mutable();
         return _data_page_decoder->next_batch(n, dst);
     }
     // dictionary encoding
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp 
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e57730a..549977d 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -616,8 +616,9 @@ void SegmentIterator::_vec_init_lazy_materialization() {
             pred_column_ids.insert(cid);
 
             if (type == OLAP_FIELD_TYPE_VARCHAR || type == 
OLAP_FIELD_TYPE_CHAR ||
-                type == OLAP_FIELD_TYPE_STRING || predicate->is_in_predicate() 
||
-                predicate->is_bloom_filter_predicate()) {
+                type == OLAP_FIELD_TYPE_STRING || predicate->type() == 
PredicateType::BF ||
+                predicate->type() == PredicateType::IN_LIST ||
+                predicate->type() == PredicateType::NO_IN_LIST) {
                 short_cir_pred_col_id_set.insert(cid);
                 _short_cir_eval_predicate.push_back(predicate);
                 _is_all_column_basic_type = false;
@@ -859,23 +860,17 @@ void 
SegmentIterator::_evaluate_short_circuit_predicate(uint16_t* vec_sel_rowid_
         return;
     }
 
-    for (auto column_predicate : _short_cir_eval_predicate) {
-        auto column_id = column_predicate->column_id();
+    for (auto predicate : _short_cir_eval_predicate) {
+        auto column_id = predicate->column_id();
         auto& short_cir_column = _current_return_columns[column_id];
         auto* col_ptr = short_cir_column.get();
-        // todo(zeno) define convert_dict_codes_if_dictionary interface in 
IColumn
-        if (short_cir_column->is_nullable()) {
-            auto nullable_col =
-                    
reinterpret_cast<vectorized::ColumnNullable*>(short_cir_column.get());
-            col_ptr = nullable_col->get_nested_column_ptr().get();
+        // range comparison predicate needs to sort the dict and convert the 
encoding
+        if (predicate->type() == PredicateType::LT || predicate->type() == 
PredicateType::LE ||
+            predicate->type() == PredicateType::GT || predicate->type() == 
PredicateType::GE) {
+            col_ptr->convert_dict_codes_if_necessary();
         }
-
-        if (col_ptr->is_column_dictionary() && 
column_predicate->is_range_comparison_predicate()) {
-            auto& dict_col =
-                    
reinterpret_cast<vectorized::ColumnDictionary<vectorized::Int32>&>(*col_ptr);
-            dict_col.convert_dict_codes();
-        }
-        column_predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, 
selected_size_ptr);
+        predicate->set_dict_code_if_necessary(*short_cir_column);
+        predicate->evaluate(*short_cir_column, vec_sel_rowid_idx, 
selected_size_ptr);
     }
 
     // evaluate delete condition
diff --git a/be/src/runtime/string_value.h b/be/src/runtime/string_value.h
index 860fced..cdf33ab 100644
--- a/be/src/runtime/string_value.h
+++ b/be/src/runtime/string_value.h
@@ -188,6 +188,12 @@ struct StringValue {
             return a.compare(b) < 0;
         }
     };
+
+    struct HashOfStringValue {
+        size_t operator()(const StringValue& v) const {
+            return HashUtil::hash(v.ptr, v.len, 0);
+        }
+    };
 };
 
 // This function must be called 'hash_value' to be picked up by boost.
diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index 0927f34..7e717bb 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -64,6 +64,13 @@ public:
     /// If column is ColumnLowCardinality, transforms it to a full column.
     virtual Ptr convert_to_full_column_if_low_cardinality() const { return 
get_ptr(); }
 
+    /// If column isn't ColumnDictionary, return itself.
+    /// If column is ColumnDictionary, transforms it to a predicate column.
+    virtual Ptr convert_to_predicate_column_if_dictionary() { return 
get_ptr(); }
+
+    /// If column is ColumnDictionary and is evaluated by a range comparison predicate, 
sort the dict and convert the dict codes accordingly.
+    virtual void convert_dict_codes_if_necessary() {}
+
     /// Creates empty column with the same type.
     virtual MutablePtr clone_empty() const { return clone_resized(0); }
 
@@ -518,7 +525,6 @@ bool is_column_const(const IColumn& column);
 
 /// True if column is a ColumnNullable instance. It's just syntactic sugar for a 
type check.
 bool is_column_nullable(const IColumn& column);
-
 } // namespace doris::vectorized
 
 // Wrap `ColumnPtr` because `ColumnPtr` can't be used in forward declaration.
diff --git a/be/src/vec/columns/column_dictionary.h 
b/be/src/vec/columns/column_dictionary.h
index 3aa5bd7..eb33f55 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -32,6 +32,10 @@
 #include "vec/columns/column_vector.h"
 #include "vec/columns/predicate_column.h"
 #include "vec/core/types.h"
+#include "vec/common/typeid_cast.h"
+#include "olap/column_predicate.h"
+#include "olap/comparison_predicate.h"
+#include "olap/in_list_predicate.h"
 
 namespace doris::vectorized {
 
@@ -49,12 +53,13 @@ namespace doris::vectorized {
  */
 template <typename T>
 class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
+    static_assert(IsNumber<T>);
 private:
     friend class COWHelper<IColumn, ColumnDictionary>;
 
     ColumnDictionary() {}
-    ColumnDictionary(const size_t n) : codes(n) {}
-    ColumnDictionary(const ColumnDictionary& src) : codes(src.codes.begin(), 
src.codes.end()) {}
+    ColumnDictionary(const size_t n) : _codes(n) {}
+    ColumnDictionary(const ColumnDictionary& src) : _codes(src._codes.begin(), 
src._codes.end()) {}
 
 public:
     using Self = ColumnDictionary;
@@ -62,13 +67,9 @@ public:
     using Container = PaddedPODArray<value_type>;
     using DictContainer = PaddedPODArray<StringValue>;
 
-    bool is_numeric() const override { return false; }
-
-    bool is_predicate_column() const override { return false; }
-
     bool is_column_dictionary() const override { return true; }
 
-    size_t size() const override { return codes.size(); }
+    size_t size() const override { return _codes.size(); }
 
     [[noreturn]] StringRef get_data_at(size_t n) const override {
         LOG(FATAL) << "get_data_at not supported in ColumnDictionary";
@@ -94,17 +95,20 @@ public:
     }
 
     void insert_data(const char* pos, size_t /*length*/) override {
-        codes.push_back(unaligned_load<T>(pos));
+        _codes.push_back(unaligned_load<T>(pos));
     }
 
-    void insert_data(const T value) { codes.push_back(value); }
+    void insert_data(const T value) { _codes.push_back(value); }
 
-    void insert_default() override { codes.push_back(T()); }
+    void insert_default() override { _codes.push_back(T()); }
 
-    void clear() override { codes.clear(); }
+    void clear() override {
+        _codes.clear();
+        _dict_code_converted = false;
+    }
 
     // TODO: Make dict memory usage more precise
-    size_t byte_size() const override { return codes.size() * 
sizeof(codes[0]); }
+    size_t byte_size() const override { return _codes.size() * 
sizeof(_codes[0]); }
 
     size_t allocated_bytes() const override { return byte_size(); }
 
@@ -115,11 +119,9 @@ public:
         LOG(FATAL) << "get_permutation not supported in ColumnDictionary";
     }
 
-    void reserve(size_t n) override { codes.reserve(n); }
+    void reserve(size_t n) override { _codes.reserve(n); }
 
-    [[noreturn]] const char* get_family_name() const override {
-        LOG(FATAL) << "get_family_name not supported in ColumnDictionary";
-    }
+    const char* get_family_name() const override { return "ColumnDictionary"; }
 
     [[noreturn]] MutableColumnPtr clone_resized(size_t size) const override {
         LOG(FATAL) << "clone_resized not supported in ColumnDictionary";
@@ -129,43 +131,13 @@ public:
         LOG(FATAL) << "insert not supported in ColumnDictionary";
     }
 
-    Field operator[](size_t n) const override { return codes[n]; }
+    Field operator[](size_t n) const override { return _codes[n]; }
 
     void get(size_t n, Field& res) const override { res = (*this)[n]; }
 
-    [[noreturn]] UInt64 get64(size_t n) const override {
-        LOG(FATAL) << "get field not supported in ColumnDictionary";
-    }
-
-    [[noreturn]] Float64 get_float64(size_t n) const override {
-        LOG(FATAL) << "get field not supported in ColumnDictionary";
-    }
-
-    [[noreturn]] UInt64 get_uint(size_t n) const override {
-        LOG(FATAL) << "get field not supported in ColumnDictionary";
-    }
-
-    [[noreturn]] bool get_bool(size_t n) const override {
-        LOG(FATAL) << "get field not supported in ColumnDictionary";
-    }
-
-    [[noreturn]] Int64 get_int(size_t n) const override {
-        LOG(FATAL) << "get field not supported in ColumnDictionary";
-    }
+    Container& get_data() { return _codes; }
 
-    Container& get_data() { return codes; }
-
-    const Container& get_data() const { return codes; }
-
-    T find_code(const StringValue& value) const { return 
dict.find_code(value); }
-
-    T find_bound_code(const StringValue& value, bool lower, bool eq) const {
-        return dict.find_bound_code(value, lower, eq);
-    }
-
-    phmap::flat_hash_set<T> find_codes(const 
phmap::flat_hash_set<StringValue>& values) const {
-        return dict.find_codes(values);
-    }
+    const Container& get_data() const { return _codes; }
 
     // it's impossible to use ComplexType as key, so we don't have to 
implement them
     [[noreturn]] StringRef serialize_value_into_arena(size_t n, Arena& arena,
@@ -222,8 +194,8 @@ public:
         auto* res_col = reinterpret_cast<vectorized::ColumnString*>(col_ptr);
         for (size_t i = 0; i < sel_size; i++) {
             uint16_t n = sel[i];
-            auto& code = reinterpret_cast<T&>(codes[n]);
-            auto value = dict.get_value(code);
+            auto& code = reinterpret_cast<T&>(_codes[n]);
+            auto value = _dict.get_value(code);
             res_col->insert_data(value.ptr, value.len);
         }
         return Status::OK();
@@ -241,18 +213,43 @@ public:
                                const StringRef* dict_array, size_t data_num,
                                uint32_t dict_num) override {
         if (!is_dict_inited()) {
-            dict.reserve(dict_num);
+            _dict.reserve(dict_num);
             for (uint32_t i = 0; i < dict_num; ++i) {
                 auto value = StringValue(dict_array[i].data, 
dict_array[i].size);
-                dict.insert_value(value);
+                _dict.insert_value(value);
             }
             _dict_inited = true;
         }
 
-        char* end_ptr = (char*)codes.get_end_ptr();
+        char* end_ptr = (char*)_codes.get_end_ptr();
         memcpy(end_ptr, data_array + start_index, data_num * sizeof(T));
         end_ptr += data_num * sizeof(T);
-        codes.set_end_ptr(end_ptr);
+        _codes.set_end_ptr(end_ptr);
+    }
+
+    void convert_dict_codes_if_necessary() override {
+        if (!is_dict_sorted()) {
+            _dict.sort();
+            _dict_sorted = true;
+        }
+
+        if (!is_dict_code_converted()) {
+            for (size_t i = 0; i < size(); ++i) {
+                _codes[i] = _dict.convert_code(_codes[i]);
+            }
+            _dict_code_converted = true;
+        }
+    }
+
+    int32_t find_code(const StringValue& value) const { return 
_dict.find_code(value); }
+
+    int32_t find_code_by_bound(const StringValue& value, bool lower, bool eq) 
const {
+        return _dict.find_code_by_bound(value, lower, eq);
+    }
+
+    phmap::flat_hash_set<int32_t> find_codes(
+            const phmap::flat_hash_set<StringValue>& values) const {
+        return _dict.find_codes(values);
     }
 
     bool is_dict_inited() const { return _dict_inited; }
@@ -261,126 +258,110 @@ public:
 
     bool is_dict_code_converted() const { return _dict_code_converted; }
 
-    ColumnPtr convert_to_predicate_column() {
+    ColumnPtr convert_to_predicate_column_if_dictionary() override {
         auto res = vectorized::PredicateColumnType<StringValue>::create();
-        size_t size = codes.size();
+        size_t size = _codes.size();
         res->reserve(size);
         for (size_t i = 0; i < size; ++i) {
-            auto& code = reinterpret_cast<T&>(codes[i]);
-            auto value = dict.get_value(code);
+            auto& code = reinterpret_cast<T&>(_codes[i]);
+            auto value = _dict.get_value(code);
             res->insert_data(value.ptr, value.len);
         }
-        dict.clear();
+        _dict.clear();
         return res;
     }
 
-    void convert_dict_codes() {
-        if (!is_dict_sorted()) {
-            sort_dict();
-        }
-
-        if (!is_dict_code_converted()) {
-            for (size_t i = 0; i < size(); ++i) {
-                codes[i] = dict.convert_code(codes[i]);
-            }
-            _dict_code_converted = true;
-        }
-    }
-
-    void sort_dict() {
-        dict.sort();
-        _dict_sorted = true;
-    }
-
     class Dictionary {
     public:
         Dictionary() = default;
 
         void reserve(size_t n) {
-            dict_data.reserve(n);
-            inverted_index.reserve(n);
+            _dict_data.reserve(n);
+            _inverted_index.reserve(n);
         }
 
         inline void insert_value(StringValue& value) {
-            dict_data.push_back_without_reserve(value);
-            inverted_index[value] = inverted_index.size();
+            _dict_data.push_back_without_reserve(value);
+            _inverted_index[value] = _inverted_index.size();
         }
 
-        inline T find_code(const StringValue& value) const {
-            auto it = inverted_index.find(value);
-            if (it != inverted_index.end()) {
+        inline int32_t find_code(const StringValue& value) const {
+            auto it = _inverted_index.find(value);
+            if (it != _inverted_index.end()) {
                 return it->second;
             }
             return -1;
         }
 
-        inline T find_bound_code(const StringValue& value, bool lower, bool 
eq) const {
+        inline int32_t find_code_by_bound(const StringValue& value, bool 
lower, bool eq) const {
             auto code = find_code(value);
             if (code >= 0) {
                 return code;
             }
 
             if (lower) {
-                return std::lower_bound(dict_data.begin(), dict_data.end(), 
value) - dict_data.begin() - eq;
+                return std::lower_bound(_dict_data.begin(), _dict_data.end(), 
value) -
+                       _dict_data.begin() - eq;
             } else {
-                return std::upper_bound(dict_data.begin(), dict_data.end(), 
value) - dict_data.begin() + eq;
+                return std::upper_bound(_dict_data.begin(), _dict_data.end(), 
value) -
+                       _dict_data.begin() + eq;
             }
         }
 
-        inline phmap::flat_hash_set<T> find_codes(const 
phmap::flat_hash_set<StringValue>& values) const {
-            phmap::flat_hash_set<T> code_set;
+        inline phmap::flat_hash_set<int32_t> find_codes(
+                const phmap::flat_hash_set<StringValue>& values) const {
+            phmap::flat_hash_set<int32_t> code_set;
             for (const auto& value : values) {
-                auto it = inverted_index.find(value);
-                if (it != inverted_index.end()) {
+                auto it = _inverted_index.find(value);
+                if (it != _inverted_index.end()) {
                     code_set.insert(it->second);
                 }
             }
             return code_set;
         }
 
-        inline StringValue& get_value(T code) { return dict_data[code]; }
+        inline StringValue& get_value(T code) { return _dict_data[code]; }
 
         void clear() {
-            dict_data.clear();
-            inverted_index.clear();
-            code_convert_map.clear();
+            _dict_data.clear();
+            _inverted_index.clear();
+            _code_convert_map.clear();
         }
 
         void sort() {
-            size_t dict_size = dict_data.size();
-            std::sort(dict_data.begin(), dict_data.end(), comparator);
+            size_t dict_size = _dict_data.size();
+            std::sort(_dict_data.begin(), _dict_data.end(), _comparator);
             for (size_t i = 0; i < dict_size; ++i) {
-                code_convert_map[inverted_index.find(dict_data[i])->second] = 
(T)i;
-                inverted_index[dict_data[i]] = (T)i;
+                _code_convert_map[_inverted_index.find(_dict_data[i])->second] 
= (T)i;
+                _inverted_index[_dict_data[i]] = (T)i;
             }
         }
 
-        inline T convert_code(const T& code) const { return 
code_convert_map.find(code)->second; }
+        inline T convert_code(const T& code) const { return 
_code_convert_map.find(code)->second; }
 
-        size_t byte_size() { return dict_data.size() * sizeof(dict_data[0]); }
+        size_t byte_size() { return _dict_data.size() * sizeof(_dict_data[0]); 
}
 
     private:
-        struct HashOfStringValue {
-            size_t operator()(const StringValue& value) const {
-                return HashStringThoroughly(value.ptr, value.len);
-            }
-        };
-
-        StringValue::Comparator comparator;
+        StringValue::Comparator _comparator;
         // dict code -> dict value
-        DictContainer dict_data;
+        DictContainer _dict_data;
         // dict value -> dict code
-        phmap::flat_hash_map<StringValue, T, HashOfStringValue> inverted_index;
+        phmap::flat_hash_map<StringValue, T, StringValue::HashOfStringValue> 
_inverted_index;
         // data page code -> sorted dict code, only used for range comparison 
predicate
-        phmap::flat_hash_map<T, T> code_convert_map;
+        phmap::flat_hash_map<T, T> _code_convert_map;
     };
 
 private:
     bool _dict_inited = false;
     bool _dict_sorted = false;
     bool _dict_code_converted = false;
-    Dictionary dict;
-    Container codes;
+    Dictionary _dict;
+    Container _codes;
 };
 
-} // namespace doris::vectorized
\ No newline at end of file
+template class ColumnDictionary<uint8_t>;
+template class ColumnDictionary<uint16_t>;
+template class ColumnDictionary<uint32_t>;
+template class ColumnDictionary<int32_t>;
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_nullable.h 
b/be/src/vec/columns/column_nullable.h
index 8badf6e..aa0df2d 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -267,6 +267,17 @@ public:
         LOG(FATAL) << "should not call the method in column nullable";
     }
 
+    ColumnPtr convert_to_predicate_column_if_dictionary() override {
+        IColumn* nested_ptr = get_nested_column_ptr().get();
+        nested_ptr = 
(*(std::move(nested_ptr->convert_to_predicate_column_if_dictionary()
+                                  ))).assume_mutable();
+        return get_ptr();
+    }
+
+    void convert_dict_codes_if_necessary() override {
+        get_nested_column().convert_dict_codes_if_necessary();
+    }
+
 private:
     WrappedPtr nested_column;
     WrappedPtr null_map;

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to