This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch cs_opt_version-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit c187e3b9b6cce21fd15103212c8e7f5f02593a0c
Author: HappenLee <[email protected]>
AuthorDate: Wed Jul 9 21:50:03 2025 +0800

    cherry pick like opt/lru-k
---
 be/src/olap/like_column_predicate.cpp | 21 +++--------
 be/src/olap/like_column_predicate.h   | 65 +++++++++++++++++++++++++----------
 be/src/vec/functions/like.cpp         | 13 ++++---
 3 files changed, 58 insertions(+), 41 deletions(-)

diff --git a/be/src/olap/like_column_predicate.cpp 
b/be/src/olap/like_column_predicate.cpp
index b441e982606..6da2aa3062f 100644
--- a/be/src/olap/like_column_predicate.cpp
+++ b/be/src/olap/like_column_predicate.cpp
@@ -62,15 +62,12 @@ uint16_t LikeColumnPredicate<T>::_evaluate_inner(const 
vectorized::IColumn& colu
             auto* nested_col_ptr = vectorized::check_and_get_column<
                     
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);
             auto& data_array = nested_col_ptr->get_data();
+            const auto& dict_res = 
*_find_code_from_dictionary_column(*nested_col_ptr);
             if (!nullable_col->has_null()) {
                 for (uint16_t i = 0; i != size; i++) {
                     uint16_t idx = sel[i];
                     sel[new_size] = idx;
-                    StringRef cell_value = 
nested_col_ptr->get_shrink_value(data_array[idx]);
-                    unsigned char flag = 0;
-                    static_cast<void>((_state->scalar_function)(
-                            
const_cast<vectorized::LikeSearchState*>(&_like_state),
-                            StringRef(cell_value.data, cell_value.size), 
pattern, &flag));
+                    unsigned char flag = dict_res[data_array[idx]];
                     new_size += _opposite ^ flag;
                 }
             } else {
@@ -81,12 +78,7 @@ uint16_t LikeColumnPredicate<T>::_evaluate_inner(const 
vectorized::IColumn& colu
                         new_size += _opposite;
                         continue;
                     }
-
-                    StringRef cell_value = 
nested_col_ptr->get_shrink_value(data_array[idx]);
-                    unsigned char flag = 0;
-                    static_cast<void>((_state->scalar_function)(
-                            
const_cast<vectorized::LikeSearchState*>(&_like_state),
-                            StringRef(cell_value.data, cell_value.size), 
pattern, &flag));
+                    unsigned char flag = dict_res[data_array[idx]];
                     new_size += _opposite ^ flag;
                 }
             }
@@ -126,15 +118,12 @@ uint16_t LikeColumnPredicate<T>::_evaluate_inner(const 
vectorized::IColumn& colu
         if (column.is_column_dictionary()) {
             auto* nested_col_ptr = vectorized::check_and_get_column<
                     vectorized::ColumnDictionary<vectorized::Int32>>(column);
+            const auto& dict_res = 
*_find_code_from_dictionary_column(*nested_col_ptr);
             auto& data_array = nested_col_ptr->get_data();
             for (uint16_t i = 0; i != size; i++) {
                 uint16_t idx = sel[i];
                 sel[new_size] = idx;
-                StringRef cell_value = 
nested_col_ptr->get_shrink_value(data_array[idx]);
-                unsigned char flag = 0;
-                static_cast<void>((_state->scalar_function)(
-                        const_cast<vectorized::LikeSearchState*>(&_like_state),
-                        StringRef(cell_value.data, cell_value.size), pattern, 
&flag));
+                unsigned char flag = dict_res[data_array[idx]];
                 new_size += _opposite ^ flag;
             }
         } else {
diff --git a/be/src/olap/like_column_predicate.h 
b/be/src/olap/like_column_predicate.h
index 31763d45f7e..7402d8c9f5a 100644
--- a/be/src/olap/like_column_predicate.h
+++ b/be/src/olap/like_column_predicate.h
@@ -101,6 +101,7 @@ private:
             if (nested_col.is_column_dictionary()) {
                 auto* nested_col_ptr = vectorized::check_and_get_column<
                         
vectorized::ColumnDictionary<vectorized::Int32>>(nested_col);
+                const auto& dict_res = 
*_find_code_from_dictionary_column(*nested_col_ptr);
                 auto& data_array = nested_col_ptr->get_data();
                 for (uint16_t i = 0; i < size; i++) {
                     if (null_map_data[i]) {
@@ -112,18 +113,10 @@ private:
                         continue;
                     }
 
-                    StringRef cell_value = 
nested_col_ptr->get_shrink_value(data_array[i]);
+                    unsigned char flag = dict_res[data_array[i]];
                     if constexpr (is_and) {
-                        unsigned char flag = 0;
-                        static_cast<void>((_state->scalar_function)(
-                                
const_cast<vectorized::LikeSearchState*>(&_like_state),
-                                StringRef(cell_value.data, cell_value.size), 
pattern, &flag));
                         flags[i] &= _opposite ^ flag;
                     } else {
-                        unsigned char flag = 0;
-                        static_cast<void>((_state->scalar_function)(
-                                
const_cast<vectorized::LikeSearchState*>(&_like_state),
-                                StringRef(cell_value.data, cell_value.size), 
pattern, &flag));
                         flags[i] = _opposite ^ flag;
                     }
                 }
@@ -136,19 +129,12 @@ private:
                 auto* nested_col_ptr = vectorized::check_and_get_column<
                         
vectorized::ColumnDictionary<vectorized::Int32>>(column);
                 auto& data_array = nested_col_ptr->get_data();
+                const auto& dict_res = 
*_find_code_from_dictionary_column(*nested_col_ptr);
                 for (uint16_t i = 0; i < size; i++) {
-                    StringRef cell_value = 
nested_col_ptr->get_shrink_value(data_array[i]);
+                    unsigned char flag = dict_res[data_array[i]];
                     if constexpr (is_and) {
-                        unsigned char flag = 0;
-                        static_cast<void>((_state->scalar_function)(
-                                
const_cast<vectorized::LikeSearchState*>(&_like_state),
-                                StringRef(cell_value.data, cell_value.size), 
pattern, &flag));
                         flags[i] &= _opposite ^ flag;
                     } else {
-                        unsigned char flag = 0;
-                        static_cast<void>((_state->scalar_function)(
-                                
const_cast<vectorized::LikeSearchState*>(&_like_state),
-                                StringRef(cell_value.data, cell_value.size), 
pattern, &flag));
                         flags[i] = _opposite ^ flag;
                     }
                 }
@@ -159,6 +145,49 @@ private:
         }
     }
 
+    __attribute__((flatten)) std::vector<bool>* 
_find_code_from_dictionary_column(
+            const vectorized::ColumnDictI32& column) const {
+        std::vector<bool>* res = nullptr;
+        if (_segment_id_to_cached_res_flags.if_contains(
+                    column.get_rowset_segment_id(),
+                    [&res](const auto& pair) { res = &pair.second; })) {
+            return res;
+        }
+
+        std::vector<bool> tmp_res(column.dict_size(), false);
+        for (int i = 0; i < column.dict_size(); i++) {
+            StringRef cell_value = column.get_shrink_value(i);
+            unsigned char flag = 0;
+            static_cast<void>((_state->scalar_function)(
+                    const_cast<vectorized::LikeSearchState*>(&_like_state),
+                    StringRef(cell_value.data, cell_value.size), pattern, 
&flag));
+            tmp_res[i] = flag;
+        }
+        // Sometimes the dict is not initialized when run comparison predicate 
here, for example,
+        // the full page is null, then the reader will skip read, so that the 
dictionary is not
+        // inited. The cached code is wrong during this case, because the 
following page maybe not
+        // null, and the dict should have items in the future.
+        //
+        // Cached code may have problems, so that add a config here, if not 
opened, then
+        // we will return the code and not cache it.
+        if (!column.is_dict_empty() && 
config::enable_low_cardinality_cache_code) {
+            _segment_id_to_cached_res_flags.emplace(
+                    std::pair {column.get_rowset_segment_id(), tmp_res});
+        }
+
+        _segment_id_to_cached_res_flags.if_contains(
+                column.get_rowset_segment_id(), [&res](const auto& pair) { res 
= &pair.second; });
+        return res;
+    }
+
+    mutable phmap::parallel_flat_hash_map<
+            std::pair<RowsetId, uint32_t>, std::vector<bool>,
+            phmap::priv::hash_default_hash<std::pair<RowsetId, uint32_t>>,
+            phmap::priv::hash_default_eq<std::pair<RowsetId, uint32_t>>,
+            std::allocator<std::pair<const std::pair<RowsetId, uint32_t>, 
int32_t>>, 4,
+            std::shared_mutex>
+            _segment_id_to_cached_res_flags;
+
     std::string _debug_string() const override {
         std::string info = "LikeColumnPredicate";
         return info;
diff --git a/be/src/vec/functions/like.cpp b/be/src/vec/functions/like.cpp
index 4ed14280e4c..f3aa71d03a0 100644
--- a/be/src/vec/functions/like.cpp
+++ b/be/src/vec/functions/like.cpp
@@ -509,11 +509,12 @@ Status FunctionLikeBase::execute_impl(FunctionContext* 
context, Block& block,
                                       size_t input_rows_count) const {
     const auto values_col =
             
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
-    const auto* values = check_and_get_column<ColumnString>(values_col.get());
+    const auto* values =
+            assert_cast<const ColumnString*, 
TypeCheckOnRelease::DISABLE>(values_col.get());
 
-    if (!values) {
-        return Status::InternalError("Not supported input arguments types");
-    }
+    //    if (!values) {
+    //        return Status::InternalError("Not supported input arguments 
types");
+    //    }
     // result column
     auto res = ColumnUInt8::create();
     ColumnUInt8::Container& vec_res = res->get_data();
@@ -578,9 +579,7 @@ Status FunctionLikeBase::execute_substring(const 
ColumnString::Chars& values,
         }
 
         /// We check that the entry does not pass through the boundaries of 
strings.
-        if (pos + needle_size <= begin + value_offsets[i]) {
-            result[i] = 1;
-        }
+        result[i] = pos + needle_size <= begin + value_offsets[i];
 
         // move to next string offset
         pos = begin + value_offsets[i];


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to