This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 08adf914f9 [improvement](vec) avoid creating a new column while 
filtering mutable columns (#16850)
08adf914f9 is described below

commit 08adf914f9c1ead5aa843701fced2fa82638f3b6
Author: Jerry Hu <mrh...@gmail.com>
AuthorDate: Tue Feb 21 09:47:21 2023 +0800

    [improvement](vec) avoid creating a new column while filtering mutable 
columns (#16850)
    
    Currently, when filtering a column, a new column will be created to store 
the filtering result, which will cause some performance loss。 ssb-flat without 
pushdown expr from 19s to 15s.
---
 be/src/vec/columns/column.h                     |   4 +
 be/src/vec/columns/column_array.cpp             | 151 ++++++++++++++++++++++++
 be/src/vec/columns/column_array.h               |   8 ++
 be/src/vec/columns/column_complex.h             |  32 +++++
 be/src/vec/columns/column_const.cpp             |  11 ++
 be/src/vec/columns/column_const.h               |   2 +
 be/src/vec/columns/column_decimal.cpp           |  55 +++++++++
 be/src/vec/columns/column_decimal.h             |   3 +
 be/src/vec/columns/column_dictionary.h          |   4 +
 be/src/vec/columns/column_dummy.h               |   6 +
 be/src/vec/columns/column_fixed_length_object.h |   4 +
 be/src/vec/columns/column_map.cpp               |   7 ++
 be/src/vec/columns/column_map.h                 |   3 +
 be/src/vec/columns/column_nullable.cpp          |   7 ++
 be/src/vec/columns/column_nullable.h            |   3 +
 be/src/vec/columns/column_object.h              |   5 +
 be/src/vec/columns/column_string.cpp            |  10 ++
 be/src/vec/columns/column_string.h              |   1 +
 be/src/vec/columns/column_struct.cpp            |  12 ++
 be/src/vec/columns/column_struct.h              |   3 +
 be/src/vec/columns/column_vector.cpp            |  56 +++++++++
 be/src/vec/columns/column_vector.h              |   1 +
 be/src/vec/columns/columns_common.cpp           | 117 +++++++++++++++++-
 be/src/vec/columns/columns_common.h             |   8 ++
 be/src/vec/columns/predicate_column.h           |   4 +
 be/src/vec/core/block.cpp                       |  11 +-
 26 files changed, 522 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h
index af94604749..99ea1f66a4 100644
--- a/be/src/vec/columns/column.h
+++ b/be/src/vec/columns/column.h
@@ -386,6 +386,10 @@ public:
     using Filter = PaddedPODArray<UInt8>;
     virtual Ptr filter(const Filter& filt, ssize_t result_size_hint) const = 0;
 
+    /// This function will modify the original table.
+    /// Return rows number after filtered.
+    virtual size_t filter(const Filter& filter) = 0;
+
     /**
      *  used by lazy materialization to filter column by selected rowids
      *  Q: Why use IColumn* as args type instead of MutablePtr or ImmutablePtr 
?
diff --git a/be/src/vec/columns/column_array.cpp 
b/be/src/vec/columns/column_array.cpp
index 10d61864cb..0cd72f26c7 100644
--- a/be/src/vec/columns/column_array.cpp
+++ b/be/src/vec/columns/column_array.cpp
@@ -388,6 +388,36 @@ ColumnPtr ColumnArray::filter(const Filter& filt, ssize_t 
result_size_hint) cons
     return filter_generic(filt, result_size_hint);
 }
 
+size_t ColumnArray::filter(const Filter& filter) {
+    if (typeid_cast<const ColumnUInt8*>(data.get())) {
+        return filter_number<UInt8>(filter);
+    } else if (typeid_cast<const ColumnUInt16*>(data.get())) {
+        return filter_number<UInt16>(filter);
+    } else if (typeid_cast<const ColumnUInt32*>(data.get())) {
+        return filter_number<UInt32>(filter);
+    } else if (typeid_cast<const ColumnUInt64*>(data.get())) {
+        return filter_number<UInt64>(filter);
+    } else if (typeid_cast<const ColumnInt8*>(data.get())) {
+        return filter_number<Int8>(filter);
+    } else if (typeid_cast<const ColumnInt16*>(data.get())) {
+        return filter_number<Int16>(filter);
+    } else if (typeid_cast<const ColumnInt32*>(data.get())) {
+        return filter_number<Int32>(filter);
+    } else if (typeid_cast<const ColumnInt64*>(data.get())) {
+        return filter_number<Int64>(filter);
+    } else if (typeid_cast<const ColumnFloat32*>(data.get())) {
+        return filter_number<Float32>(filter);
+    } else if (typeid_cast<const ColumnFloat64*>(data.get())) {
+        return filter_number<Float64>(filter);
+    } else if (typeid_cast<const ColumnString*>(data.get())) {
+        return filter_string(filter);
+    } else if (typeid_cast<const ColumnNullable*>(data.get())) {
+        return filter_nullable(filter);
+    } else {
+        return filter_generic(filter);
+    }
+}
+
 template <typename T>
 ColumnPtr ColumnArray::filter_number(const Filter& filt, ssize_t 
result_size_hint) const {
     if (get_offsets().empty()) return ColumnArray::create(data);
@@ -402,6 +432,12 @@ ColumnPtr ColumnArray::filter_number(const Filter& filt, 
ssize_t result_size_hin
     return res;
 }
 
+template <typename T>
+size_t ColumnArray::filter_number(const Filter& filter) {
+    return filter_arrays_impl<T, 
Offset64>(assert_cast<ColumnVector<T>&>(*data).get_data(),
+                                           get_offsets(), filter);
+}
+
 ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t 
result_size_hint) const {
     size_t col_size = get_offsets().size();
     if (col_size != filt.size()) LOG(FATAL) << "Size of filter doesn't match 
size of column.";
@@ -465,6 +501,70 @@ ColumnPtr ColumnArray::filter_string(const Filter& filt, 
ssize_t result_size_hin
     return res;
 }
 
+size_t ColumnArray::filter_string(const Filter& filter) {
+    size_t col_size = get_offsets().size();
+    if (col_size != filter.size()) {
+        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    }
+
+    if (0 == col_size) {
+        return ColumnArray::create(data);
+    }
+
+    ColumnString& src_string = typeid_cast<ColumnString&>(*data);
+    auto* src_chars = src_string.get_chars().data();
+    auto* src_string_offsets = src_string.get_offsets().data();
+    auto* src_offsets = get_offsets().data();
+
+    ColumnString::Chars& res_chars = src_string.get_chars();
+    auto& res_string_offsets = src_string.get_offsets();
+    auto& res_offsets = get_offsets();
+
+    res_chars.set_end_ptr(res_chars.data());
+    res_string_offsets.set_end_ptr(res_string_offsets.data());
+    res_offsets.set_end_ptr(res_offsets.data());
+
+    Offset64 prev_src_offset = 0;
+    IColumn::Offset prev_src_string_offset = 0;
+
+    Offset64 prev_res_offset = 0;
+    IColumn::Offset prev_res_string_offset = 0;
+
+    for (size_t i = 0; i < col_size; ++i) {
+        /// Number of rows in the array.
+        size_t array_size = src_offsets[i] - prev_src_offset;
+
+        if (filter[i]) {
+            /// If the array is not empty - copy content.
+            if (array_size) {
+                size_t chars_to_copy = src_string_offsets[array_size + 
prev_src_offset - 1] -
+                                       prev_src_string_offset;
+                size_t res_chars_prev_size = res_chars.size();
+                res_chars.resize(res_chars_prev_size + chars_to_copy);
+                memmove(&res_chars[res_chars_prev_size], 
&src_chars[prev_src_string_offset],
+                        chars_to_copy);
+
+                for (size_t j = 0; j < array_size; ++j) {
+                    res_string_offsets.push_back(src_string_offsets[j + 
prev_src_offset] +
+                                                 prev_res_string_offset - 
prev_src_string_offset);
+                }
+
+                prev_res_string_offset = res_string_offsets.back();
+            }
+
+            prev_res_offset += array_size;
+            res_offsets.push_back(prev_res_offset);
+        }
+
+        if (array_size) {
+            prev_src_offset += array_size;
+            prev_src_string_offset = src_string_offsets[prev_src_offset - 1];
+        }
+    }
+
+    return res_offsets.size();
+}
+
 ColumnPtr ColumnArray::filter_generic(const Filter& filt, ssize_t 
result_size_hint) const {
     size_t size = get_offsets().size();
     if (size != filt.size()) LOG(FATAL) << "Size of filter doesn't match size 
of column.";
@@ -504,6 +604,41 @@ ColumnPtr ColumnArray::filter_generic(const Filter& filt, 
ssize_t result_size_hi
     return res;
 }
 
+size_t ColumnArray::filter_generic(const Filter& filter) {
+    size_t size = get_offsets().size();
+    if (size != filter.size()) {
+        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    }
+
+    if (size == 0) {
+        return 0;
+    }
+
+    Filter nested_filter(get_offsets().back());
+    for (size_t i = 0; i < size; ++i) {
+        if (filter[i]) {
+            memset(&nested_filter[offset_at(i)], 1, size_at(i));
+        } else {
+            memset(&nested_filter[offset_at(i)], 0, size_at(i));
+        }
+    }
+
+    data->filter(nested_filter);
+
+    auto& res_offsets = get_offsets();
+    res_offsets.set_end_ptr(res_offsets.data());
+
+    size_t current_offset = 0;
+    for (size_t i = 0; i < size; ++i) {
+        if (filter[i]) {
+            current_offset += size_at(i);
+            res_offsets.push_back(current_offset);
+        }
+    }
+
+    return res_offsets.size();
+}
+
 ColumnPtr ColumnArray::filter_nullable(const Filter& filt, ssize_t 
result_size_hint) const {
     if (get_offsets().empty()) return ColumnArray::create(data);
 
@@ -525,6 +660,22 @@ ColumnPtr ColumnArray::filter_nullable(const Filter& filt, 
ssize_t result_size_h
                                filtered_offsets);
 }
 
+size_t ColumnArray::filter_nullable(const Filter& filter) {
+    if (get_offsets().empty()) {
+        return 0;
+    }
+
+    ColumnNullable& nullable_elems = assert_cast<ColumnNullable&>(*data);
+    const auto result_size =
+            filter_arrays_impl_only_data(nullable_elems.get_null_map_data(), 
get_offsets(), filter);
+
+    auto array_of_nested = 
ColumnArray::create(nullable_elems.get_nested_column_ptr(), offsets);
+    const auto nested_result_size = 
array_of_nested->assume_mutable()->filter(filter);
+
+    CHECK_EQ(result_size, nested_result_size);
+    return result_size;
+}
+
 void ColumnArray::insert_indices_from(const IColumn& src, const int* 
indices_begin,
                                       const int* indices_end) {
     for (auto x = indices_begin; x != indices_end; ++x) {
diff --git a/be/src/vec/columns/column_array.h 
b/be/src/vec/columns/column_array.h
index a5f396d520..4cb96a2eb4 100644
--- a/be/src/vec/columns/column_array.h
+++ b/be/src/vec/columns/column_array.h
@@ -104,6 +104,7 @@ public:
     void insert_default() override;
     void pop_back(size_t n) override;
     ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const 
override;
+    size_t filter(const Filter& filter) override;
     ColumnPtr permute(const Permutation& perm, size_t limit) const override;
     //ColumnPtr index(const IColumn & indexes, size_t limit) const;
     template <typename Type>
@@ -227,9 +228,16 @@ private:
     template <typename T>
     ColumnPtr filter_number(const Filter& filt, ssize_t result_size_hint) 
const;
 
+    template <typename T>
+    size_t filter_number(const Filter& filter);
+
     ColumnPtr filter_string(const Filter& filt, ssize_t result_size_hint) 
const;
     ColumnPtr filter_nullable(const Filter& filt, ssize_t result_size_hint) 
const;
     ColumnPtr filter_generic(const Filter& filt, ssize_t result_size_hint) 
const;
+
+    size_t filter_string(const Filter& filter);
+    size_t filter_nullable(const Filter& filter);
+    size_t filter_generic(const Filter& filter);
 };
 
 } // namespace doris::vectorized
diff --git a/be/src/vec/columns/column_complex.h 
b/be/src/vec/columns/column_complex.h
index 0bffb5c842..d6c47d0dca 100644
--- a/be/src/vec/columns/column_complex.h
+++ b/be/src/vec/columns/column_complex.h
@@ -248,6 +248,8 @@ public:
 
     ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) 
const override;
 
+    size_t filter(const IColumn::Filter& filter) override;
+
     ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const 
override;
 
     Container& get_data() { return data; }
@@ -327,6 +329,36 @@ ColumnPtr ColumnComplexType<T>::filter(const 
IColumn::Filter& filt,
     return res;
 }
 
+template <typename T>
+size_t ColumnComplexType<T>::filter(const IColumn::Filter& filter) {
+    size_t size = data.size();
+    if (size != filter.size()) {
+        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    }
+
+    if (data.size() == 0) {
+        return 0;
+    }
+
+    T* res_data = data.data();
+
+    const UInt8* filter_pos = filter.data();
+    const UInt8* filter_end = filter_pos + size;
+    const T* data_pos = data.data();
+
+    while (filter_pos < filter_end) {
+        if (*filter_pos) {
+            *res_data = std::move(*data_pos);
+            ++res_data;
+        }
+
+        ++filter_pos;
+        ++data_pos;
+    }
+
+    return res_data - data.data();
+}
+
 template <typename T>
 ColumnPtr ColumnComplexType<T>::permute(const IColumn::Permutation& perm, 
size_t limit) const {
     size_t size = data.size();
diff --git a/be/src/vec/columns/column_const.cpp 
b/be/src/vec/columns/column_const.cpp
index f207ff67b9..7f4bf8834d 100644
--- a/be/src/vec/columns/column_const.cpp
+++ b/be/src/vec/columns/column_const.cpp
@@ -58,6 +58,17 @@ ColumnPtr ColumnConst::filter(const Filter& filt, ssize_t 
/*result_size_hint*/)
     return ColumnConst::create(data, count_bytes_in_filter(filt));
 }
 
+size_t ColumnConst::filter(const Filter& filter) {
+    if (s != filter.size()) {
+        LOG(FATAL) << fmt::format("Size of filter ({}) doesn't match size of 
column ({})",
+                                  filter.size(), s);
+    }
+
+    const auto result_size = count_bytes_in_filter(filter);
+    resize(result_size);
+    return result_size;
+}
+
 ColumnPtr ColumnConst::replicate(const Offsets& offsets) const {
     if (s != offsets.size()) {
         LOG(FATAL) << fmt::format("Size of offsets ({}) doesn't match size of 
column ({})",
diff --git a/be/src/vec/columns/column_const.h 
b/be/src/vec/columns/column_const.h
index 094c6b37d2..9f70628776 100644
--- a/be/src/vec/columns/column_const.h
+++ b/be/src/vec/columns/column_const.h
@@ -144,6 +144,8 @@ public:
                                   const uint8_t* __restrict null_data) const 
override;
 
     ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const 
override;
+    size_t filter(const Filter& filter) override;
+
     ColumnPtr replicate(const Offsets& offsets) const override;
     void replicate(const uint32_t* counts, size_t target_size, IColumn& 
column, size_t begin = 0,
                    int count_sz = -1) const override;
diff --git a/be/src/vec/columns/column_decimal.cpp 
b/be/src/vec/columns/column_decimal.cpp
index 4bca086e5f..778a0b83d3 100644
--- a/be/src/vec/columns/column_decimal.cpp
+++ b/be/src/vec/columns/column_decimal.cpp
@@ -325,6 +325,61 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter& 
filt, ssize_t result_s
     return res;
 }
 
+template <typename T>
+size_t ColumnDecimal<T>::filter(const IColumn::Filter& filter) {
+    size_t size = data.size();
+    if (size != filter.size()) {
+        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    }
+
+    const UInt8* filter_pos = filter.data();
+    const UInt8* filter_end = filter_pos + size;
+    const T* data_pos = data.data();
+    T* result_data = data.data();
+
+    /** A slightly more optimized version.
+        * Based on the assumption that often pieces of consecutive values
+        *  completely pass or do not pass the filter.
+        * Therefore, we will optimistically check the parts of `SIMD_BYTES` 
values.
+        */
+    static constexpr size_t SIMD_BYTES = 32;
+    const UInt8* filter_end_sse = filter_pos + size / SIMD_BYTES * SIMD_BYTES;
+
+    while (filter_pos < filter_end_sse) {
+        uint32_t mask = simd::bytes32_mask_to_bits32_mask(filter_pos);
+
+        if (0xFFFFFFFF == mask) {
+            memmove(result_data, data_pos, sizeof(T) * SIMD_BYTES);
+            result_data += SIMD_BYTES;
+        } else {
+            while (mask) {
+                const size_t idx = __builtin_ctzll(mask);
+                *result_data = data_pos[idx];
+                ++result_data;
+                mask = mask & (mask - 1);
+            }
+        }
+
+        filter_pos += SIMD_BYTES;
+        data_pos += SIMD_BYTES;
+    }
+
+    while (filter_pos < filter_end) {
+        if (*filter_pos) {
+            *result_data = *data_pos;
+            ++result_data;
+        }
+
+        ++filter_pos;
+        ++data_pos;
+    }
+
+    const auto result_size = result_data - data.data();
+    data.set_end_ptr(result_data);
+
+    return result_size;
+}
+
 template <typename T>
 ColumnPtr ColumnDecimal<T>::replicate(const IColumn::Offsets& offsets) const {
     size_t size = data.size();
diff --git a/be/src/vec/columns/column_decimal.h 
b/be/src/vec/columns/column_decimal.h
index e785e0df42..fe00b98b65 100644
--- a/be/src/vec/columns/column_decimal.h
+++ b/be/src/vec/columns/column_decimal.h
@@ -183,6 +183,9 @@ public:
     void clear() override { data.clear(); }
 
     ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) 
const override;
+
+    size_t filter(const IColumn::Filter& filter) override;
+
     ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const 
override;
     //    ColumnPtr index(const IColumn & indexes, size_t limit) const 
override;
 
diff --git a/be/src/vec/columns/column_dictionary.h 
b/be/src/vec/columns/column_dictionary.h
index 9ed997c43f..5c0ee0f059 100644
--- a/be/src/vec/columns/column_dictionary.h
+++ b/be/src/vec/columns/column_dictionary.h
@@ -179,6 +179,10 @@ public:
         LOG(FATAL) << "filter not supported in ColumnDictionary";
     }
 
+    [[noreturn]] size_t filter(const IColumn::Filter&) override {
+        LOG(FATAL) << "filter not supported in ColumnDictionary";
+    }
+
     [[noreturn]] ColumnPtr permute(const IColumn::Permutation& perm, size_t 
limit) const override {
         LOG(FATAL) << "permute not supported in ColumnDictionary";
     }
diff --git a/be/src/vec/columns/column_dummy.h 
b/be/src/vec/columns/column_dummy.h
index 40bdd9cfd2..b66b284e3f 100644
--- a/be/src/vec/columns/column_dummy.h
+++ b/be/src/vec/columns/column_dummy.h
@@ -87,6 +87,12 @@ public:
         return clone_dummy(count_bytes_in_filter(filt));
     }
 
+    size_t filter(const Filter& filter) override {
+        const auto result_size = count_bytes_in_filter(filter);
+        s = result_size;
+        return result_size;
+    }
+
     ColumnPtr permute(const Permutation& perm, size_t limit) const override {
         if (s != perm.size()) {
             LOG(FATAL) << "Size of permutation doesn't match size of column.";
diff --git a/be/src/vec/columns/column_fixed_length_object.h 
b/be/src/vec/columns/column_fixed_length_object.h
index aba83c5f1c..c1e3a53043 100644
--- a/be/src/vec/columns/column_fixed_length_object.h
+++ b/be/src/vec/columns/column_fixed_length_object.h
@@ -143,6 +143,10 @@ public:
         LOG(FATAL) << "filter not supported";
     }
 
+    [[noreturn]] size_t filter(const IColumn::Filter&) override {
+        LOG(FATAL) << "filter not supported";
+    }
+
     [[noreturn]] ColumnPtr permute(const IColumn::Permutation& perm, size_t 
limit) const override {
         LOG(FATAL) << "permute not supported";
     }
diff --git a/be/src/vec/columns/column_map.cpp 
b/be/src/vec/columns/column_map.cpp
index f477b9f336..9febda9570 100644
--- a/be/src/vec/columns/column_map.cpp
+++ b/be/src/vec/columns/column_map.cpp
@@ -157,6 +157,13 @@ ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t 
result_size_hint) const
                              values->filter(filt, result_size_hint));
 }
 
+size_t ColumnMap::filter(const Filter& filter) {
+    const auto key_result_size = keys->filter(filter);
+    const auto value_result_size = values->filter(filter);
+    CHECK_EQ(key_result_size, value_result_size);
+    return value_result_size;
+}
+
 ColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const {
     return ColumnMap::create(keys->permute(perm, limit), values->permute(perm, 
limit));
 }
diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h
index 500fc43f3a..946dbd537d 100644
--- a/be/src/vec/columns/column_map.h
+++ b/be/src/vec/columns/column_map.h
@@ -84,6 +84,9 @@ public:
     void update_hash_with_value(size_t n, SipHash& hash) const override;
 
     ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const 
override;
+
+    size_t filter(const Filter& filter) override;
+
     ColumnPtr permute(const Permutation& perm, size_t limit) const override;
     ColumnPtr replicate(const Offsets& offsets) const override;
     MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) 
const override {
diff --git a/be/src/vec/columns/column_nullable.cpp 
b/be/src/vec/columns/column_nullable.cpp
index 5ec9f43571..92fb114a4e 100644
--- a/be/src/vec/columns/column_nullable.cpp
+++ b/be/src/vec/columns/column_nullable.cpp
@@ -309,6 +309,13 @@ ColumnPtr ColumnNullable::filter(const Filter& filt, 
ssize_t result_size_hint) c
     return ColumnNullable::create(filtered_data, filtered_null_map);
 }
 
+size_t ColumnNullable::filter(const Filter& filter) {
+    const auto data_result_size = get_nested_column().filter(filter);
+    const auto map_result_size = get_null_map_column().filter(filter);
+    CHECK_EQ(data_result_size, map_result_size);
+    return data_result_size;
+}
+
 Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t 
sel_size, IColumn* col_ptr) {
     const ColumnNullable* nullable_col_ptr = reinterpret_cast<const 
ColumnNullable*>(col_ptr);
     ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column;
diff --git a/be/src/vec/columns/column_nullable.h 
b/be/src/vec/columns/column_nullable.h
index fc8e901c94..c48e3c80a1 100644
--- a/be/src/vec/columns/column_nullable.h
+++ b/be/src/vec/columns/column_nullable.h
@@ -161,6 +161,9 @@ public:
 
     void pop_back(size_t n) override;
     ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const 
override;
+
+    size_t filter(const Filter& filter) override;
+
     Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override;
     ColumnPtr permute(const Permutation& perm, size_t limit) const override;
     //    ColumnPtr index(const IColumn & indexes, size_t limit) const 
override;
diff --git a/be/src/vec/columns/column_object.h 
b/be/src/vec/columns/column_object.h
index 2bd804fc51..65a9d31829 100644
--- a/be/src/vec/columns/column_object.h
+++ b/be/src/vec/columns/column_object.h
@@ -315,6 +315,11 @@ public:
         return nullptr;
     }
 
+    size_t filter(const Filter&) override {
+        LOG(FATAL) << "should not call the method in column object";
+        return 0;
+    }
+
     ColumnPtr permute(const Permutation&, size_t) const override {
         LOG(FATAL) << "should not call the method in column object";
         return nullptr;
diff --git a/be/src/vec/columns/column_string.cpp 
b/be/src/vec/columns/column_string.cpp
index 8c6ca64efe..eff29d17a4 100644
--- a/be/src/vec/columns/column_string.cpp
+++ b/be/src/vec/columns/column_string.cpp
@@ -148,6 +148,16 @@ ColumnPtr ColumnString::filter(const Filter& filt, ssize_t 
result_size_hint) con
     return res;
 }
 
+size_t ColumnString::filter(const Filter& filter) {
+    CHECK_EQ(filter.size(), offsets.size());
+    if (offsets.size() == 0) {
+        resize(0);
+        return 0;
+    }
+
+    return filter_arrays_impl<UInt8, Offset>(chars, offsets, filter);
+}
+
 ColumnPtr ColumnString::permute(const Permutation& perm, size_t limit) const {
     size_t size = offsets.size();
 
diff --git a/be/src/vec/columns/column_string.h 
b/be/src/vec/columns/column_string.h
index 4623d02eda..01cef34181 100644
--- a/be/src/vec/columns/column_string.h
+++ b/be/src/vec/columns/column_string.h
@@ -419,6 +419,7 @@ public:
                              const int* indices_end) override;
 
     ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const 
override;
+    size_t filter(const Filter& filter) override;
 
     ColumnPtr permute(const Permutation& perm, size_t limit) const override;
 
diff --git a/be/src/vec/columns/column_struct.cpp 
b/be/src/vec/columns/column_struct.cpp
index afef00c314..66eb57500c 100644
--- a/be/src/vec/columns/column_struct.cpp
+++ b/be/src/vec/columns/column_struct.cpp
@@ -248,6 +248,18 @@ ColumnPtr ColumnStruct::filter(const Filter& filt, ssize_t 
result_size_hint) con
     return ColumnStruct::create(new_columns);
 }
 
+size_t ColumnStruct::filter(const Filter& filter) {
+    const size_t tuple_size = columns.size();
+
+    size_t result_size = 0;
+    for (size_t i = 0; i < tuple_size; ++i) {
+        const auto this_result_size = columns[i]->filter(filter);
+        CHECK(result_size == 0 || result_size == this_result_size);
+        result_size = this_result_size;
+    }
+    return result_size;
+}
+
 ColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) const {
     const size_t tuple_size = columns.size();
     Columns new_columns(tuple_size);
diff --git a/be/src/vec/columns/column_struct.h 
b/be/src/vec/columns/column_struct.h
index 60d0e46315..6d0f951d05 100644
--- a/be/src/vec/columns/column_struct.h
+++ b/be/src/vec/columns/column_struct.h
@@ -146,6 +146,9 @@ public:
 
     void insert_range_from(const IColumn& src, size_t start, size_t length) 
override;
     ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const 
override;
+
+    size_t filter(const Filter& filter) override;
+
     ColumnPtr permute(const Permutation& perm, size_t limit) const override;
     ColumnPtr replicate(const Offsets& offsets) const override;
     MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) 
const override;
diff --git a/be/src/vec/columns/column_vector.cpp 
b/be/src/vec/columns/column_vector.cpp
index ea5c14c0a8..06c0a149ab 100644
--- a/be/src/vec/columns/column_vector.cpp
+++ b/be/src/vec/columns/column_vector.cpp
@@ -440,6 +440,62 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter& 
filt, ssize_t result_si
     return res;
 }
 
+template <typename T>
+size_t ColumnVector<T>::filter(const IColumn::Filter& filter) {
+    size_t size = data.size();
+    if (size != filter.size()) {
+        LOG(FATAL) << "Size of filter doesn't match size of column. data size: 
" << size
+                   << ", filter size: " << filter.size() << get_stack_trace();
+    }
+
+    const UInt8* filter_pos = filter.data();
+    const UInt8* filter_end = filter_pos + size;
+    T* data_pos = data.data();
+    T* result_data = data_pos;
+
+    /** A slightly more optimized version.
+        * Based on the assumption that often pieces of consecutive values
+        *  completely pass or do not pass the filter.
+        * Therefore, we will optimistically check the parts of `SIMD_BYTES` 
values.
+        */
+    static constexpr size_t SIMD_BYTES = 32;
+    const UInt8* filter_end_sse = filter_pos + size / SIMD_BYTES * SIMD_BYTES;
+
+    while (filter_pos < filter_end_sse) {
+        uint32_t mask = simd::bytes32_mask_to_bits32_mask(filter_pos);
+
+        if (0xFFFFFFFF == mask) {
+            memmove(result_data, data_pos, sizeof(T) * SIMD_BYTES);
+            result_data += SIMD_BYTES;
+        } else {
+            while (mask) {
+                const size_t idx = __builtin_ctzll(mask);
+                *result_data = data_pos[idx];
+                ++result_data;
+                mask = mask & (mask - 1);
+            }
+        }
+
+        filter_pos += SIMD_BYTES;
+        data_pos += SIMD_BYTES;
+    }
+
+    while (filter_pos < filter_end) {
+        if (*filter_pos) {
+            *result_data = *data_pos;
+            ++result_data;
+        }
+
+        ++filter_pos;
+        ++data_pos;
+    }
+
+    const auto new_size = result_data - data.data();
+    resize(new_size);
+
+    return new_size;
+}
+
 template <typename T>
 ColumnPtr ColumnVector<T>::permute(const IColumn::Permutation& perm, size_t 
limit) const {
     size_t size = data.size();
diff --git a/be/src/vec/columns/column_vector.h 
b/be/src/vec/columns/column_vector.h
index cd56cbdd04..f1678715c7 100644
--- a/be/src/vec/columns/column_vector.h
+++ b/be/src/vec/columns/column_vector.h
@@ -325,6 +325,7 @@ public:
     }
 
     ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) 
const override;
+    size_t filter(const IColumn::Filter& filter) override;
 
     // note(wb) this method is only used in storage layer now
     Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* 
col_ptr) override {
diff --git a/be/src/vec/columns/columns_common.cpp 
b/be/src/vec/columns/columns_common.cpp
index 1988695eaa..d022607185 100644
--- a/be/src/vec/columns/columns_common.cpp
+++ b/be/src/vec/columns/columns_common.cpp
@@ -99,7 +99,7 @@ namespace {
 /// Implementation details of filterArraysImpl function, used as template 
parameter.
 /// Allow to build or not to build offsets array.
 
-template <typename OT>
+template <typename OT, bool USE_MEMMOVE = false>
 struct ResultOffsetsBuilder {
     PaddedPODArray<OT>& res_offsets;
     OT current_src_offset = 0;
@@ -119,7 +119,11 @@ struct ResultOffsetsBuilder {
     void insert_chunk(const OT* src_offsets_pos, bool first, OT chunk_offset, 
size_t chunk_size) {
         const auto offsets_size_old = res_offsets.size();
         res_offsets.resize_assume_reserved(offsets_size_old + SIMD_BYTES);
-        memcpy(&res_offsets[offsets_size_old], src_offsets_pos, SIMD_BYTES * 
sizeof(OT));
+        if constexpr (USE_MEMMOVE) {
+            memmove(&res_offsets[offsets_size_old], src_offsets_pos, 
SIMD_BYTES * sizeof(OT));
+        } else {
+            memcpy(&res_offsets[offsets_size_old], src_offsets_pos, SIMD_BYTES 
* sizeof(OT));
+        }
 
         if (!first) {
             /// difference between current and actual offset
@@ -228,6 +232,95 @@ void filter_arrays_impl_generic(const PaddedPODArray<T>& 
src_elems,
         ++offsets_pos;
     }
 }
+
+template <typename T, typename OT, typename ResultOffsetsBuilder>
+size_t filter_arrays_impl_generic_without_reserving(PaddedPODArray<T>& elems,
+                                                    PaddedPODArray<OT>& 
offsets,
+                                                    const IColumn::Filter& 
filter) {
+    const size_t size = offsets.size();
+    if (offsets.size() != filter.size()) {
+        LOG(FATAL) << "Size of filter doesn't match size of column.";
+    }
+
+    /// If no need to filter the `offsets`, here do not reset the end ptr of 
`offsets`
+    if constexpr (!std::is_same_v<ResultOffsetsBuilder, 
NoResultOffsetsBuilder<OT>>) {
+        /// Reset the end ptr to prepare for inserting/pushing elements into 
`offsets` in `ResultOffsetsBuilder`.
+        offsets.set_end_ptr(offsets.data());
+    }
+
+    ResultOffsetsBuilder result_offsets_builder(&offsets);
+
+    const UInt8* filter_pos = filter.data();
+    const T* src_data = elems.data();
+    T* result_data = elems.data();
+    const auto filter_end = filter_pos + size;
+
+    auto offsets_pos = offsets.data();
+    const auto offsets_begin = offsets_pos;
+    size_t result_size = 0;
+
+    /// copy array ending at *end_offset_ptr
+    const auto copy_array = [&](const OT* offset_ptr) {
+        const auto arr_offset = offset_ptr == offsets_begin ? 0 : 
offset_ptr[-1];
+        const auto arr_size = *offset_ptr - arr_offset;
+
+        result_offsets_builder.insert_one(arr_size);
+        const size_t size_to_copy = arr_size * sizeof(T);
+        memmove(result_data, &src_data[arr_offset], size_to_copy);
+        result_data += arr_size;
+    };
+
+    static constexpr size_t SIMD_BYTES = 32;
+    const auto filter_end_aligned = filter_pos + size / SIMD_BYTES * 
SIMD_BYTES;
+
+    while (filter_pos < filter_end_aligned) {
+        auto mask = simd::bytes32_mask_to_bits32_mask(filter_pos);
+
+        if (mask == 0xffffffff) {
+            /// SIMD_BYTES consecutive rows pass the filter
+            const auto first = offsets_pos == offsets_begin;
+
+            const auto chunk_offset = first ? 0 : offsets_pos[-1];
+            const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset;
+
+            result_offsets_builder.template 
insert_chunk<SIMD_BYTES>(offsets_pos, first,
+                                                                     
chunk_offset, chunk_size);
+
+            /// copy elements for SIMD_BYTES arrays at once
+            const size_t size_to_copy = chunk_size * sizeof(T);
+            memmove(result_data, &src_data[chunk_offset], size_to_copy);
+            result_data += chunk_size;
+            result_size += SIMD_BYTES;
+        } else {
+            while (mask) {
+                const size_t bit_pos = __builtin_ctzll(mask);
+                copy_array(offsets_pos + bit_pos);
+                ++result_size;
+                mask = mask & (mask - 1);
+            }
+        }
+
+        filter_pos += SIMD_BYTES;
+        offsets_pos += SIMD_BYTES;
+    }
+
+    while (filter_pos < filter_end) {
+        if (*filter_pos) {
+            copy_array(offsets_pos);
+            ++result_size;
+        }
+
+        ++filter_pos;
+        ++offsets_pos;
+    }
+
+    if constexpr (!std::is_same_v<ResultOffsetsBuilder, 
NoResultOffsetsBuilder<OT>>) {
+        const size_t result_data_size = result_data - elems.data();
+        CHECK_EQ(result_data_size, offsets.back());
+    }
+    elems.set_end_ptr(result_data);
+    return result_size;
+}
 } // namespace
 
 template <typename T, typename OT>
@@ -238,6 +331,13 @@ void filter_arrays_impl(const PaddedPODArray<T>& 
src_elems, const PaddedPODArray
             src_elems, src_offsets, res_elems, &res_offsets, filt, 
result_size_hint);
 }
 
+template <typename T, typename OT>
+size_t filter_arrays_impl(PaddedPODArray<T>& data, PaddedPODArray<OT>& offsets,
+                          const IColumn::Filter& filter) {
+    return filter_arrays_impl_generic_without_reserving<T, OT, 
ResultOffsetsBuilder<OT, true>>(
+            data, offsets, filter);
+}
+
 template <typename T, typename OT>
 void filter_arrays_impl_only_data(const PaddedPODArray<T>& src_elems,
                                   const PaddedPODArray<OT>& src_offsets,
@@ -247,14 +347,25 @@ void filter_arrays_impl_only_data(const 
PaddedPODArray<T>& src_elems,
             src_elems, src_offsets, res_elems, nullptr, filt, 
result_size_hint);
 }
 
+template <typename T, typename OT>
+size_t filter_arrays_impl_only_data(PaddedPODArray<T>& data, 
PaddedPODArray<OT>& offsets,
+                                    const IColumn::Filter& filter) {
+    return filter_arrays_impl_generic_without_reserving<T, OT, 
NoResultOffsetsBuilder<OT>>(
+            data, offsets, filter);
+}
+
 /// Explicit instantiations - not to place the implementation of the function 
above in the header file.
 #define INSTANTIATE(TYPE, OFFTYPE)                                             
                 \
     template void filter_arrays_impl<TYPE, OFFTYPE>(                           
                 \
             const PaddedPODArray<TYPE>&, const PaddedPODArray<OFFTYPE>&, 
PaddedPODArray<TYPE>&, \
             PaddedPODArray<OFFTYPE>&, const IColumn::Filter&, ssize_t);        
                 \
+    template size_t filter_arrays_impl<TYPE, OFFTYPE>(                         
                 \
+            PaddedPODArray<TYPE>&, PaddedPODArray<OFFTYPE>&, const 
IColumn::Filter&);           \
     template void filter_arrays_impl_only_data<TYPE, OFFTYPE>(                 
                 \
             const PaddedPODArray<TYPE>&, const PaddedPODArray<OFFTYPE>&, 
PaddedPODArray<TYPE>&, \
-            const IColumn::Filter&, ssize_t);
+            const IColumn::Filter&, ssize_t);                                  
                 \
+    template size_t filter_arrays_impl_only_data<TYPE, OFFTYPE>(               
                 \
+            PaddedPODArray<TYPE>&, PaddedPODArray<OFFTYPE>&, const 
IColumn::Filter&);
 
 INSTANTIATE(UInt8, IColumn::Offset)
 INSTANTIATE(UInt8, ColumnArray::Offset64)
diff --git a/be/src/vec/columns/columns_common.h 
b/be/src/vec/columns/columns_common.h
index 10233c6111..3bae20f0ba 100644
--- a/be/src/vec/columns/columns_common.h
+++ b/be/src/vec/columns/columns_common.h
@@ -51,6 +51,14 @@ void filter_arrays_impl_only_data(const PaddedPODArray<T>& 
src_elems,
                                   PaddedPODArray<T>& res_elems, const 
IColumn::Filter& filt,
                                   ssize_t result_size_hint);
 
+template <typename T, typename OT>
+size_t filter_arrays_impl(PaddedPODArray<T>& data, PaddedPODArray<OT>& offsets,
+                          const IColumn::Filter& filter);
+
+template <typename T, typename OT>
+size_t filter_arrays_impl_only_data(PaddedPODArray<T>& data, 
PaddedPODArray<OT>& offsets,
+                                    const IColumn::Filter& filter);
+
 namespace detail {
 template <typename T>
 const PaddedPODArray<T>* get_indexes_data(const IColumn& indexes);
diff --git a/be/src/vec/columns/predicate_column.h 
b/be/src/vec/columns/predicate_column.h
index 2b00fee50c..f67909889f 100644
--- a/be/src/vec/columns/predicate_column.h
+++ b/be/src/vec/columns/predicate_column.h
@@ -433,6 +433,10 @@ public:
         LOG(FATAL) << "filter not supported in PredicateColumnType";
     }
 
+    [[noreturn]] size_t filter(const IColumn::Filter&) override {
+        LOG(FATAL) << "filter not supported in PredicateColumnType";
+    }
+
     [[noreturn]] ColumnPtr permute(const IColumn::Permutation& perm, size_t 
limit) const override {
         LOG(FATAL) << "permute not supported in PredicateColumnType";
     }
diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp
index ce54efd5a4..ec84a19b7b 100644
--- a/be/src/vec/core/block.cpp
+++ b/be/src/vec/core/block.cpp
@@ -655,9 +655,14 @@ void Block::filter_block_internal(Block* block, const 
std::vector<uint32_t>& col
         }
     } else {
         for (auto& col : columns_to_filter) {
-            if (block->get_by_position(col).column->size() != count) {
-                block->get_by_position(col).column =
-                        block->get_by_position(col).column->filter(filter, 
count);
+            auto& column = block->get_by_position(col).column;
+            if (column->size() != count) {
+                if (column->use_count() == 1) {
+                    const auto result_size = 
column->assume_mutable()->filter(filter);
+                    CHECK_EQ(result_size, count);
+                } else {
+                    column = column->filter(filter, count);
+                }
             }
         }
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org


Reply via email to