This is an automated email from the ASF dual-hosted git repository. yiguolei pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push: new 08adf914f9 [improvement](vec) avoid creating a new column while filtering mutable columns (#16850) 08adf914f9 is described below commit 08adf914f9c1ead5aa843701fced2fa82638f3b6 Author: Jerry Hu <mrh...@gmail.com> AuthorDate: Tue Feb 21 09:47:21 2023 +0800 [improvement](vec) avoid creating a new column while filtering mutable columns (#16850) Currently, when filtering a column, a new column will be created to store the filtering result, which will cause some performance loss。 ssb-flat without pushdown expr from 19s to 15s. --- be/src/vec/columns/column.h | 4 + be/src/vec/columns/column_array.cpp | 151 ++++++++++++++++++++++++ be/src/vec/columns/column_array.h | 8 ++ be/src/vec/columns/column_complex.h | 32 +++++ be/src/vec/columns/column_const.cpp | 11 ++ be/src/vec/columns/column_const.h | 2 + be/src/vec/columns/column_decimal.cpp | 55 +++++++++ be/src/vec/columns/column_decimal.h | 3 + be/src/vec/columns/column_dictionary.h | 4 + be/src/vec/columns/column_dummy.h | 6 + be/src/vec/columns/column_fixed_length_object.h | 4 + be/src/vec/columns/column_map.cpp | 7 ++ be/src/vec/columns/column_map.h | 3 + be/src/vec/columns/column_nullable.cpp | 7 ++ be/src/vec/columns/column_nullable.h | 3 + be/src/vec/columns/column_object.h | 5 + be/src/vec/columns/column_string.cpp | 10 ++ be/src/vec/columns/column_string.h | 1 + be/src/vec/columns/column_struct.cpp | 12 ++ be/src/vec/columns/column_struct.h | 3 + be/src/vec/columns/column_vector.cpp | 56 +++++++++ be/src/vec/columns/column_vector.h | 1 + be/src/vec/columns/columns_common.cpp | 117 +++++++++++++++++- be/src/vec/columns/columns_common.h | 8 ++ be/src/vec/columns/predicate_column.h | 4 + be/src/vec/core/block.cpp | 11 +- 26 files changed, 522 insertions(+), 6 deletions(-) diff --git a/be/src/vec/columns/column.h b/be/src/vec/columns/column.h index af94604749..99ea1f66a4 100644 --- a/be/src/vec/columns/column.h +++ b/be/src/vec/columns/column.h @@ -386,6 +386,10 @@ public: using Filter = PaddedPODArray<UInt8>; virtual Ptr filter(const Filter& filt, ssize_t result_size_hint) const = 0; + /// This function will modify the original table. + /// Return rows number after filtered. + virtual size_t filter(const Filter& filter) = 0; + /** * used by lazy materialization to filter column by selected rowids * Q: Why use IColumn* as args type instead of MutablePtr or ImmutablePtr ? diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index 10d61864cb..0cd72f26c7 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -388,6 +388,36 @@ ColumnPtr ColumnArray::filter(const Filter& filt, ssize_t result_size_hint) cons return filter_generic(filt, result_size_hint); } +size_t ColumnArray::filter(const Filter& filter) { + if (typeid_cast<const ColumnUInt8*>(data.get())) { + return filter_number<UInt8>(filter); + } else if (typeid_cast<const ColumnUInt16*>(data.get())) { + return filter_number<UInt16>(filter); + } else if (typeid_cast<const ColumnUInt32*>(data.get())) { + return filter_number<UInt32>(filter); + } else if (typeid_cast<const ColumnUInt64*>(data.get())) { + return filter_number<UInt64>(filter); + } else if (typeid_cast<const ColumnInt8*>(data.get())) { + return filter_number<Int8>(filter); + } else if (typeid_cast<const ColumnInt16*>(data.get())) { + return filter_number<Int16>(filter); + } else if (typeid_cast<const ColumnInt32*>(data.get())) { + return filter_number<Int32>(filter); + } else if (typeid_cast<const ColumnInt64*>(data.get())) { + return filter_number<Int64>(filter); + } else if (typeid_cast<const ColumnFloat32*>(data.get())) { + return filter_number<Float32>(filter); + } else if (typeid_cast<const ColumnFloat64*>(data.get())) { + return filter_number<Float64>(filter); + } else if (typeid_cast<const ColumnString*>(data.get())) { + return filter_string(filter); + } else if (typeid_cast<const ColumnNullable*>(data.get())) { + return filter_nullable(filter); + } else { + return filter_generic(filter); + } +} + template <typename T> ColumnPtr ColumnArray::filter_number(const Filter& filt, ssize_t result_size_hint) const { if (get_offsets().empty()) return ColumnArray::create(data); @@ -402,6 +432,12 @@ ColumnPtr ColumnArray::filter_number(const Filter& filt, ssize_t result_size_hin return res; } +template <typename T> +size_t ColumnArray::filter_number(const Filter& filter) { + return filter_arrays_impl<T, Offset64>(assert_cast<ColumnVector<T>&>(*data).get_data(), + get_offsets(), filter); +} + ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t result_size_hint) const { size_t col_size = get_offsets().size(); if (col_size != filt.size()) LOG(FATAL) << "Size of filter doesn't match size of column."; @@ -465,6 +501,70 @@ ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t result_size_hin return res; } +size_t ColumnArray::filter_string(const Filter& filter) { + size_t col_size = get_offsets().size(); + if (col_size != filter.size()) { + LOG(FATAL) << "Size of filter doesn't match size of column."; + } + + if (0 == col_size) { + return ColumnArray::create(data); + } + + ColumnString& src_string = typeid_cast<ColumnString&>(*data); + auto* src_chars = src_string.get_chars().data(); + auto* src_string_offsets = src_string.get_offsets().data(); + auto* src_offsets = get_offsets().data(); + + ColumnString::Chars& res_chars = src_string.get_chars(); + auto& res_string_offsets = src_string.get_offsets(); + auto& res_offsets = get_offsets(); + + res_chars.set_end_ptr(res_chars.data()); + res_string_offsets.set_end_ptr(res_string_offsets.data()); + res_offsets.set_end_ptr(res_offsets.data()); + + Offset64 prev_src_offset = 0; + IColumn::Offset prev_src_string_offset = 0; + + Offset64 prev_res_offset = 0; + IColumn::Offset prev_res_string_offset = 0; + + for (size_t i = 0; i < col_size; ++i) { + /// Number of rows in the array. + size_t array_size = src_offsets[i] - prev_src_offset; + + if (filter[i]) { + /// If the array is not empty - copy content. + if (array_size) { + size_t chars_to_copy = src_string_offsets[array_size + prev_src_offset - 1] - + prev_src_string_offset; + size_t res_chars_prev_size = res_chars.size(); + res_chars.resize(res_chars_prev_size + chars_to_copy); + memmove(&res_chars[res_chars_prev_size], &src_chars[prev_src_string_offset], + chars_to_copy); + + for (size_t j = 0; j < array_size; ++j) { + res_string_offsets.push_back(src_string_offsets[j + prev_src_offset] + + prev_res_string_offset - prev_src_string_offset); + } + + prev_res_string_offset = res_string_offsets.back(); + } + + prev_res_offset += array_size; + res_offsets.push_back(prev_res_offset); + } + + if (array_size) { + prev_src_offset += array_size; + prev_src_string_offset = src_string_offsets[prev_src_offset - 1]; + } + } + + return res_offsets.size(); +} + ColumnPtr ColumnArray::filter_generic(const Filter& filt, ssize_t result_size_hint) const { size_t size = get_offsets().size(); if (size != filt.size()) LOG(FATAL) << "Size of filter doesn't match size of column."; @@ -504,6 +604,41 @@ ColumnPtr ColumnArray::filter_generic(const Filter& filt, ssize_t result_size_hi return res; } +size_t ColumnArray::filter_generic(const Filter& filter) { + size_t size = get_offsets().size(); + if (size != filter.size()) { + LOG(FATAL) << "Size of filter doesn't match size of column."; + } + + if (size == 0) { + return 0; + } + + Filter nested_filter(get_offsets().back()); + for (size_t i = 0; i < size; ++i) { + if (filter[i]) { + memset(&nested_filter[offset_at(i)], 1, size_at(i)); + } else { + memset(&nested_filter[offset_at(i)], 0, size_at(i)); + } + } + + data->filter(nested_filter); + + auto& res_offsets = get_offsets(); + res_offsets.set_end_ptr(res_offsets.data()); + + size_t current_offset = 0; + for (size_t i = 0; i < size; ++i) { + if (filter[i]) { + current_offset += size_at(i); + res_offsets.push_back(current_offset); + } + } + + return res_offsets.size(); +} + ColumnPtr ColumnArray::filter_nullable(const Filter& filt, ssize_t result_size_hint) const { if (get_offsets().empty()) return ColumnArray::create(data); @@ -525,6 +660,22 @@ ColumnPtr ColumnArray::filter_nullable(const Filter& filt, ssize_t result_size_h filtered_offsets); } +size_t ColumnArray::filter_nullable(const Filter& filter) { + if (get_offsets().empty()) { + return 0; + } + + ColumnNullable& nullable_elems = assert_cast<ColumnNullable&>(*data); + const auto result_size = + filter_arrays_impl_only_data(nullable_elems.get_null_map_data(), get_offsets(), filter); + + auto array_of_nested = ColumnArray::create(nullable_elems.get_nested_column_ptr(), offsets); + const auto nested_result_size = array_of_nested->assume_mutable()->filter(filter); + + CHECK_EQ(result_size, nested_result_size); + return result_size; +} + void ColumnArray::insert_indices_from(const IColumn& src, const int* indices_begin, const int* indices_end) { for (auto x = indices_begin; x != indices_end; ++x) { diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index a5f396d520..4cb96a2eb4 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -104,6 +104,7 @@ public: void insert_default() override; void pop_back(size_t n) override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; + size_t filter(const Filter& filter) override; ColumnPtr permute(const Permutation& perm, size_t limit) const override; //ColumnPtr index(const IColumn & indexes, size_t limit) const; template <typename Type> @@ -227,9 +228,16 @@ private: template <typename T> ColumnPtr filter_number(const Filter& filt, ssize_t result_size_hint) const; + template <typename T> + size_t filter_number(const Filter& filter); + ColumnPtr filter_string(const Filter& filt, ssize_t result_size_hint) const; ColumnPtr filter_nullable(const Filter& filt, ssize_t result_size_hint) const; ColumnPtr filter_generic(const Filter& filt, ssize_t result_size_hint) const; + + size_t filter_string(const Filter& filter); + size_t filter_nullable(const Filter& filter); + size_t filter_generic(const Filter& filter); }; } // namespace doris::vectorized diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index 0bffb5c842..d6c47d0dca 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -248,6 +248,8 @@ public: ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) const override; + size_t filter(const IColumn::Filter& filter) override; + ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override; Container& get_data() { return data; } @@ -327,6 +329,36 @@ ColumnPtr ColumnComplexType<T>::filter(const IColumn::Filter& filt, return res; } +template <typename T> +size_t ColumnComplexType<T>::filter(const IColumn::Filter& filter) { + size_t size = data.size(); + if (size != filter.size()) { + LOG(FATAL) << "Size of filter doesn't match size of column."; + } + + if (data.size() == 0) { + return 0; + } + + T* res_data = data.data(); + + const UInt8* filter_pos = filter.data(); + const UInt8* filter_end = filter_pos + size; + const T* data_pos = data.data(); + + while (filter_pos < filter_end) { + if (*filter_pos) { + *res_data = std::move(*data_pos); + ++res_data; + } + + ++filter_pos; + ++data_pos; + } + + return res_data - data.data(); +} + template <typename T> ColumnPtr ColumnComplexType<T>::permute(const IColumn::Permutation& perm, size_t limit) const { size_t size = data.size(); diff --git a/be/src/vec/columns/column_const.cpp b/be/src/vec/columns/column_const.cpp index f207ff67b9..7f4bf8834d 100644 --- a/be/src/vec/columns/column_const.cpp +++ b/be/src/vec/columns/column_const.cpp @@ -58,6 +58,17 @@ ColumnPtr ColumnConst::filter(const Filter& filt, ssize_t /*result_size_hint*/) return ColumnConst::create(data, count_bytes_in_filter(filt)); } +size_t ColumnConst::filter(const Filter& filter) { + if (s != filter.size()) { + LOG(FATAL) << fmt::format("Size of filter ({}) doesn't match size of column ({})", + filter.size(), s); + } + + const auto result_size = count_bytes_in_filter(filter); + resize(result_size); + return result_size; +} + ColumnPtr ColumnConst::replicate(const Offsets& offsets) const { if (s != offsets.size()) { LOG(FATAL) << fmt::format("Size of offsets ({}) doesn't match size of column ({})", diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index 094c6b37d2..9f70628776 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -144,6 +144,8 @@ public: const uint8_t* __restrict null_data) const override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; + size_t filter(const Filter& filter) override; + ColumnPtr replicate(const Offsets& offsets) const override; void replicate(const uint32_t* counts, size_t target_size, IColumn& column, size_t begin = 0, int count_sz = -1) const override; diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index 4bca086e5f..778a0b83d3 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -325,6 +325,61 @@ ColumnPtr ColumnDecimal<T>::filter(const IColumn::Filter& filt, ssize_t result_s return res; } +template <typename T> +size_t ColumnDecimal<T>::filter(const IColumn::Filter& filter) { + size_t size = data.size(); + if (size != filter.size()) { + LOG(FATAL) << "Size of filter doesn't match size of column."; + } + + const UInt8* filter_pos = filter.data(); + const UInt8* filter_end = filter_pos + size; + const T* data_pos = data.data(); + T* result_data = data.data(); + + /** A slightly more optimized version. + * Based on the assumption that often pieces of consecutive values + * completely pass or do not pass the filter. + * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. + */ + static constexpr size_t SIMD_BYTES = 32; + const UInt8* filter_end_sse = filter_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filter_pos < filter_end_sse) { + uint32_t mask = simd::bytes32_mask_to_bits32_mask(filter_pos); + + if (0xFFFFFFFF == mask) { + memmove(result_data, data_pos, sizeof(T) * SIMD_BYTES); + result_data += SIMD_BYTES; + } else { + while (mask) { + const size_t idx = __builtin_ctzll(mask); + *result_data = data_pos[idx]; + ++result_data; + mask = mask & (mask - 1); + } + } + + filter_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } + + while (filter_pos < filter_end) { + if (*filter_pos) { + *result_data = *data_pos; + ++result_data; + } + + ++filter_pos; + ++data_pos; + } + + const auto result_size = result_data - data.data(); + data.set_end_ptr(result_data); + + return result_size; +} + template <typename T> ColumnPtr ColumnDecimal<T>::replicate(const IColumn::Offsets& offsets) const { size_t size = data.size(); diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index e785e0df42..fe00b98b65 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -183,6 +183,9 @@ public: void clear() override { data.clear(); } ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) const override; + + size_t filter(const IColumn::Filter& filter) override; + ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override; // ColumnPtr index(const IColumn & indexes, size_t limit) const override; diff --git a/be/src/vec/columns/column_dictionary.h b/be/src/vec/columns/column_dictionary.h index 9ed997c43f..5c0ee0f059 100644 --- a/be/src/vec/columns/column_dictionary.h +++ b/be/src/vec/columns/column_dictionary.h @@ -179,6 +179,10 @@ public: LOG(FATAL) << "filter not supported in ColumnDictionary"; } + [[noreturn]] size_t filter(const IColumn::Filter&) override { + LOG(FATAL) << "filter not supported in ColumnDictionary"; + } + [[noreturn]] ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override { LOG(FATAL) << "permute not supported in ColumnDictionary"; } diff --git a/be/src/vec/columns/column_dummy.h b/be/src/vec/columns/column_dummy.h index 40bdd9cfd2..b66b284e3f 100644 --- a/be/src/vec/columns/column_dummy.h +++ b/be/src/vec/columns/column_dummy.h @@ -87,6 +87,12 @@ public: return clone_dummy(count_bytes_in_filter(filt)); } + size_t filter(const Filter& filter) override { + const auto result_size = count_bytes_in_filter(filter); + s = result_size; + return result_size; + } + ColumnPtr permute(const Permutation& perm, size_t limit) const override { if (s != perm.size()) { LOG(FATAL) << "Size of permutation doesn't match size of column."; diff --git a/be/src/vec/columns/column_fixed_length_object.h b/be/src/vec/columns/column_fixed_length_object.h index aba83c5f1c..c1e3a53043 100644 --- a/be/src/vec/columns/column_fixed_length_object.h +++ b/be/src/vec/columns/column_fixed_length_object.h @@ -143,6 +143,10 @@ public: LOG(FATAL) << "filter not supported"; } + [[noreturn]] size_t filter(const IColumn::Filter&) override { + LOG(FATAL) << "filter not supported"; + } + [[noreturn]] ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override { LOG(FATAL) << "permute not supported"; } diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index f477b9f336..9febda9570 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -157,6 +157,13 @@ ColumnPtr ColumnMap::filter(const Filter& filt, ssize_t result_size_hint) const values->filter(filt, result_size_hint)); } +size_t ColumnMap::filter(const Filter& filter) { + const auto key_result_size = keys->filter(filter); + const auto value_result_size = values->filter(filter); + CHECK_EQ(key_result_size, value_result_size); + return value_result_size; +} + ColumnPtr ColumnMap::permute(const Permutation& perm, size_t limit) const { return ColumnMap::create(keys->permute(perm, limit), values->permute(perm, limit)); } diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 500fc43f3a..946dbd537d 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -84,6 +84,9 @@ public: void update_hash_with_value(size_t n, SipHash& hash) const override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; + + size_t filter(const Filter& filter) override; + ColumnPtr permute(const Permutation& perm, size_t limit) const override; ColumnPtr replicate(const Offsets& offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override { diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 5ec9f43571..92fb114a4e 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -309,6 +309,13 @@ ColumnPtr ColumnNullable::filter(const Filter& filt, ssize_t result_size_hint) c return ColumnNullable::create(filtered_data, filtered_null_map); } +size_t ColumnNullable::filter(const Filter& filter) { + const auto data_result_size = get_nested_column().filter(filter); + const auto map_result_size = get_null_map_column().filter(filter); + CHECK_EQ(data_result_size, map_result_size); + return data_result_size; +} + Status ColumnNullable::filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) { const ColumnNullable* nullable_col_ptr = reinterpret_cast<const ColumnNullable*>(col_ptr); ColumnPtr nest_col_ptr = nullable_col_ptr->nested_column; diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index fc8e901c94..c48e3c80a1 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -161,6 +161,9 @@ public: void pop_back(size_t n) override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; + + size_t filter(const Filter& filter) override; + Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override; ColumnPtr permute(const Permutation& perm, size_t limit) const override; // ColumnPtr index(const IColumn & indexes, size_t limit) const override; diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 2bd804fc51..65a9d31829 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -315,6 +315,11 @@ public: return nullptr; } + size_t filter(const Filter&) override { + LOG(FATAL) << "should not call the method in column object"; + return 0; + } + ColumnPtr permute(const Permutation&, size_t) const override { LOG(FATAL) << "should not call the method in column object"; return nullptr; diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 8c6ca64efe..eff29d17a4 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -148,6 +148,16 @@ ColumnPtr ColumnString::filter(const Filter& filt, ssize_t result_size_hint) con return res; } +size_t ColumnString::filter(const Filter& filter) { + CHECK_EQ(filter.size(), offsets.size()); + if (offsets.size() == 0) { + resize(0); + return 0; + } + + return filter_arrays_impl<UInt8, Offset>(chars, offsets, filter); +} + ColumnPtr ColumnString::permute(const Permutation& perm, size_t limit) const { size_t size = offsets.size(); diff --git a/be/src/vec/columns/column_string.h b/be/src/vec/columns/column_string.h index 4623d02eda..01cef34181 100644 --- a/be/src/vec/columns/column_string.h +++ b/be/src/vec/columns/column_string.h @@ -419,6 +419,7 @@ public: const int* indices_end) override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; + size_t filter(const Filter& filter) override; ColumnPtr permute(const Permutation& perm, size_t limit) const override; diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index afef00c314..66eb57500c 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -248,6 +248,18 @@ ColumnPtr ColumnStruct::filter(const Filter& filt, ssize_t result_size_hint) con return ColumnStruct::create(new_columns); } +size_t ColumnStruct::filter(const Filter& filter) { + const size_t tuple_size = columns.size(); + + size_t result_size = 0; + for (size_t i = 0; i < tuple_size; ++i) { + const auto this_result_size = columns[i]->filter(filter); + CHECK(result_size == 0 || result_size == this_result_size); + result_size = this_result_size; + } + return result_size; +} + ColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) const { const size_t tuple_size = columns.size(); Columns new_columns(tuple_size); diff --git a/be/src/vec/columns/column_struct.h b/be/src/vec/columns/column_struct.h index 60d0e46315..6d0f951d05 100644 --- a/be/src/vec/columns/column_struct.h +++ b/be/src/vec/columns/column_struct.h @@ -146,6 +146,9 @@ public: void insert_range_from(const IColumn& src, size_t start, size_t length) override; ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override; + + size_t filter(const Filter& filter) override; + ColumnPtr permute(const Permutation& perm, size_t limit) const override; ColumnPtr replicate(const Offsets& offsets) const override; MutableColumns scatter(ColumnIndex num_columns, const Selector& selector) const override; diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index ea5c14c0a8..06c0a149ab 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -440,6 +440,62 @@ ColumnPtr ColumnVector<T>::filter(const IColumn::Filter& filt, ssize_t result_si return res; } +template <typename T> +size_t ColumnVector<T>::filter(const IColumn::Filter& filter) { + size_t size = data.size(); + if (size != filter.size()) { + LOG(FATAL) << "Size of filter doesn't match size of column. data size: " << size + << ", filter size: " << filter.size() << get_stack_trace(); + } + + const UInt8* filter_pos = filter.data(); + const UInt8* filter_end = filter_pos + size; + T* data_pos = data.data(); + T* result_data = data_pos; + + /** A slightly more optimized version. + * Based on the assumption that often pieces of consecutive values + * completely pass or do not pass the filter. + * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. + */ + static constexpr size_t SIMD_BYTES = 32; + const UInt8* filter_end_sse = filter_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filter_pos < filter_end_sse) { + uint32_t mask = simd::bytes32_mask_to_bits32_mask(filter_pos); + + if (0xFFFFFFFF == mask) { + memmove(result_data, data_pos, sizeof(T) * SIMD_BYTES); + result_data += SIMD_BYTES; + } else { + while (mask) { + const size_t idx = __builtin_ctzll(mask); + *result_data = data_pos[idx]; + ++result_data; + mask = mask & (mask - 1); + } + } + + filter_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } + + while (filter_pos < filter_end) { + if (*filter_pos) { + *result_data = *data_pos; + ++result_data; + } + + ++filter_pos; + ++data_pos; + } + + const auto new_size = result_data - data.data(); + resize(new_size); + + return new_size; +} + template <typename T> ColumnPtr ColumnVector<T>::permute(const IColumn::Permutation& perm, size_t limit) const { size_t size = data.size(); diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index cd56cbdd04..f1678715c7 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -325,6 +325,7 @@ public: } ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) const override; + size_t filter(const IColumn::Filter& filter) override; // note(wb) this method is only used in storage layer now Status filter_by_selector(const uint16_t* sel, size_t sel_size, IColumn* col_ptr) override { diff --git a/be/src/vec/columns/columns_common.cpp b/be/src/vec/columns/columns_common.cpp index 1988695eaa..d022607185 100644 --- a/be/src/vec/columns/columns_common.cpp +++ b/be/src/vec/columns/columns_common.cpp @@ -99,7 +99,7 @@ namespace { /// Implementation details of filterArraysImpl function, used as template parameter. /// Allow to build or not to build offsets array. -template <typename OT> +template <typename OT, bool USE_MEMMOVE = false> struct ResultOffsetsBuilder { PaddedPODArray<OT>& res_offsets; OT current_src_offset = 0; @@ -119,7 +119,11 @@ struct ResultOffsetsBuilder { void insert_chunk(const OT* src_offsets_pos, bool first, OT chunk_offset, size_t chunk_size) { const auto offsets_size_old = res_offsets.size(); res_offsets.resize_assume_reserved(offsets_size_old + SIMD_BYTES); - memcpy(&res_offsets[offsets_size_old], src_offsets_pos, SIMD_BYTES * sizeof(OT)); + if constexpr (USE_MEMMOVE) { + memmove(&res_offsets[offsets_size_old], src_offsets_pos, SIMD_BYTES * sizeof(OT)); + } else { + memcpy(&res_offsets[offsets_size_old], src_offsets_pos, SIMD_BYTES * sizeof(OT)); + } if (!first) { /// difference between current and actual offset @@ -228,6 +232,95 @@ void filter_arrays_impl_generic(const PaddedPODArray<T>& src_elems, ++offsets_pos; } } + +template <typename T, typename OT, typename ResultOffsetsBuilder> +size_t filter_arrays_impl_generic_without_reserving(PaddedPODArray<T>& elems, + PaddedPODArray<OT>& offsets, + const IColumn::Filter& filter) { + const size_t size = offsets.size(); + if (offsets.size() != filter.size()) { + LOG(FATAL) << "Size of filter doesn't match size of column."; + } + + /// If no need to filter the `offsets`, here do not reset the end ptr of `offsets` + if constexpr (!std::is_same_v<ResultOffsetsBuilder, NoResultOffsetsBuilder<OT>>) { + /// Reset the end ptr to prepare for inserting/pushing elements into `offsets` in `ResultOffsetsBuilder`. + offsets.set_end_ptr(offsets.data()); + } + + ResultOffsetsBuilder result_offsets_builder(&offsets); + + const UInt8* filter_pos = filter.data(); + const T* src_data = elems.data(); + T* result_data = elems.data(); + const auto filter_end = filter_pos + size; + + auto offsets_pos = offsets.data(); + const auto offsets_begin = offsets_pos; + size_t result_size = 0; + + /// copy array ending at *end_offset_ptr + const auto copy_array = [&](const OT* offset_ptr) { + const auto arr_offset = offset_ptr == offsets_begin ? 0 : offset_ptr[-1]; + const auto arr_size = *offset_ptr - arr_offset; + + result_offsets_builder.insert_one(arr_size); + const size_t size_to_copy = arr_size * sizeof(T); + memmove(result_data, &src_data[arr_offset], size_to_copy); + result_data += arr_size; + }; + + static constexpr size_t SIMD_BYTES = 32; + const auto filter_end_aligned = filter_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filter_pos < filter_end_aligned) { + auto mask = simd::bytes32_mask_to_bits32_mask(filter_pos); + + if (mask == 0xffffffff) { + /// SIMD_BYTES consecutive rows pass the filter + const auto first = offsets_pos == offsets_begin; + + const auto chunk_offset = first ? 0 : offsets_pos[-1]; + const auto chunk_size = offsets_pos[SIMD_BYTES - 1] - chunk_offset; + + result_offsets_builder.template insert_chunk<SIMD_BYTES>(offsets_pos, first, + chunk_offset, chunk_size); + + /// copy elements for SIMD_BYTES arrays at once + const size_t size_to_copy = chunk_size * sizeof(T); + memmove(result_data, &src_data[chunk_offset], size_to_copy); + result_data += chunk_size; + result_size += SIMD_BYTES; + } else { + while (mask) { + const size_t bit_pos = __builtin_ctzll(mask); + copy_array(offsets_pos + bit_pos); + ++result_size; + mask = mask & (mask - 1); + } + } + + filter_pos += SIMD_BYTES; + offsets_pos += SIMD_BYTES; + } + + while (filter_pos < filter_end) { + if (*filter_pos) { + copy_array(offsets_pos); + ++result_size; + } + + ++filter_pos; + ++offsets_pos; + } + + if constexpr (!std::is_same_v<ResultOffsetsBuilder, NoResultOffsetsBuilder<OT>>) { + const size_t result_data_size = result_data - elems.data(); + CHECK_EQ(result_data_size, offsets.back()); + } + elems.set_end_ptr(result_data); + return result_size; +} } // namespace template <typename T, typename OT> @@ -238,6 +331,13 @@ void filter_arrays_impl(const PaddedPODArray<T>& src_elems, const PaddedPODArray src_elems, src_offsets, res_elems, &res_offsets, filt, result_size_hint); } +template <typename T, typename OT> +size_t filter_arrays_impl(PaddedPODArray<T>& data, PaddedPODArray<OT>& offsets, + const IColumn::Filter& filter) { + return filter_arrays_impl_generic_without_reserving<T, OT, ResultOffsetsBuilder<OT, true>>( + data, offsets, filter); +} + template <typename T, typename OT> void filter_arrays_impl_only_data(const PaddedPODArray<T>& src_elems, const PaddedPODArray<OT>& src_offsets, @@ -247,14 +347,25 @@ void filter_arrays_impl_only_data(const PaddedPODArray<T>& src_elems, src_elems, src_offsets, res_elems, nullptr, filt, result_size_hint); } +template <typename T, typename OT> +size_t filter_arrays_impl_only_data(PaddedPODArray<T>& data, PaddedPODArray<OT>& offsets, + const IColumn::Filter& filter) { + return filter_arrays_impl_generic_without_reserving<T, OT, NoResultOffsetsBuilder<OT>>( + data, offsets, filter); +} + /// Explicit instantiations - not to place the implementation of the function above in the header file. #define INSTANTIATE(TYPE, OFFTYPE) \ template void filter_arrays_impl<TYPE, OFFTYPE>( \ const PaddedPODArray<TYPE>&, const PaddedPODArray<OFFTYPE>&, PaddedPODArray<TYPE>&, \ PaddedPODArray<OFFTYPE>&, const IColumn::Filter&, ssize_t); \ + template size_t filter_arrays_impl<TYPE, OFFTYPE>( \ + PaddedPODArray<TYPE>&, PaddedPODArray<OFFTYPE>&, const IColumn::Filter&); \ template void filter_arrays_impl_only_data<TYPE, OFFTYPE>( \ const PaddedPODArray<TYPE>&, const PaddedPODArray<OFFTYPE>&, PaddedPODArray<TYPE>&, \ - const IColumn::Filter&, ssize_t); + const IColumn::Filter&, ssize_t); \ + template size_t filter_arrays_impl_only_data<TYPE, OFFTYPE>( \ + PaddedPODArray<TYPE>&, PaddedPODArray<OFFTYPE>&, const IColumn::Filter&); INSTANTIATE(UInt8, IColumn::Offset) INSTANTIATE(UInt8, ColumnArray::Offset64) diff --git a/be/src/vec/columns/columns_common.h b/be/src/vec/columns/columns_common.h index 10233c6111..3bae20f0ba 100644 --- a/be/src/vec/columns/columns_common.h +++ b/be/src/vec/columns/columns_common.h @@ -51,6 +51,14 @@ void filter_arrays_impl_only_data(const PaddedPODArray<T>& src_elems, PaddedPODArray<T>& res_elems, const IColumn::Filter& filt, ssize_t result_size_hint); +template <typename T, typename OT> +size_t filter_arrays_impl(PaddedPODArray<T>& data, PaddedPODArray<OT>& offsets, + const IColumn::Filter& filter); + +template <typename T, typename OT> +size_t filter_arrays_impl_only_data(PaddedPODArray<T>& data, PaddedPODArray<OT>& offsets, + const IColumn::Filter& filter); + namespace detail { template <typename T> const PaddedPODArray<T>* get_indexes_data(const IColumn& indexes); diff --git a/be/src/vec/columns/predicate_column.h b/be/src/vec/columns/predicate_column.h index 2b00fee50c..f67909889f 100644 --- a/be/src/vec/columns/predicate_column.h +++ b/be/src/vec/columns/predicate_column.h @@ -433,6 +433,10 @@ public: LOG(FATAL) << "filter not supported in PredicateColumnType"; } + [[noreturn]] size_t filter(const IColumn::Filter&) override { + LOG(FATAL) << "filter not supported in PredicateColumnType"; + } + [[noreturn]] ColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override { LOG(FATAL) << "permute not supported in PredicateColumnType"; } diff --git a/be/src/vec/core/block.cpp b/be/src/vec/core/block.cpp index ce54efd5a4..ec84a19b7b 100644 --- a/be/src/vec/core/block.cpp +++ b/be/src/vec/core/block.cpp @@ -655,9 +655,14 @@ void Block::filter_block_internal(Block* block, const std::vector<uint32_t>& col } } else { for (auto& col : columns_to_filter) { - if (block->get_by_position(col).column->size() != count) { - block->get_by_position(col).column = - block->get_by_position(col).column->filter(filter, count); + auto& column = block->get_by_position(col).column; + if (column->size() != count) { + if (column->use_count() == 1) { + const auto result_size = column->assume_mutable()->filter(filter); + CHECK_EQ(result_size, count); + } else { + column = column->filter(filter, count); + } } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org