kaka11chen commented on code in PR #17594: URL: https://github.com/apache/doris/pull/17594#discussion_r1136986784
########## be/src/vec/columns/column_dictionary.h: ########## @@ -166,21 +176,123 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> { size_t size_of_value_if_fixed() const override { return sizeof(T); } - [[noreturn]] StringRef get_raw_data() const override { - LOG(FATAL) << "get_raw_data not supported in ColumnDictionary"; + StringRef get_raw_data() const override { + return StringRef(reinterpret_cast<const char*>(_codes.data()), _codes.size()); } [[noreturn]] bool structure_equals(const IColumn& rhs) const override { LOG(FATAL) << "structure_equals not supported in ColumnDictionary"; } - [[noreturn]] ColumnPtr filter(const IColumn::Filter& filt, - ssize_t result_size_hint) const override { - LOG(FATAL) << "filter not supported in ColumnDictionary"; + ColumnPtr filter(const IColumn::Filter& filt, ssize_t result_size_hint) const override { + size_t size = _codes.size(); + if (size != filt.size()) { + LOG(FATAL) << "Size of filter doesn't match size of column. data size: " << size + << ", filter size: " << filt.size() << get_stack_trace(); + } + + auto res = this->create(); + if constexpr (std::is_same_v<T, vectorized::Int64>) { + res->copy_date_types(*this); + } + Container& res_data = res->get_data(); + + res_data.reserve(result_size_hint > 0 ? result_size_hint : size); + + const UInt8* filt_pos = filt.data(); + const UInt8* filt_end = filt_pos + size; + const T* data_pos = _codes.data(); + + /** A slightly more optimized version. + * Based on the assumption that often pieces of consecutive values + * completely pass or do not pass the filter. + * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. 
+ */ + static constexpr size_t SIMD_BYTES = 32; + const UInt8* filt_end_sse = filt_pos + size / SIMD_BYTES * SIMD_BYTES; + + while (filt_pos < filt_end_sse) { + uint32_t mask = simd::bytes32_mask_to_bits32_mask(filt_pos); + + if (0xFFFFFFFF == mask) { + res_data.insert(data_pos, data_pos + SIMD_BYTES); + } else { + while (mask) { + const size_t idx = __builtin_ctzll(mask); + res_data.push_back_without_reserve(data_pos[idx]); + mask = mask & (mask - 1); + } + } + + filt_pos += SIMD_BYTES; + data_pos += SIMD_BYTES; + } + + while (filt_pos < filt_end) { + if (*filt_pos) { + res_data.push_back_without_reserve(*data_pos); + } + + ++filt_pos; + ++data_pos; + } + + return res; } - [[noreturn]] size_t filter(const IColumn::Filter&) override { - LOG(FATAL) << "filter not supported in ColumnDictionary"; + size_t filter(const IColumn::Filter& filter) override { + size_t size = _codes.size(); + if (size != filter.size()) { + LOG(FATAL) << "Size of filter doesn't match size of column. data size: " << size + << ", filter size: " << filter.size() << get_stack_trace(); + } + + const UInt8* filter_pos = filter.data(); Review Comment: Will roll back column_dictionary, so this case does not exist. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org