BiteTheDDDDt commented on code in PR #10472: URL: https://github.com/apache/doris/pull/10472#discussion_r909126908
########## be/src/util/simd/bits.h: ########## @@ -59,10 +59,28 @@ inline uint32_t bytes32_mask_to_bits32_mask(const bool* data) { return bytes32_mask_to_bits32_mask(reinterpret_cast<const uint8_t*>(data)); } -// compiler will make this SIMD automatically inline size_t count_zero_num(const int8_t* __restrict data, size_t size) { size_t num = 0; const int8_t* end = data + size; +#if defined(__SSE2__) && defined(__POPCNT__) + const __m128i zero16 = _mm_setzero_si128(); + const int8_t* end64 = data + (size / 64 * 64); + + for (; data < end64; data += 64) { + num += __builtin_popcountll( + static_cast<uint64_t>(_mm_movemask_epi8(_mm_cmpeq_epi8( Review Comment: Sorry, I found another way: we can shift the bits directly. But there doesn't seem to be a significant difference in performance either. This should give the correct count of `1` bits: ```cpp __builtin_popcountll( static_cast<uint64_t>( _mm_movemask_epi8(*reinterpret_cast<const __m128i*>(data) << 7)) | (static_cast<uint64_t>( _mm_movemask_epi8(*reinterpret_cast<const __m128i*>(data + 16) << 7)) << 16) | (static_cast<uint64_t>( _mm_movemask_epi8(*reinterpret_cast<const __m128i*>(data + 32) << 7)) << 32) | (static_cast<uint64_t>( _mm_movemask_epi8(*reinterpret_cast<const __m128i*>(data + 48) << 7)) << 48)); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org For additional commands, e-mail: commits-h...@doris.apache.org