https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103771
--- Comment #10 from Hongtao.liu <crazylht at gmail dot com> ---
With the following patch:

@@ -12120,7 +12120,8 @@ supportable_narrowing_operation (enum tree_code code,
       c1 = VEC_PACK_TRUNC_EXPR;
       if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
          && VECTOR_BOOLEAN_TYPE_P (vectype)
-         && TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
+         && (TYPE_MODE (narrow_vectype) == TYPE_MODE (vectype)
+             || known_lt (TYPE_VECTOR_SUBPARTS (vectype), BITS_PER_UNIT))
          && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
        optab1 = vec_pack_sbool_trunc_optab;
       else
@@ -12213,6 +12214,7 @@ supportable_narrowing_operation (enum tree_code code,
       if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
          && VECTOR_BOOLEAN_TYPE_P (prev_type)
          && intermediate_mode == prev_mode
+         && known_lt (TYPE_VECTOR_SUBPARTS (intermediate_type), BITS_PER_UNIT)
          && SCALAR_INT_MODE_P (prev_mode))
        interm_optab = vec_pack_sbool_trunc_optab;
       else

-march=icelake-server -O3 -mprefer-vector-width=128 can now get a vectorized loop:

        vmovdqu8        (%rsi,%rax), %xmm0
        vpmovzxbw       %xmm0, %xmm2
        vpmovzxwd       %xmm2, %xmm1
        vpsrldq $8, %xmm0, %xmm0
        vpsrldq $8, %xmm2, %xmm2
        vpmovzxbw       %xmm0, %xmm0
        vpmovzxwd       %xmm2, %xmm2
        vpmulld %xmm9, %xmm1, %xmm1
        vpmulld %xmm9, %xmm2, %xmm2
        vpmovzxwd       %xmm0, %xmm4
        vpsrldq $8, %xmm0, %xmm0
        vpmovzxwd       %xmm0, %xmm0
        vpmulld %xmm9, %xmm4, %xmm4
        vpmulld %xmm9, %xmm0, %xmm0
        vpcmpud $6, %xmm6, %xmm1, %k0
        vpsubd  %xmm1, %xmm7, %xmm3
        vpcmpud $6, %xmm6, %xmm2, %k1
        vpsubd  %xmm2, %xmm7, %xmm5
        vpsrad  $31, %xmm5, %xmm5
        vpsrad  $31, %xmm3, %xmm3
        vpermt2w        %xmm5, %xmm8, %xmm3
        vpsubd  %xmm0, %xmm7, %xmm10
        vpsubd  %xmm4, %xmm7, %xmm5
        kshiftlb        $4, %k1, %k1
        vpcmpud $6, %xmm6, %xmm0, %k2
        vpsrad  $31, %xmm5, %xmm5
        vpsrad  $31, %xmm10, %xmm10
        kandb   %k3, %k0, %k0
        korb    %k1, %k0, %k0
        vpcmpud $6, %xmm6, %xmm4, %k1
        vpermt2w        %xmm10, %xmm8, %xmm5
        vpermt2w        %xmm2, %xmm8, %xmm1
        vpermt2w        %xmm0, %xmm8, %xmm4
        vpermt2b        %xmm5, %xmm11, %xmm3
        vpermt2b        %xmm4, %xmm11, %xmm1
        kandb   %k3, %k1, %k1
        kshiftlb        $4, %k2, %k2
        korb    %k2, %k1, %k1
        kunpckbw        %k0, %k1, %k1
        vmovdqu8        %xmm3, %xmm1{%k1}
        vmovdqu8        %xmm1, (%rdi,%rax)
        addq    $16, %rax
        cmpq    %rax, %r8
        jne     .L4