On Tue, Sep 24, 2024 at 10:16 AM Levy Hsu <ad...@levyhsu.com> wrote: > > This patch enables vectorization of the popcount operation for V2QI, V4QI, > V8QI, V2HI, V4HI, and V2SI modes. Ok. > > gcc/ChangeLog: > > * config/i386/mmx.md: > (VI1_16_32_64): New mode iterator for 8-byte, 4-byte, and 2-byte > QImode. > (popcount<mode>2): New pattern for popcount of V2QI/V4QI/V8QI mode. > (popcount<mode>2): New pattern for popcount of V2HI/V4HI mode. > (popcountv2si2): New pattern for popcount of V2SI mode. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/part-vect-popcount-1.c: New test. > --- > gcc/config/i386/mmx.md | 24 +++++++++ > .../gcc.target/i386/part-vect-popcount-1.c | 49 +++++++++++++++++++ > 2 files changed, 73 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md > index 4bc191b874b..147ae150bf3 100644 > --- a/gcc/config/i386/mmx.md > +++ b/gcc/config/i386/mmx.md > @@ -70,6 +70,9 @@ > ;; 8-byte and 4-byte HImode vector modes > (define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI]) > > +;; 8-byte, 4-byte and 2-byte QImode vector modes > +(define_mode_iterator VI1_16_32_64 [(V8QI "TARGET_MMX_WITH_SSE") V4QI V2QI]) > + > ;; 4-byte and 2-byte integer vector modes > (define_mode_iterator VI_16_32 [V4QI V2QI V2HI]) > > @@ -6786,3 +6789,24 @@ > [(set_attr "type" "mmx") > (set_attr "modrm" "0") > (set_attr "memory" "none")]) > + > +(define_insn "popcount<mode>2" > + [(set (match_operand:VI1_16_32_64 0 "register_operand" "=v") > + (popcount:VI1_16_32_64 > + (match_operand:VI1_16_32_64 1 "register_operand" "v")))] > + "TARGET_AVX512VL && TARGET_AVX512BITALG" > + "vpopcntb\t{%1, %0|%0, %1}") > + > +(define_insn "popcount<mode>2" > + [(set (match_operand:VI2_32_64 0 "register_operand" "=v") > + (popcount:VI2_32_64 > + (match_operand:VI2_32_64 1 "register_operand" "v")))] > + "TARGET_AVX512VL && TARGET_AVX512BITALG" > + "vpopcntw\t{%1, %0|%0, %1}") > + > 
+(define_insn "popcountv2si2" > + [(set (match_operand:V2SI 0 "register_operand" "=v") > + (popcount:V2SI > + (match_operand:V2SI 1 "register_operand" "v")))] > + "TARGET_AVX512VPOPCNTDQ && TARGET_AVX512VL && TARGET_MMX_WITH_SSE" > + "vpopcntd\t{%1, %0|%0, %1}") > diff --git a/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c > b/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c > new file mode 100644 > index 00000000000..a30f6ec4726 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c > @@ -0,0 +1,49 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -mavx512vpopcntdq -mavx512bitalg -mavx512vl" } */ > +/* { dg-final { scan-assembler-times "vpopcntd\[^\n\r\]*xmm\[0-9\]" 1 { > target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vpopcntw\[^\n\r\]*xmm\[0-9\]" 3 { > target ia32 } } } */ > +/* { dg-final { scan-assembler-times "vpopcntw\[^\n\r\]*xmm\[0-9\]" 2 { > target { ! ia32 } } } } */ > +/* { dg-final { scan-assembler-times "vpopcntb\[^\n\r\]*xmm\[0-9\]" 4 { > target ia32 } } } */ > +/* { dg-final { scan-assembler-times "vpopcntb\[^\n\r\]*xmm\[0-9\]" 3 { > target { ! 
ia32 } } } } */ > + > +void > +foo1 (int* a, int* __restrict b) > +{ > + for (int i = 0; i != 2; i++) > + a[i] = __builtin_popcount (b[i]); > +} > + > +void > +foo2 (unsigned short* a, unsigned short* __restrict b) > +{ > + for (int i = 0; i != 4; i++) > + a[i] = __builtin_popcount (b[i]); > +} > + > +void > +foo3 (unsigned short* a, unsigned short* __restrict b) > +{ > + for (int i = 0; i != 2; i++) > + a[i] = __builtin_popcount (b[i]); > +} > + > +void > +foo4 (unsigned char* a, unsigned char* __restrict b) > +{ > + for (int i = 0; i != 8; i++) > + a[i] = __builtin_popcount (b[i]); > +} > + > +void > +foo5 (unsigned char* a, unsigned char* __restrict b) > +{ > + for (int i = 0; i != 4; i++) > + a[i] = __builtin_popcount (b[i]); > +} > + > +void > +foo6 (unsigned char* a, unsigned char* __restrict b) > +{ > + for (int i = 0; i != 2; i++) > + a[i] = __builtin_popcount (b[i]); > +} > -- > 2.31.1 >
-- BR, Hongtao