On Tue, Sep 24, 2024 at 10:16 AM Levy Hsu <ad...@levyhsu.com> wrote:
>
> This patch enables vectorization of the popcount operation for V2QI, V4QI,
> V8QI, V2HI, V4HI, and V2SI modes.
Ok.
>
> gcc/ChangeLog:
>
>         * config/i386/mmx.md:
>         (VI1_16_32_64): New mode iterator for 8-byte, 4-byte, and 2-byte
>         QImode.
>         (popcount<mode>2): New pattern for popcount of V2QI/V4QI/V8QI mode.
>         (popcount<mode>2): New pattern for popcount of V2HI/V4HI mode.
>         (popcountv2si2): New pattern for popcount of V2SI mode.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/i386/part-vect-popcount-1.c: New test.
> ---
>  gcc/config/i386/mmx.md                        | 24 +++++++++
>  .../gcc.target/i386/part-vect-popcount-1.c    | 49 +++++++++++++++++++
>  2 files changed, 73 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 4bc191b874b..147ae150bf3 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -70,6 +70,9 @@
>  ;; 8-byte and 4-byte HImode vector modes
>  (define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI])
>
> +;; 8-byte, 4-byte and 2-byte QImode vector modes
> +(define_mode_iterator VI1_16_32_64 [(V8QI "TARGET_MMX_WITH_SSE") V4QI V2QI])
> +
>  ;; 4-byte and 2-byte integer vector modes
>  (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
>
> @@ -6786,3 +6789,24 @@
>    [(set_attr "type" "mmx")
>     (set_attr "modrm" "0")
>     (set_attr "memory" "none")])
> +
> +(define_insn "popcount<mode>2"
> +  [(set (match_operand:VI1_16_32_64 0 "register_operand" "=v")
> +       (popcount:VI1_16_32_64
> +         (match_operand:VI1_16_32_64 1 "register_operand" "v")))]
> +  "TARGET_AVX512VL && TARGET_AVX512BITALG"
> +  "vpopcntb\t{%1, %0|%0, %1}")
> +
> +(define_insn "popcount<mode>2"
> +  [(set (match_operand:VI2_32_64 0 "register_operand" "=v")
> +       (popcount:VI2_32_64
> +         (match_operand:VI2_32_64 1 "register_operand" "v")))]
> +  "TARGET_AVX512VL && TARGET_AVX512BITALG"
> +  "vpopcntw\t{%1, %0|%0, %1}")
> +
> +(define_insn "popcountv2si2"
> +  [(set (match_operand:V2SI 0 "register_operand" "=v")
> +       (popcount:V2SI
> +         (match_operand:V2SI 1 "register_operand" "v")))]
> +  "TARGET_AVX512VPOPCNTDQ && TARGET_AVX512VL && TARGET_MMX_WITH_SSE"
> +  "vpopcntd\t{%1, %0|%0, %1}")
> diff --git a/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c b/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c
> new file mode 100644
> index 00000000000..a30f6ec4726
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c
> @@ -0,0 +1,49 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mavx512vpopcntdq -mavx512bitalg -mavx512vl" } */
> +/* { dg-final { scan-assembler-times "vpopcntd\[^\n\r\]*xmm\[0-9\]" 1 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpopcntw\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "vpopcntw\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-times "vpopcntb\[^\n\r\]*xmm\[0-9\]" 4 { target ia32 } } } */
> +/* { dg-final { scan-assembler-times "vpopcntb\[^\n\r\]*xmm\[0-9\]" 3 { target { ! ia32 } } } } */
> +
> +void
> +foo1 (int* a, int* __restrict b)
> +{
> +  for (int i = 0; i != 2; i++)
> +    a[i] = __builtin_popcount (b[i]);
> +}
> +
> +void
> +foo2 (unsigned short* a, unsigned short* __restrict b)
> +{
> +  for (int i = 0; i != 4; i++)
> +    a[i] = __builtin_popcount (b[i]);
> +}
> +
> +void
> +foo3 (unsigned short* a, unsigned short* __restrict b)
> +{
> +  for (int i = 0; i != 2; i++)
> +    a[i] = __builtin_popcount (b[i]);
> +}
> +
> +void
> +foo4 (unsigned char* a, unsigned char* __restrict b)
> +{
> +  for (int i = 0; i != 8; i++)
> +    a[i] = __builtin_popcount (b[i]);
> +}
> +
> +void
> +foo5 (unsigned char* a, unsigned char* __restrict b)
> +{
> +  for (int i = 0; i != 4; i++)
> +    a[i] = __builtin_popcount (b[i]);
> +}
> +
> +void
> +foo6 (unsigned char* a, unsigned char* __restrict b)
> +{
> +  for (int i = 0; i != 2; i++)
> +    a[i] = __builtin_popcount (b[i]);
> +}
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to