https://gcc.gnu.org/g:85910e650a61de6da37e3d67a7ac208904dd3c0d

commit r15-3886-g85910e650a61de6da37e3d67a7ac208904dd3c0d
Author: Levy Hsu <ad...@levyhsu.com>
Date:   Wed Sep 11 14:19:02 2024 +0930

    x86: Extend AVX512 Vectorization for Popcount in Various Modes
    
    This patch enables vectorization of the popcount operation for V2QI, V4QI,
    V8QI, V2HI, V4HI, and V2SI modes.
    
    gcc/ChangeLog:
    
            * config/i386/mmx.md:
            (VI1_16_32_64): New mode iterator for 8-byte, 4-byte, and 2-byte
            QImode.
            (popcount<mode>2): New pattern for popcount of V2QI/V4QI/V8QI mode.
            (popcount<mode>2): New pattern for popcount of V2HI/V4HI mode.
            (popcountv2si2): New pattern for popcount of V2SI mode.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/part-vect-popcount-1.c: New test.

Diff:
---
 gcc/config/i386/mmx.md                             | 24 +++++++++++
 .../gcc.target/i386/part-vect-popcount-1.c         | 49 ++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e88a06c441fa..ca768b95df79 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -70,6 +70,9 @@
 ;; 8-byte and 4-byte HImode vector modes
 (define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI])
 
+;; 8-byte, 4-byte and 2-byte QImode vector modes
+(define_mode_iterator VI1_16_32_64 [(V8QI "TARGET_MMX_WITH_SSE") V4QI V2QI])
+
 ;; 4-byte and 2-byte integer vector modes
 (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
 
@@ -6803,3 +6806,24 @@
   [(set_attr "type" "mmx")
    (set_attr "modrm" "0")
    (set_attr "memory" "none")])
+
+(define_insn "popcount<mode>2"
+  [(set (match_operand:VI1_16_32_64 0 "register_operand" "=v")
+       (popcount:VI1_16_32_64
+         (match_operand:VI1_16_32_64 1 "register_operand" "v")))]
+  "TARGET_AVX512VL && TARGET_AVX512BITALG"
+  "vpopcntb\t{%1, %0|%0, %1}")
+
+(define_insn "popcount<mode>2"
+  [(set (match_operand:VI2_32_64 0 "register_operand" "=v")
+       (popcount:VI2_32_64
+         (match_operand:VI2_32_64 1 "register_operand" "v")))]
+  "TARGET_AVX512VL && TARGET_AVX512BITALG"
+  "vpopcntw\t{%1, %0|%0, %1}")
+
+(define_insn "popcountv2si2"
+  [(set (match_operand:V2SI 0 "register_operand" "=v")
+       (popcount:V2SI
+         (match_operand:V2SI 1 "register_operand" "v")))]
+  "TARGET_AVX512VPOPCNTDQ && TARGET_AVX512VL && TARGET_MMX_WITH_SSE"
+  "vpopcntd\t{%1, %0|%0, %1}")
diff --git a/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c b/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c
new file mode 100644
index 000000000000..a30f6ec4726b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/part-vect-popcount-1.c
@@ -0,0 +1,49 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512vpopcntdq -mavx512bitalg -mavx512vl" } */
+/* { dg-final { scan-assembler-times "vpopcntd\[^\n\r\]*xmm\[0-9\]" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[^\n\r\]*xmm\[0-9\]" 3 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vpopcntw\[^\n\r\]*xmm\[0-9\]" 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[^\n\r\]*xmm\[0-9\]" 4 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vpopcntb\[^\n\r\]*xmm\[0-9\]" 3 { target { ! ia32 } } } } */
+
+void
+foo1 (int* a, int* __restrict b)
+{
+  for (int i = 0; i != 2; i++)
+    a[i] = __builtin_popcount (b[i]);
+}
+
+void
+foo2 (unsigned short* a, unsigned short* __restrict b)
+{
+  for (int i = 0; i != 4; i++)
+    a[i] = __builtin_popcount (b[i]);
+}
+
+void
+foo3 (unsigned short* a, unsigned short* __restrict b)
+{
+  for (int i = 0; i != 2; i++)
+    a[i] = __builtin_popcount (b[i]);
+}
+
+void
+foo4 (unsigned char* a, unsigned char* __restrict b)
+{
+  for (int i = 0; i != 8; i++)
+    a[i] = __builtin_popcount (b[i]);
+}
+
+void
+foo5 (unsigned char* a, unsigned char* __restrict b)
+{
+  for (int i = 0; i != 4; i++)
+    a[i] = __builtin_popcount (b[i]);
+}
+
+void
+foo6 (unsigned char* a, unsigned char* __restrict b)
+{
+  for (int i = 0; i != 2; i++)
+    a[i] = __builtin_popcount (b[i]);
+}

Reply via email to