gcc/ChangeLog:

        * config/i386/i386-builtin-types.def:
        Add DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI).
        * config/i386/i386-builtin.def (BDESC): Add new builtins.
        * config/i386/i386-expand.cc (ix86_expand_args_builtin): Handle
        V16SI_FTYPE_V16SI_V16SI.
        * config/i386/sm4intrin.h: Add zmm intrinsics.
        * config/i386/sse.md (vsm4key4_<mode>): Add EVEX pattern.
        (vsm4rnds4_<mode>): Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/i386/sm4-check.h: Add zmm test.
        * gcc.target/i386/sm4-avx10_2-1.c: New test.
        * gcc.target/i386/sm4-avx10_2-512-1.c: Ditto.
        * gcc.target/i386/sm4key4-avx10_2-512-2.c: Ditto.
        * gcc.target/i386/sm4rnds4-avx10_2-512-2.c: Ditto.
---
 gcc/config/i386/i386-builtin-types.def        |  3 +
 gcc/config/i386/i386-builtin.def              |  2 +
 gcc/config/i386/i386-expand.cc                |  1 +
 gcc/config/i386/sm4intrin.h                   | 25 ++++++++
 gcc/config/i386/sse.md                        | 20 ++++---
 gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c | 58 +++++++++++++++++++
 .../gcc.target/i386/sm4-avx10_2-512-1.c       | 15 +++++
 gcc/testsuite/gcc.target/i386/sm4-check.h     | 36 +++++++++++-
 .../gcc.target/i386/sm4key4-avx10_2-512-2.c   | 18 ++++++
 .../gcc.target/i386/sm4rnds4-avx10_2-512-2.c  | 18 ++++++
 10 files changed, 186 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c

diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 290f6e649a9..c6034238ac4 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1507,3 +1507,6 @@ DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, UQI, INT)
 DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT, V32HF, USI, INT)
 DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, INT, V16HF, UHI, INT)
 DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT)
+
+# SM4 builtins
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 151ccf4f252..db87dd7e8e1 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1668,8 +1668,10 @@ BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_SM3, CODE_FOR_vsm3rnds2, "__builtin
 /* SM4 */
 BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v4si, "__builtin_ia32_vsm4key4128", IX86_BUILTIN_VSM4KEY4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
 BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v8si, "__builtin_ia32_vsm4key4256", IX86_BUILTIN_VSM4KEY4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vsm4key4_v16si, "__builtin_ia32_vsm4key4512", IX86_BUILTIN_VSM4KEY4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI)
 BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v4si, "__builtin_ia32_vsm4rnds4128", IX86_BUILTIN_VSM4RNDS4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
 BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v8si, "__builtin_ia32_vsm4rnds4256", IX86_BUILTIN_VSM4RNDS4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vsm4rnds4_v16si, "__builtin_ia32_vsm4rnds4512", IX86_BUILTIN_VSM4RNDS4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI)
 
 /* SHA512 */
 BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512msg1, "__builtin_ia32_vsha512msg1", IX86_BUILTIN_VSHA512MSG1, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index ba598701e34..1fffb111530 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -11477,6 +11477,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V16QI_FTYPE_V8HI_V8HI:
     case V16HF_FTYPE_V16HF_V16HF:
     case V16SF_FTYPE_V16SF_V16SF:
+    case V16SI_FTYPE_V16SI_V16SI:
     case V8QI_FTYPE_V8QI_V8QI:
     case V8QI_FTYPE_V4HI_V4HI:
     case V8HI_FTYPE_V8HI_V8HI:
diff --git a/gcc/config/i386/sm4intrin.h b/gcc/config/i386/sm4intrin.h
index 4c212ccb566..e2d78f01e6e 100644
--- a/gcc/config/i386/sm4intrin.h
+++ b/gcc/config/i386/sm4intrin.h
@@ -67,4 +67,29 @@ _mm256_sm4rnds4_epi32 (__m256i __A, __m256i __B)
 #pragma GCC pop_options
 #endif /* __DISABLE_SM4__ */
 
+#if !defined (__SM4__) || !defined (__AVX10_2_512__)
+#pragma GCC push_options
+#pragma GCC target("sm4,avx10.2-512")
+#define __DISABLE_SM4_512__
+#endif /* __SM4_512__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sm4key4_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vsm4key4512 ((__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sm4rnds4_epi32 (__m512i __A, __m512i __B)
+{
+  return (__m512i) __builtin_ia32_vsm4rnds4512 ((__v16si) __A, (__v16si) __B);
+}
+
+#ifdef __DISABLE_SM4_512__
+#undef __DISABLE_SM4_512__
+#pragma GCC pop_options
+#endif /* __DISABLE_SM4_512__ */
+
 #endif /* _SM4INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 6c28b74ac3f..e438189d56d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -30054,25 +30054,29 @@
    (set_attr "mode" "OI")])
 
 (define_insn "vsm4key4_<mode>"
-  [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
-        (unspec:VI4_AVX
-          [(match_operand:VI4_AVX 1 "register_operand" "x")
-           (match_operand:VI4_AVX 2 "vector_operand" "xBm")]
+  [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=x,v")
+        (unspec:VI4_AVX10_2
+          [(match_operand:VI4_AVX10_2 1 "register_operand" "x,v")
+           (match_operand:VI4_AVX10_2 2 "vector_operand" "xBm,vBm")]
           UNSPEC_SM4KEY4))]
   "TARGET_SM4"
   "vsm4key4\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "other")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "isa" "avx,avx10_2")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "vsm4rnds4_<mode>"
-  [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
-        (unspec:VI4_AVX
-          [(match_operand:VI4_AVX 1 "register_operand" "x")
-           (match_operand:VI4_AVX 2 "vector_operand" "xBm")]
+  [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=x,v")
+        (unspec:VI4_AVX10_2
+          [(match_operand:VI4_AVX10_2 1 "register_operand" "x,v")
+           (match_operand:VI4_AVX10_2 2 "vector_operand" "xBm,vBm")]
           UNSPEC_SM4RNDS4))]
   "TARGET_SM4"
   "vsm4rnds4\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "other")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "isa" "avx,avx10_2")
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
diff --git a/gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c
new file mode 100644
index 00000000000..4746f6f7800
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c
@@ -0,0 +1,58 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2" } */
+
+#include <immintrin.h>
+
+void
+f1 (__m128i x, __m128i y)
+{
+  register __m128i a __asm("xmm16");
+  register __m128i b __asm("xmm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b)); 
+  a = _mm_sm4key4_epi32 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f2 (__m256i x, __m256i y)
+{
+  register __m256i a __asm("ymm16");
+  register __m256i b __asm("ymm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b)); 
+  a = _mm256_sm4key4_epi32 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f3 (__m128i x, __m128i y)
+{
+  register __m128i a __asm("xmm16");
+  register __m128i b __asm("xmm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b)); 
+  a = _mm_sm4rnds4_epi32 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+void
+f4 (__m256i x, __m256i y)
+{
+  register __m256i a __asm("ymm16");
+  register __m256i b __asm("ymm17");
+  a = x;
+  b = y;
+  asm volatile ("" : "+v" (a), "+v" (b)); 
+  a = _mm256_sm4rnds4_epi32 (a, b);
+  asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%xmm17\[^\n\]*%xmm16\[^\n\]*%xmm16" } } */
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%ymm17\[^\n\]*%ymm16\[^\n\]*%ymm16" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%xmm17\[^\n\]*%xmm16\[^\n\]*%xmm16" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%ymm17\[^\n\]*%ymm16\[^\n\]*%ymm16" } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c
new file mode 100644
index 00000000000..546472a933b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+
+void extern
+sm4_test (void)
+{
+  x = _mm512_sm4key4_epi32 (y, z);
+  x = _mm512_sm4rnds4_epi32 (y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sm4-check.h b/gcc/testsuite/gcc.target/i386/sm4-check.h
index 435fcf2b17d..72543a72ad8 100644
--- a/gcc/testsuite/gcc.target/i386/sm4-check.h
+++ b/gcc/testsuite/gcc.target/i386/sm4-check.h
@@ -1,7 +1,11 @@
 #include <stdlib.h>
-#include "m256-check.h"
+#include "m512-check.h"
 
+#ifdef AVX10_2_512
+static void sm4_avx512f_test (void);
+#else
 static void sm4_test (void);
+#endif
 
 typedef union
 {
@@ -156,18 +160,46 @@ compute_sm4##name##4 (int *dst, int *src1, int *src2, int vl) \
   if (check_union256i_d (res2, dst2))                        \
     abort ();
 
+#define SM4_AVX512F_SIMULATE(name)                           \
+  union512i_d src5, src6, res3;                                      \
+  int dst3[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};    \
+                                                             \
+  src5.x = _mm512_set_epi32 (111, 222, 333, 444, 555, 666, 777, 888,  \
+                             999, 123, 456, 789, 135, 792, 468, 147); \
+  src6.x = _mm512_set_epi32 (258, 369, 159, 483, 726, 162, 738, 495,  \
+                             174, 285, 396, 186, 429, 752, 198, 765); \
+  res3.x = _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); \
+                                                             \
+  res3.x = _mm512_sm4##name##4_epi32 (src5.x, src6.x);       \
+                                                             \
+  compute_sm4##name##4 (dst3, src5.a, src6.a, 512);          \
+                                                             \
+  if (check_union512i_d (res3, dst3))                        \
+    abort ();
+
 static void
 __attribute__ ((noinline))
 do_test (void)
 {
+#ifdef AVX10_512BIT
+  sm4_avx512f_test ();
+#else
   sm4_test ();
+#endif
 }
 
 int
 main ()
 {
   /* Check CPU support for SM4.  */
-  if (__builtin_cpu_supports ("sm4"))
+  if (__builtin_cpu_supports ("sm4")
+#ifdef AVX10_2
+      && __builtin_cpu_supports ("avx10.2")
+#endif
+#ifdef AVX10_2_512
+      && __builtin_cpu_supports ("avx10.2-512")
+#endif
+      )
     {
       do_test ();
 #ifdef DEBUG
diff --git a/gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c b/gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c
new file mode 100644
index 00000000000..85b7e3ef118
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */
+/* { dg-require-effective-target sm4 } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#include "sm4-check.h"
+
+char key;
+SM4_FUNC (key);
+
+static void
+sm4_avx512f_test (void)
+{
+  SM4_AVX512F_SIMULATE (key);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c b/gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c
new file mode 100644
index 00000000000..1eaf08bde1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */
+/* { dg-require-effective-target sm4 } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#include "sm4-check.h"
+
+char rnds;
+SM4_FUNC (rnds);
+
+static void
+sm4_avx512f_test (void)
+{
+  SM4_AVX512F_SIMULATE (rnds);
+}
-- 
2.31.1

Reply via email to