gcc/ChangeLog:

	* config/i386/i386-builtin-types.def: Add DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI).
	* config/i386/i386-builtin.def (BDESC): Add new builtins.
	* config/i386/i386-expand.cc (ix86_expand_args_builtin): Handle V16SI_FTYPE_V16SI_V16SI.
	* config/i386/sm4intrin.h: Add zmm insns.
	* config/i386/sse.md (vsm4key4_<mode>): Add EVEX pattern.
	(vsm4rnds4_<mode>): Ditto.
gcc/testsuite/ChangeLog: * gcc.target/i386/sm4-check.h: Add zmm test. * gcc.target/i386/sm4-avx10_2-1.c: New test. * gcc.target/i386/sm4-avx10_2-512-1.c: Ditto. * gcc.target/i386/sm4key4-avx10_2-512-2.c: Ditto. * gcc.target/i386/sm4rnds4-avx10_2-512-2.c: Ditto. --- gcc/config/i386/i386-builtin-types.def | 3 + gcc/config/i386/i386-builtin.def | 2 + gcc/config/i386/i386-expand.cc | 1 + gcc/config/i386/sm4intrin.h | 25 ++++++++ gcc/config/i386/sse.md | 20 ++++--- gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c | 58 +++++++++++++++++++ .../gcc.target/i386/sm4-avx10_2-512-1.c | 15 +++++ gcc/testsuite/gcc.target/i386/sm4-check.h | 36 +++++++++++- .../gcc.target/i386/sm4key4-avx10_2-512-2.c | 18 ++++++ .../gcc.target/i386/sm4rnds4-avx10_2-512-2.c | 18 ++++++ 10 files changed, 186 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c create mode 100644 gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c create mode 100644 gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 290f6e649a9..c6034238ac4 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1507,3 +1507,6 @@ DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, UQI, INT) DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT, V32HF, USI, INT) DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, INT, V16HF, UHI, INT) DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT) + +# SM4 builtins +DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 151ccf4f252..db87dd7e8e1 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -1668,8 +1668,10 @@ BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_SM3, CODE_FOR_vsm3rnds2, "__builtin /* SM4 */ BDESC (0, OPTION_MASK_ISA2_SM4, 
CODE_FOR_vsm4key4_v4si, "__builtin_ia32_vsm4key4128", IX86_BUILTIN_VSM4KEY4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI) BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v8si, "__builtin_ia32_vsm4key4256", IX86_BUILTIN_VSM4KEY4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vsm4key4_v16si, "__builtin_ia32_vsm4key4512", IX86_BUILTIN_VSM4KEY4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI) BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v4si, "__builtin_ia32_vsm4rnds4128", IX86_BUILTIN_VSM4RNDS4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI) BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v8si, "__builtin_ia32_vsm4rnds4256", IX86_BUILTIN_VSM4RNDS4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI) +BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vsm4rnds4_v16si, "__builtin_ia32_vsm4rnds4512", IX86_BUILTIN_VSM4RNDS4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI) /* SHA512 */ BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512msg1, "__builtin_ia32_vsha512msg1", IX86_BUILTIN_VSHA512MSG1, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index ba598701e34..1fffb111530 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -11477,6 +11477,7 @@ ix86_expand_args_builtin (const struct builtin_description *d, case V16QI_FTYPE_V8HI_V8HI: case V16HF_FTYPE_V16HF_V16HF: case V16SF_FTYPE_V16SF_V16SF: + case V16SI_FTYPE_V16SI_V16SI: case V8QI_FTYPE_V8QI_V8QI: case V8QI_FTYPE_V4HI_V4HI: case V8HI_FTYPE_V8HI_V8HI: diff --git a/gcc/config/i386/sm4intrin.h b/gcc/config/i386/sm4intrin.h index 4c212ccb566..e2d78f01e6e 100644 --- a/gcc/config/i386/sm4intrin.h +++ b/gcc/config/i386/sm4intrin.h @@ -67,4 +67,29 @@ _mm256_sm4rnds4_epi32 (__m256i __A, __m256i __B) #pragma GCC pop_options #endif /* __DISABLE_SM4__ */ +#if !defined (__SM4__) || !defined (__AVX10_2_512__) +#pragma GCC push_options +#pragma GCC target("sm4,avx10.2-512") 
+#define __DISABLE_SM4_512__ +#endif /* __SM4_512__ */ + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sm4key4_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vsm4key4512 ((__v16si) __A, (__v16si) __B); +} + +extern __inline __m512i +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm512_sm4rnds4_epi32 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vsm4rnds4512 ((__v16si) __A, (__v16si) __B); +} + +#ifdef __DISABLE_SM4_512__ +#undef __DISABLE_SM4_512__ +#pragma GCC pop_options +#endif /* __DISABLE_SM4_512__ */ + #endif /* _SM4INTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 6c28b74ac3f..e438189d56d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -30054,25 +30054,29 @@ (set_attr "mode" "OI")]) (define_insn "vsm4key4_<mode>" - [(set (match_operand:VI4_AVX 0 "register_operand" "=x") - (unspec:VI4_AVX - [(match_operand:VI4_AVX 1 "register_operand" "x") - (match_operand:VI4_AVX 2 "vector_operand" "xBm")] + [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=x,v") + (unspec:VI4_AVX10_2 + [(match_operand:VI4_AVX10_2 1 "register_operand" "x,v") + (match_operand:VI4_AVX10_2 2 "vector_operand" "xBm,vBm")] UNSPEC_SM4KEY4))] "TARGET_SM4" "vsm4key4\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "other") + (set_attr "prefix" "maybe_evex") + (set_attr "isa" "avx,avx10_2") (set_attr "mode" "<sseinsnmode>")]) (define_insn "vsm4rnds4_<mode>" - [(set (match_operand:VI4_AVX 0 "register_operand" "=x") - (unspec:VI4_AVX - [(match_operand:VI4_AVX 1 "register_operand" "x") - (match_operand:VI4_AVX 2 "vector_operand" "xBm")] + [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=x,v") + (unspec:VI4_AVX10_2 + [(match_operand:VI4_AVX10_2 1 "register_operand" "x,v") + (match_operand:VI4_AVX10_2 2 "vector_operand" "xBm,vBm")] UNSPEC_SM4RNDS4))] "TARGET_SM4" "vsm4rnds4\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "other") 
+ (set_attr "prefix" "maybe_evex") + (set_attr "isa" "avx,avx10_2") (set_attr "mode" "<sseinsnmode>")]) (define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>" diff --git a/gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c new file mode 100644 index 00000000000..4746f6f7800 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c @@ -0,0 +1,58 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2" } */ + +#include <immintrin.h> + +void +f1 (__m128i x, __m128i y) +{ + register __m128i a __asm("xmm16"); + register __m128i b __asm("xmm17"); + a = x; + b = y; + asm volatile ("" : "+v" (a), "+v" (b)); + a = _mm_sm4key4_epi32 (a, b); + asm volatile ("" : "+v" (a)); +} + +void +f2 (__m256i x, __m256i y) +{ + register __m256i a __asm("ymm16"); + register __m256i b __asm("ymm17"); + a = x; + b = y; + asm volatile ("" : "+v" (a), "+v" (b)); + a = _mm256_sm4key4_epi32 (a, b); + asm volatile ("" : "+v" (a)); +} + +void +f3 (__m128i x, __m128i y) +{ + register __m128i a __asm("xmm16"); + register __m128i b __asm("xmm17"); + a = x; + b = y; + asm volatile ("" : "+v" (a), "+v" (b)); + a = _mm_sm4rnds4_epi32 (a, b); + asm volatile ("" : "+v" (a)); +} + +void +f4 (__m256i x, __m256i y) +{ + register __m256i a __asm("ymm16"); + register __m256i b __asm("ymm17"); + a = x; + b = y; + asm volatile ("" : "+v" (a), "+v" (b)); + a = _mm256_sm4rnds4_epi32 (a, b); + asm volatile ("" : "+v" (a)); +} + +/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%xmm17\[^\n\]*%xmm16\[^\n\]*%xmm16" } } */ +/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%ymm17\[^\n\]*%ymm16\[^\n\]*%ymm16" } } */ +/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%xmm17\[^\n\]*%xmm16\[^\n\]*%xmm16" } } */ +/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%ymm17\[^\n\]*%ymm16\[^\n\]*%ymm16" } } */ + diff --git a/gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c 
b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c new file mode 100644 index 00000000000..546472a933b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */ +/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]" } } */ +/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]" } } */ + +#include <immintrin.h> + +volatile __m512i x, y, z; + +void extern +sm4_test (void) +{ + x = _mm512_sm4key4_epi32 (y, z); + x = _mm512_sm4rnds4_epi32 (y, z); +} diff --git a/gcc/testsuite/gcc.target/i386/sm4-check.h b/gcc/testsuite/gcc.target/i386/sm4-check.h index 435fcf2b17d..72543a72ad8 100644 --- a/gcc/testsuite/gcc.target/i386/sm4-check.h +++ b/gcc/testsuite/gcc.target/i386/sm4-check.h @@ -1,7 +1,11 @@ #include <stdlib.h> -#include "m256-check.h" +#include "m512-check.h" +#ifdef AVX10_2_512 +static void sm4_avx512f_test (void); +#else static void sm4_test (void); +#endif typedef union { @@ -156,18 +160,46 @@ compute_sm4##name##4 (int *dst, int *src1, int *src2, int vl) \ if (check_union256i_d (res2, dst2)) \ abort (); +#define SM4_AVX512F_SIMULATE(name) \ + union512i_d src5, src6, res3; \ + int dst3[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; \ + \ + src5.x = _mm512_set_epi32 (111, 222, 333, 444, 555, 666, 777, 888, \ + 999, 123, 456, 789, 135, 792, 468, 147); \ + src6.x = _mm512_set_epi32 (258, 369, 159, 483, 726, 162, 738, 495, \ + 174, 285, 396, 186, 429, 752, 198, 765); \ + res3.x = _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); \ + \ + res3.x = _mm512_sm4##name##4_epi32 (src5.x, src6.x); \ + \ + compute_sm4##name##4 (dst3, src5.a, src6.a, 512); \ + \ + if (check_union512i_d (res3, dst3)) \ + abort (); + static void __attribute__ ((noinline)) do_test (void) { +#ifdef AVX10_512BIT + sm4_avx512f_test (); +#else 
sm4_test (); +#endif } int main () { /* Check CPU support for SM4. */ - if (__builtin_cpu_supports ("sm4")) + if (__builtin_cpu_supports ("sm4") +#ifdef AVX10_2 + && __builtin_cpu_supports ("avx10.2") +#endif +#ifdef AVX10_2_512 + && __builtin_cpu_supports ("avx10.2-512") +#endif + ) { do_test (); #ifdef DEBUG diff --git a/gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c b/gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c new file mode 100644 index 00000000000..85b7e3ef118 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c @@ -0,0 +1,18 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */ +/* { dg-require-effective-target sm4 } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#include "sm4-check.h" + +char key; +SM4_FUNC (key); + +static void +sm4_avx512f_test (void) +{ + SM4_AVX512F_SIMULATE (key); +} diff --git a/gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c b/gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c new file mode 100644 index 00000000000..1eaf08bde1f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c @@ -0,0 +1,18 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */ +/* { dg-require-effective-target sm4 } */ +/* { dg-require-effective-target avx10_2_512 } */ + +#define AVX10_2 +#define AVX10_2_512 +#define AVX10_512BIT +#include "sm4-check.h" + +char rnds; +SM4_FUNC (rnds); + +static void +sm4_avx512f_test (void) +{ + SM4_AVX512F_SIMULATE (rnds); +} -- 2.31.1