---
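gcc/ChangeLog:

	* config/i386/avx10_v2_auxintrin.h (_mm_cvtrops_hf8): New intrinsic.
	(_mm_mask_cvtrops_hf8): Likewise.
	(_mm_maskz_cvtrops_hf8): Likewise.
	(_mm256_cvtrops_hf8): Likewise.
	(_mm256_mask_cvtrops_hf8): Likewise.
	(_mm256_maskz_cvtrops_hf8): Likewise.
	(_mm512_cvtrops_hf8): Likewise.
	(_mm512_mask_cvtrops_hf8): Likewise.
	(_mm512_maskz_cvtrops_hf8): Likewise.
	(_mm_cvts_rops_hf8): Likewise.
	(_mm_mask_cvts_rops_hf8): Likewise.
	(_mm_maskz_cvts_rops_hf8): Likewise.
	(_mm256_cvts_rops_hf8): Likewise.
	(_mm256_mask_cvts_rops_hf8): Likewise.
	(_mm256_maskz_cvts_rops_hf8): Likewise.
	(_mm512_cvts_rops_hf8): Likewise.
	(_mm512_mask_cvts_rops_hf8): Likewise.
	(_mm512_maskz_cvts_rops_hf8): Likewise.
	* config/i386/i386-builtin.def (BDESC): Add builtins for
	VCVTROPS2HF8 and VCVTROPS2HF8S.
	* config/i386/sse.md (UNSPEC_VCVTROPS2HF8): New unspec.
	(UNSPEC_VCVTROPS2HF8S): Likewise.
	(UNSPEC_CONVERTPS2FP8): Add UNSPEC_VCVTROPS2HF8 and
	UNSPEC_VCVTROPS2HF8S.
	(convertps2fp8): Likewise.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/avx10_2-v2-aux-convert-2.c: New test.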
gcc/config/i386/avx10_v2_auxintrin.h | 186 ++++++++++++++++++
gcc/config/i386/i386-builtin.def | 6 +
gcc/config/i386/sse.md | 11 +-
.../i386/avx10_2-v2-aux-convert-2.c | 46 +++++
4 files changed, 246 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/avx10_2-v2-aux-convert-2.c
diff --git a/gcc/config/i386/avx10_v2_auxintrin.h b/gcc/config/i386/avx10_v2_auxintrin.h
index 3ca4d0b1166..a0da0ec4f3a 100644
--- a/gcc/config/i386/avx10_v2_auxintrin.h
+++ b/gcc/config/i386/avx10_v2_auxintrin.h
@@ -401,6 +401,192 @@ _mm512_maskz_cvts_ps_hf8 (__mmask16 __U, __m512 __A)
(__mmask16) __U);
}
+// VCVTROPS2HF8 - 128-bit
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvtrops_hf8 (__m128 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8128_mask ((__v4sf) __A,
+                                         (__v16qi) _mm_undefined_si128 (),
+                                         (__mmask8) -1);  /* All ones.  */
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvtrops_hf8 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8128_mask ((__v4sf) __A, (__v16qi) __W,
+                                         (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvtrops_hf8 (__mmask8 __U, __m128 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8128_mask ((__v4sf) __A,
+                                         (__v16qi) _mm_setzero_si128 (),
+                                         (__mmask8) __U);
+}
+
+// VCVTROPS2HF8 - 256-bit
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvtrops_hf8 (__m256 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8256_mask ((__v8sf) __A,
+                                         (__v16qi) _mm_undefined_si128 (),
+                                         (__mmask8) -1);  /* All ones.  */
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvtrops_hf8 (__m128i __W, __mmask8 __U, __m256 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8256_mask ((__v8sf) __A, (__v16qi) __W,
+                                         (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvtrops_hf8 (__mmask8 __U, __m256 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8256_mask ((__v8sf) __A,
+                                         (__v16qi) _mm_setzero_si128 (),
+                                         (__mmask8) __U);
+}
+
+// VCVTROPS2HF8 - 512-bit
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvtrops_hf8 (__m512 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8512_mask ((__v16sf) __A,
+                                         (__v16qi) _mm_undefined_si128 (),
+                                         (__mmask16) -1);  /* All ones.  */
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvtrops_hf8 (__m128i __W, __mmask16 __U, __m512 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8512_mask ((__v16sf) __A, (__v16qi) __W,
+                                         (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvtrops_hf8 (__mmask16 __U, __m512 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8512_mask ((__v16sf) __A,
+                                         (__v16qi) _mm_setzero_si128 (),
+                                         (__mmask16) __U);
+}
+
+// VCVTROPS2HF8S - 128-bit
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_cvts_rops_hf8 (__m128 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8s128_mask ((__v4sf) __A,
+                                          (__v16qi) _mm_undefined_si128 (),
+                                          (__mmask8) -1);  /* All ones.  */
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_cvts_rops_hf8 (__m128i __W, __mmask8 __U, __m128 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8s128_mask ((__v4sf) __A, (__v16qi) __W,
+                                          (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_cvts_rops_hf8 (__mmask8 __U, __m128 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8s128_mask ((__v4sf) __A,
+                                          (__v16qi) _mm_setzero_si128 (),
+                                          (__mmask8) __U);
+}
+
+// VCVTROPS2HF8S - 256-bit
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_cvts_rops_hf8 (__m256 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8s256_mask ((__v8sf) __A,
+                                          (__v16qi) _mm_undefined_si128 (),
+                                          (__mmask8) -1);  /* All ones.  */
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_cvts_rops_hf8 (__m128i __W, __mmask8 __U, __m256 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8s256_mask ((__v8sf) __A, (__v16qi) __W,
+                                          (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_cvts_rops_hf8 (__mmask8 __U, __m256 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8s256_mask ((__v8sf) __A,
+                                          (__v16qi) _mm_setzero_si128 (),
+                                          (__mmask8) __U);
+}
+
+// VCVTROPS2HF8S - 512-bit
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_cvts_rops_hf8 (__m512 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8s512_mask ((__v16sf) __A,
+                                          (__v16qi) _mm_undefined_si128 (),
+                                          (__mmask16) -1);  /* All ones.  */
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_cvts_rops_hf8 (__m128i __W, __mmask16 __U, __m512 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8s512_mask ((__v16sf) __A, (__v16qi) __W,
+                                          (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_cvts_rops_hf8 (__mmask16 __U, __m512 __A)
+{
+  return (__m128i)
+    __builtin_ia32_vcvtrops2hf8s512_mask ((__v16sf) __A,
+                                          (__v16qi) _mm_setzero_si128 (),
+                                          (__mmask16) __U);
+}
+
#ifdef __DISABLE_AVX10_V2_AUX__
#undef __DISABLE_AVX10_V2_AUX__
#pragma GCC pop_options
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 40afda1c366..6d4f9869d23 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -3382,6 +3382,12 @@ BDESC (0, OPTION_MASK_ISA2_AVX10_V2_AUX, CODE_FOR_vcvtps2hf8v16sf_mask, "__built
 BDESC (0, OPTION_MASK_ISA2_AVX10_V2_AUX, CODE_FOR_vcvtps2hf8sv4sf_mask, "__builtin_ia32_vcvtps2hf8s128_mask", IX86_BUILTIN_VCVTPS2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V4SF_V16QI_UQI)
 BDESC (0, OPTION_MASK_ISA2_AVX10_V2_AUX, CODE_FOR_vcvtps2hf8sv8sf_mask, "__builtin_ia32_vcvtps2hf8s256_mask", IX86_BUILTIN_VCVTPS2HF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V8SF_V16QI_UQI)
 BDESC (0, OPTION_MASK_ISA2_AVX10_V2_AUX, CODE_FOR_vcvtps2hf8sv16sf_mask, "__builtin_ia32_vcvtps2hf8s512_mask", IX86_BUILTIN_VCVTPS2HF8S512_MASK, UNKNOWN, (int) V16QI_FTYPE_V16SF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_V2_AUX, CODE_FOR_vcvtrops2hf8v4sf_mask, "__builtin_ia32_vcvtrops2hf8128_mask", IX86_BUILTIN_VCVTROPS2HF8128_MASK, UNKNOWN, (int) V16QI_FTYPE_V4SF_V16QI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_V2_AUX, CODE_FOR_vcvtrops2hf8v8sf_mask, "__builtin_ia32_vcvtrops2hf8256_mask", IX86_BUILTIN_VCVTROPS2HF8256_MASK, UNKNOWN, (int) V16QI_FTYPE_V8SF_V16QI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_V2_AUX, CODE_FOR_vcvtrops2hf8v16sf_mask, "__builtin_ia32_vcvtrops2hf8512_mask", IX86_BUILTIN_VCVTROPS2HF8512_MASK, UNKNOWN, (int) V16QI_FTYPE_V16SF_V16QI_UHI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_V2_AUX, CODE_FOR_vcvtrops2hf8sv4sf_mask, "__builtin_ia32_vcvtrops2hf8s128_mask", IX86_BUILTIN_VCVTROPS2HF8S128_MASK, UNKNOWN, (int) V16QI_FTYPE_V4SF_V16QI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_V2_AUX, CODE_FOR_vcvtrops2hf8sv8sf_mask, "__builtin_ia32_vcvtrops2hf8s256_mask", IX86_BUILTIN_VCVTROPS2HF8S256_MASK, UNKNOWN, (int) V16QI_FTYPE_V8SF_V16QI_UQI)
+BDESC (0, OPTION_MASK_ISA2_AVX10_V2_AUX, CODE_FOR_vcvtrops2hf8sv16sf_mask, "__builtin_ia32_vcvtrops2hf8s512_mask", IX86_BUILTIN_VCVTROPS2HF8S512_MASK, UNKNOWN, (int) V16QI_FTYPE_V16SF_V16QI_UHI)
/* Builtins with rounding support. */
BDESC_END (ARGS, ROUND_ARGS)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c5c3b48e63e..cdaecc713a6 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -264,6 +264,8 @@
UNSPEC_VCVTPS2BF8S
UNSPEC_VCVTPS2HF8
UNSPEC_VCVTPS2HF8S
+ UNSPEC_VCVTROPS2HF8
+ UNSPEC_VCVTROPS2HF8S
])
(define_c_enum "unspecv" [
@@ -33805,17 +33807,20 @@
;; AVX10_V2_AUX instructions
;; FP32 to FP8 single-source converts (VCVTPS2BF8, VCVTPS2BF8S,
-;; VCVTPS2HF8, VCVTPS2HF8S)
+;; VCVTPS2HF8, VCVTPS2HF8S, VCVTROPS2HF8, VCVTROPS2HF8S)
(define_int_iterator UNSPEC_CONVERTPS2FP8
[UNSPEC_VCVTPS2BF8 UNSPEC_VCVTPS2BF8S
- UNSPEC_VCVTPS2HF8 UNSPEC_VCVTPS2HF8S])
+ UNSPEC_VCVTPS2HF8 UNSPEC_VCVTPS2HF8S
+ UNSPEC_VCVTROPS2HF8 UNSPEC_VCVTROPS2HF8S])
(define_int_attr convertps2fp8
[(UNSPEC_VCVTPS2BF8 "ps2bf8")
(UNSPEC_VCVTPS2BF8S "ps2bf8s")
(UNSPEC_VCVTPS2HF8 "ps2hf8")
- (UNSPEC_VCVTPS2HF8S "ps2hf8s")])
+ (UNSPEC_VCVTPS2HF8S "ps2hf8s")
+ (UNSPEC_VCVTROPS2HF8 "rops2hf8")
+ (UNSPEC_VCVTROPS2HF8S "rops2hf8s")])
(define_insn "vcvt<convertps2fp8><mode>"
[(set (match_operand:V16QI 0 "register_operand" "=v")
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-v2-aux-convert-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-v2-aux-convert-2.c
new file mode 100644
index 00000000000..382af8949f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-v2-aux-convert-2.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx10-v2-aux -O2" } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8s\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8s\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvtrops2hf8s\[ \\t\]+\[^\{\n\]*%zmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}(?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+__m128i test_mm_cvtrops_hf8 (__m128 a) { return _mm_cvtrops_hf8 (a); }
+__m128i test_mm_mask_cvtrops_hf8 (__m128i w, __mmask8 u, __m128 a) { return _mm_mask_cvtrops_hf8 (w, u, a); }
+__m128i test_mm_maskz_cvtrops_hf8 (__mmask8 u, __m128 a) { return _mm_maskz_cvtrops_hf8 (u, a); }
+
+__m128i test_mm256_cvtrops_hf8 (__m256 a) { return _mm256_cvtrops_hf8 (a); }
+__m128i test_mm256_mask_cvtrops_hf8 (__m128i w, __mmask8 u, __m256 a) { return _mm256_mask_cvtrops_hf8 (w, u, a); }
+__m128i test_mm256_maskz_cvtrops_hf8 (__mmask8 u, __m256 a) { return _mm256_maskz_cvtrops_hf8 (u, a); }
+
+__m128i test_mm512_cvtrops_hf8 (__m512 a) { return _mm512_cvtrops_hf8 (a); }
+__m128i test_mm512_mask_cvtrops_hf8 (__m128i w, __mmask16 u, __m512 a) { return _mm512_mask_cvtrops_hf8 (w, u, a); }
+__m128i test_mm512_maskz_cvtrops_hf8 (__mmask16 u, __m512 a) { return _mm512_maskz_cvtrops_hf8 (u, a); }
+
+__m128i test_mm_cvts_rops_hf8 (__m128 a) { return _mm_cvts_rops_hf8 (a); }
+__m128i test_mm_mask_cvts_rops_hf8 (__m128i w, __mmask8 u, __m128 a) { return _mm_mask_cvts_rops_hf8 (w, u, a); }
+__m128i test_mm_maskz_cvts_rops_hf8 (__mmask8 u, __m128 a) { return _mm_maskz_cvts_rops_hf8 (u, a); }
+
+__m128i test_mm256_cvts_rops_hf8 (__m256 a) { return _mm256_cvts_rops_hf8 (a); }
+__m128i test_mm256_mask_cvts_rops_hf8 (__m128i w, __mmask8 u, __m256 a) { return _mm256_mask_cvts_rops_hf8 (w, u, a); }
+__m128i test_mm256_maskz_cvts_rops_hf8 (__mmask8 u, __m256 a) { return _mm256_maskz_cvts_rops_hf8 (u, a); }
+
+__m128i test_mm512_cvts_rops_hf8 (__m512 a) { return _mm512_cvts_rops_hf8 (a); }
+__m128i test_mm512_mask_cvts_rops_hf8 (__m128i w, __mmask16 u, __m512 a) { return _mm512_mask_cvts_rops_hf8 (w, u, a); }
+__m128i test_mm512_maskz_cvts_rops_hf8 (__mmask16 u, __m512 a) { return _mm512_maskz_cvts_rops_hf8 (u, a); }
--
2.34.1