https://gcc.gnu.org/g:9141bfdd483e2838f5dce767f1c1657710ef2daf
commit r15-4888-g9141bfdd483e2838f5dce767f1c1657710ef2daf
Author: Antoni Boucher <boua...@zoho.com>
Date:   Mon Sep 23 18:58:47 2024 -0400

    target: Fix asm codegen for vfpclasss* and vcvtph2* instructions

    This only happens when using -masm=intel.

    gcc/ChangeLog:
            PR target/116725
            * config/i386/sse.md: Fix asm generation.

    gcc/testsuite/ChangeLog:
            PR target/116725
            * gcc.target/i386/pr116725.c: Add test using those AVX builtins.

Diff:
---
 gcc/config/i386/sse.md                   | 10 ++++++--
 gcc/testsuite/gcc.target/i386/pr116725.c | 40 ++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 22c6c817dd75..15ed8ff99cbd 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1312,6 +1312,12 @@
    (V8HF "w") (V8BF "w") (V4SF "k") (V2DF "q")
    (HF "w") (BF "w") (SF "k") (DF "q")])
 
+;; Pointer size override for 16-bit upper-convert modes (Intel asm dialect)
+(define_mode_attr iptrh
+ [(V32HI "") (V16SI "") (V8DI "")
+  (V16HI "") (V8SI "") (V4DI "q")
+  (V8HI "") (V4SI "q") (V2DI "k")])
+
 ;; Mapping of vector modes to VPTERNLOG suffix
 (define_mode_attr ternlogsuffix
   [(V8DI "q") (V4DI "q") (V2DI "q")
@@ -7606,7 +7612,7 @@
            [(match_operand:<ssePHmode> 1 "<round_nimm_predicate>" "<round_constraint>")]
            UNSPEC_US_FIX_NOTRUNC))]
   "TARGET_AVX512FP16 && <round_mode_condition>"
-  "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+  "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %<iptrh>1<round_mask_op2>}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<sseinsnmode>")])
@@ -29840,7 +29846,7 @@
            UNSPEC_FPCLASS)
           (const_int 1)))]
   "TARGET_AVX512DQ || VALID_AVX512FP16_REG_MODE(<MODE>mode)"
-  "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
+  "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %<iptr>1, %2}";
   [(set_attr "type" "sse")
    (set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
diff --git a/gcc/testsuite/gcc.target/i386/pr116725.c b/gcc/testsuite/gcc.target/i386/pr116725.c
new file mode 100644
index 000000000000..9e5070e16e71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr116725.c
@@ -0,0 +1,40 @@
+/* PR gcc/116725 */
+/* { dg-do assemble } */
+/* { dg-options "-masm=intel -mavx512dq -mavx512fp16 -mavx512vl" } */
+/* { dg-require-effective-target masm_intel } */
+
+#include <stdio.h>
+
+typedef double __m128d __attribute__ ((__vector_size__ (16)));
+typedef float __m128f __attribute__ ((__vector_size__ (16)));
+typedef int __v16si __attribute__ ((__vector_size__ (64)));
+typedef _Float16 __m256h __attribute__ ((__vector_size__ (32)));
+typedef long long __m512i __attribute__((__vector_size__(64)));
+typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__));
+typedef int __v4si __attribute__ ((__vector_size__ (16)));
+typedef long long __m128i __attribute__ ((__vector_size__ (16)));
+
+int main(void) {
+    __m128d vec = {1.0, 2.0};
+    char res = __builtin_ia32_fpclasssd_mask(vec, 1, 1);
+    printf("%d\n", res);
+
+    __m128f vec2 = {1.0, 2.0, 3.0, 4.0};
+    char res2 = __builtin_ia32_fpclassss_mask(vec2, 1, 1);
+    printf("%d\n", res2);
+
+    __m128h vec3 = {2.0, 1.0, 3.0};
+    __v4si vec4 = {};
+    __v4si res3 = __builtin_ia32_vcvtph2dq128_mask(vec3, vec4, -1);
+    printf("%d\n", res3[0]);
+
+    __v4si res4 = __builtin_ia32_vcvtph2udq128_mask(vec3, vec4, -1);
+    printf("%d\n", res4[0]);
+
+    __m128i vec5 = {};
+    __m128i res5 = __builtin_ia32_vcvtph2qq128_mask(vec3, vec5, -1);
+    printf("%d\n", res5[0]);
+
+    __m128i res6 = __builtin_ia32_vcvtph2uqq128_mask(vec3, vec5, -1);
+    printf("%d\n", res6[0]);
+}