Hi. This is a patch for the bug 116725. I'm not sure if it is a good fix, but it seems to do the job.If you have suggestions for better comments than what I wrote that would explain what's happening, I'm open to suggestions.
Here are the tests results: === gcc Summary === # of expected passes 208652 # of unexpected failures 120 # of unexpected successes 25 # of expected failures 1545 # of unsupported tests 3518 === g++ Summary === # of expected passes 234412 # of unexpected failures 41 # of expected failures 2215 # of unsupported tests 2061 They are the same on master: # Comparing directories ## Dir1=/home/bouanto/tmp/master/: 2 sum files ## Dir2=/home/bouanto/tmp/branch/: 2 sum files # Comparing 2 common sum files ## /bin/sh ./contrib/compare_tests /tmp/gxx-sum1.29654 /tmp/gxx-sum2.29654 New tests that PASS (1 tests): gcc: gcc.target/i386/pr116725.c (test for excess errors) Old tests that passed, that have disappeared (136 tests): (Eeek!) g++: c-c++-common/tsan/atomic_stack.c -O0 execution test […] Old tests that failed, that have disappeared (69 tests): (Eeek!) g++: c-c++-common/tsan/atomic_stack.c -O0 output pattern test […] # No differences found in 2 common sum files Thanks.
From 20ba6ec63d29b5d1ac93bdb9d461527eaf8962f5 Mon Sep 17 00:00:00 2001 From: Antoni Boucher <boua...@zoho.com> Date: Mon, 23 Sep 2024 18:58:47 -0400 Subject: [PATCH] target: Fix asm codegen for vfpclasss* and vcvtph2* instructions This only happens when using -masm=intel. gcc/ChangeLog: PR target/116725 * config/i386/sse.md: Fix asm generation. gcc/testsuite/ChangeLog: PR target/116725 * gcc.target/i386/pr116725.c: Add test using those AVX builtins. --- gcc/config/i386/sse.md | 6 ++-- gcc/testsuite/gcc.target/i386/pr116725.c | 40 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr116725.c diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 685bce3094a..040f3ed3d14 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -7548,7 +7548,8 @@ (define_insn "avx512fp16_vcvtph2<sseintconvertsignprefix><sseintconvert>_<mode>< [(match_operand:<ssePHmode> 1 "<round_nimm_predicate>" "<round_constraint>")] UNSPEC_US_FIX_NOTRUNC))] "TARGET_AVX512FP16 && <round_mode_condition>" - "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}" +;; %X1 so that we don't emit any *WORD PTR for -masm=intel. + "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %X1<round_mask_op2>}" [(set_attr "type" "ssecvt") (set_attr "prefix" "evex") (set_attr "mode" "<sseinsnmode>")]) @@ -29854,7 +29855,8 @@ (define_insn "avx512dq_vmfpclass<mode><mask_scalar_merge_name>" UNSPEC_FPCLASS) (const_int 1)))] "TARGET_AVX512DQ || VALID_AVX512FP16_REG_MODE(<MODE>mode)" - "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}"; +;; %X1 so that we don't emit any *WORD PTR for -masm=intel. + "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %X1, %2}"; [(set_attr "type" "sse") (set_attr "length_immediate" "1") (set_attr "prefix" "evex") diff --git a/gcc/testsuite/gcc.target/i386/pr116725.c b/gcc/testsuite/gcc.target/i386/pr116725.c new file mode 100644 index 00000000000..9e5070e16e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr116725.c @@ -0,0 +1,40 @@ +/* PR gcc/116725 */ +/* { dg-do assemble } */ +/* { dg-options "-masm=intel -mavx512dq -mavx512fp16 -mavx512vl" } */ +/* { dg-require-effective-target masm_intel } */ + +#include <stdio.h> + +typedef double __m128d __attribute__ ((__vector_size__ (16))); +typedef float __m128f __attribute__ ((__vector_size__ (16))); +typedef int __v16si __attribute__ ((__vector_size__ (64))); +typedef _Float16 __m256h __attribute__ ((__vector_size__ (32))); +typedef long long __m512i __attribute__((__vector_size__(64))); +typedef _Float16 __m128h __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef int __v4si __attribute__ ((__vector_size__ (16))); +typedef long long __m128i __attribute__ ((__vector_size__ (16))); + +int main(void) { + __m128d vec = {1.0, 2.0}; + char res = __builtin_ia32_fpclasssd_mask(vec, 1, 1); + printf("%d\n", res); + + __m128f vec2 = {1.0, 2.0, 3.0, 4.0}; + char res2 = __builtin_ia32_fpclassss_mask(vec2, 1, 1); + printf("%d\n", res2); + + __m128h vec3 = {2.0, 1.0, 3.0}; + __v4si vec4 = {}; + __v4si res3 = __builtin_ia32_vcvtph2dq128_mask(vec3, vec4, -1); + printf("%d\n", res3[0]); + + __v4si res4 = __builtin_ia32_vcvtph2udq128_mask(vec3, vec4, -1); + printf("%d\n", res4[0]); + + __m128i vec5 = {}; + __m128i res5 = __builtin_ia32_vcvtph2qq128_mask(vec3, vec5, -1); + printf("%d\n", res5[0]); + + __m128i res6 = __builtin_ia32_vcvtph2uqq128_mask(vec3, vec5, -1); + printf("%d\n", res6[0]); +} -- 2.47.0