On Thu, Jul 14, 2022 at 2:11 PM Kong, Lingling via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > Hi, > > The patch is to fix _mm_[u]comixx_{ss,sd} codegen and add PF result. These > intrinsics have changed over time, like `_mm_comieq_ss` old operation is > `RETURN ( a[31:0] == b[31:0] ) ? 1 : 0`, and new operation update is `RETURN > ( a[31:0] != NaN AND b[31:0] != NaN AND a[31:0] == b[31:0] ) ? 1 : 0`. > > OK for master? All _mm_comiXX_ss intrinsics use an ordered compare except for _mm_comineq_ss, which uses an unordered compare; this is now aligned with the intrinsics guide. OK for trunk. > > gcc/ChangeLog: > > PR target/106113 > * config/i386/i386-builtin.def (BDESC): Fix [u]comi{ss,sd} > comparison due to intrinsics changed over time. > * config/i386/i386-expand.cc (ix86_ssecom_setcc): > Add unordered check and mode for sse comi codegen. > (ix86_expand_sse_comi): Add unordered check and check a different > CCmode. > (ix86_expand_sse_comi_round): Extract unordered check and mode part > in ix86_ssecom_setcc. > > gcc/testsuite/ChangeLog: > > PR target/106113 > * gcc.target/i386/avx-vcomisd-pr106113-2.c: New test. > * gcc.target/i386/avx-vcomiss-pr106113-2.c: Ditto. > * gcc.target/i386/avx-vucomisd-pr106113-2.c: Ditto. > * gcc.target/i386/avx-vucomiss-pr106113-2.c: Ditto. > * gcc.target/i386/sse-comiss-pr106113-1.c: Ditto. > * gcc.target/i386/sse-comiss-pr106113-2.c: Ditto. > * gcc.target/i386/sse-ucomiss-pr106113-1.c: Ditto. > * gcc.target/i386/sse-ucomiss-pr106113-2.c: Ditto. > * gcc.target/i386/sse2-comisd-pr106113-1.c: Ditto. > * gcc.target/i386/sse2-comisd-pr106113-2.c: Ditto. > * gcc.target/i386/sse2-ucomisd-pr106113-1.c: Ditto. > * gcc.target/i386/sse2-ucomisd-pr106113-2.c: Ditto. 
> --- > gcc/config/i386/i386-builtin.def | 32 ++-- > gcc/config/i386/i386-expand.cc | 140 +++++++++++------- > .../gcc.target/i386/avx-vcomisd-pr106113-2.c | 8 + > .../gcc.target/i386/avx-vcomiss-pr106113-2.c | 8 + > .../gcc.target/i386/avx-vucomisd-pr106113-2.c | 8 + > .../gcc.target/i386/avx-vucomiss-pr106113-2.c | 8 + > .../gcc.target/i386/sse-comiss-pr106113-1.c | 19 +++ > .../gcc.target/i386/sse-comiss-pr106113-2.c | 59 ++++++++ > .../gcc.target/i386/sse-ucomiss-pr106113-1.c | 19 +++ > .../gcc.target/i386/sse-ucomiss-pr106113-2.c | 59 ++++++++ > .../gcc.target/i386/sse2-comisd-pr106113-1.c | 19 +++ > .../gcc.target/i386/sse2-comisd-pr106113-2.c | 59 ++++++++ > .../gcc.target/i386/sse2-ucomisd-pr106113-1.c | 19 +++ > .../gcc.target/i386/sse2-ucomisd-pr106113-2.c | 59 ++++++++ > 14 files changed, 450 insertions(+), 66 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c > create mode 100644 gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c > > diff --git a/gcc/config/i386/i386-builtin.def > b/gcc/config/i386/i386-builtin.def > index fd160935e67..acb7e8ca64b 100644 > --- a/gcc/config/i386/i386-builtin.def > +++ b/gcc/config/i386/i386-builtin.def > @@ -35,30 +35,30 @@ > 
IX86_BUILTIN__BDESC_##NEXT_KIND##_FIRST - 1. */ > > BDESC_FIRST (comi, COMI, > - OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comieq", > IX86_BUILTIN_COMIEQSS, UNEQ, 0) > -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comilt", > IX86_BUILTIN_COMILTSS, UNLT, 0) > -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comile", > IX86_BUILTIN_COMILESS, UNLE, 0) > + OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comieq", > IX86_BUILTIN_COMIEQSS, EQ, 0) > +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comilt", > IX86_BUILTIN_COMILTSS, LT, 0) > +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comile", > IX86_BUILTIN_COMILESS, LE, 0) > BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comigt", > IX86_BUILTIN_COMIGTSS, GT, 0) > BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comige", > IX86_BUILTIN_COMIGESS, GE, 0) > -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comineq", > IX86_BUILTIN_COMINEQSS, LTGT, 0) > -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", > IX86_BUILTIN_UCOMIEQSS, UNEQ, 0) > -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", > IX86_BUILTIN_UCOMILTSS, UNLT, 0) > -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", > IX86_BUILTIN_UCOMILESS, UNLE, 0) > +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_comi, "__builtin_ia32_comineq", > IX86_BUILTIN_COMINEQSS, NE, 0) > +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", > IX86_BUILTIN_UCOMIEQSS, EQ, 0) > +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", > IX86_BUILTIN_UCOMILTSS, LT, 0) > +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", > IX86_BUILTIN_UCOMILESS, LE, 0) > BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", > IX86_BUILTIN_UCOMIGTSS, GT, 0) > BDESC 
(OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", > IX86_BUILTIN_UCOMIGESS, GE, 0) > -BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, > "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0) > -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, > "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0) > -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, > "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0) > -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, > "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0) > +BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_ucomi, > "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0) > +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, > "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0) > +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, > "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0) > +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, > "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0) > BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, > "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0) > BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, > "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0) > -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, > "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0) > -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, > "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0) > -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, > "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0) > -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, > "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0) > +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_comi, > "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0) > +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, > "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0) > +BDESC (OPTION_MASK_ISA_SSE2, 0, 
CODE_FOR_sse2_ucomi, > "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0) > +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, > "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0) > BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, > "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0) > BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, > "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0) > -BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, > "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0) > +BDESC (OPTION_MASK_ISA_SSE2, 0, CODE_FOR_sse2_ucomi, > "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0) > > BDESC_END (COMI, PCMPESTR) > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index 6a3fcde5738..40f821e7a11 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -9770,47 +9770,121 @@ ix86_expand_sse_compare (const struct > builtin_description *d, > return target; > } > > +/* Subroutine of ix86_sse_comi and ix86_sse_comi_round to take care of > + * ordered EQ or unordered NE, generate PF jump. */ > + > +static rtx > +ix86_ssecom_setcc (const enum rtx_code comparison, > + bool check_unordered, machine_mode mode, > + rtx set_dst, rtx target) > +{ > + > + rtx_code_label *label = NULL; > + > + /* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient > + with NAN operands. */ > + if (check_unordered) > + { > + gcc_assert (comparison == EQ || comparison == NE); > + > + rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG); > + label = gen_label_rtx (); > + rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx); > + tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, > + gen_rtx_LABEL_REF (VOIDmode, label), > + pc_rtx); > + emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); > + } > + > + /* NB: Set CCFPmode and check a different CCmode which is in subset > + of CCFPmode. 
*/ > + if (GET_MODE (set_dst) != mode) > + { > + gcc_assert (mode == CCAmode || mode == CCCmode > + || mode == CCOmode || mode == CCPmode > + || mode == CCSmode || mode == CCZmode); > + set_dst = gen_rtx_REG (mode, FLAGS_REG); > + } > + > + emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), > + gen_rtx_fmt_ee (comparison, QImode, > + set_dst, > + const0_rtx))); > + > + if (label) > + emit_label (label); > + > + return SUBREG_REG (target); > +} > + > /* Subroutine of ix86_expand_builtin to take care of comi insns. */ > > static rtx > ix86_expand_sse_comi (const struct builtin_description *d, tree exp, > rtx target) > { > - rtx pat; > + rtx pat, set_dst; > tree arg0 = CALL_EXPR_ARG (exp, 0); > tree arg1 = CALL_EXPR_ARG (exp, 1); > rtx op0 = expand_normal (arg0); > rtx op1 = expand_normal (arg1); > - machine_mode mode0 = insn_data[d->icode].operand[0].mode; > - machine_mode mode1 = insn_data[d->icode].operand[1].mode; > - enum rtx_code comparison = d->comparison; > + enum insn_code icode = d->icode; > + const struct insn_data_d *insn_p = &insn_data[icode]; > + machine_mode mode0 = insn_p->operand[0].mode; > + machine_mode mode1 = insn_p->operand[1].mode; > > if (VECTOR_MODE_P (mode0)) > op0 = safe_vector_operand (op0, mode0); > if (VECTOR_MODE_P (mode1)) > op1 = safe_vector_operand (op1, mode1); > > + enum rtx_code comparison = d->comparison; > + rtx const_val = const0_rtx; > + > + bool check_unordered = false; > + machine_mode mode = CCFPmode; > + switch (comparison) > + { > + case LE: /* -> GE */ > + case LT: /* -> GT */ > + std::swap (op0, op1); > + comparison = swap_condition (comparison); > + /* FALLTHRU */ > + case GT: > + case GE: > + break; > + case EQ: > + check_unordered = true; > + mode = CCZmode; > + break; > + case NE: > + check_unordered = true; > + mode = CCZmode; > + const_val = const1_rtx; > + break; > + default: > + gcc_unreachable (); > + } > + > target = gen_reg_rtx (SImode); > - emit_move_insn (target, const0_rtx); > + 
emit_move_insn (target, const_val); > target = gen_rtx_SUBREG (QImode, target, 0); > > if ((optimize && !register_operand (op0, mode0)) > - || !insn_data[d->icode].operand[0].predicate (op0, mode0)) > + || !insn_p->operand[0].predicate (op0, mode0)) > op0 = copy_to_mode_reg (mode0, op0); > if ((optimize && !register_operand (op1, mode1)) > - || !insn_data[d->icode].operand[1].predicate (op1, mode1)) > + || !insn_p->operand[1].predicate (op1, mode1)) > op1 = copy_to_mode_reg (mode1, op1); > > - pat = GEN_FCN (d->icode) (op0, op1); > + pat = GEN_FCN (icode) (op0, op1); > if (! pat) > return 0; > - emit_insn (pat); > - emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), > - gen_rtx_fmt_ee (comparison, QImode, > - SET_DEST (pat), > - const0_rtx))); > > - return SUBREG_REG (target); > + set_dst = SET_DEST (pat); > + emit_insn (pat); > + return ix86_ssecom_setcc (comparison, check_unordered, mode, > + set_dst, target); > } > > /* Subroutines of ix86_expand_args_builtin to take care of round insns. */ > @@ -11410,42 +11484,8 @@ ix86_expand_sse_comi_round (const struct > builtin_description *d, > > emit_insn (pat); > > - rtx_code_label *label = NULL; > - > - /* NB: For ordered EQ or unordered NE, check ZF alone isn't sufficient > - with NAN operands. */ > - if (check_unordered) > - { > - gcc_assert (comparison == EQ || comparison == NE); > - > - rtx flag = gen_rtx_REG (CCFPmode, FLAGS_REG); > - label = gen_label_rtx (); > - rtx tmp = gen_rtx_fmt_ee (UNORDERED, VOIDmode, flag, const0_rtx); > - tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, > - gen_rtx_LABEL_REF (VOIDmode, label), > - pc_rtx); > - emit_jump_insn (gen_rtx_SET (pc_rtx, tmp)); > - } > - > - /* NB: Set CCFPmode and check a different CCmode which is in subset > - of CCFPmode. 
*/ > - if (GET_MODE (set_dst) != mode) > - { > - gcc_assert (mode == CCAmode || mode == CCCmode > - || mode == CCOmode || mode == CCPmode > - || mode == CCSmode || mode == CCZmode); > - set_dst = gen_rtx_REG (mode, FLAGS_REG); > - } > - > - emit_insn (gen_rtx_SET (gen_rtx_STRICT_LOW_PART (VOIDmode, target), > - gen_rtx_fmt_ee (comparison, QImode, > - set_dst, > - const0_rtx))); > - > - if (label) > - emit_label (label); > - > - return SUBREG_REG (target); > + return ix86_ssecom_setcc (comparison, check_unordered, mode, > + set_dst, target); > } > > static rtx > diff --git a/gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c > b/gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c > new file mode 100644 > index 00000000000..9025b1b57b6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx-vcomisd-pr106113-2.c > @@ -0,0 +1,8 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target avx } */ > +/* { dg-options "-O2 -mavx" } */ > + > +#define CHECK_H "avx-check.h" > +#define TEST avx_test > + > +#include "sse2-comisd-pr106113-2.c" > diff --git a/gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c > b/gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c > new file mode 100644 > index 00000000000..dc0bf514069 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx-vcomiss-pr106113-2.c > @@ -0,0 +1,8 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target avx } */ > +/* { dg-options "-O2 -mavx" } */ > + > +#define CHECK_H "avx-check.h" > +#define TEST avx_test > + > +#include "sse-comiss-pr106113-2.c" > diff --git a/gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c > b/gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c > new file mode 100644 > index 00000000000..3b0c5db2332 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx-vucomisd-pr106113-2.c > @@ -0,0 +1,8 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target avx } */ > +/* { dg-options "-O2 -mavx" } */ > + > +#define CHECK_H "avx-check.h" > +#define TEST avx_test > 
+ > +#include "sse2-ucomisd-pr106113-2.c" > diff --git a/gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c > b/gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c > new file mode 100644 > index 00000000000..d67e4adffeb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/avx-vucomiss-pr106113-2.c > @@ -0,0 +1,8 @@ > +/* { dg-do run } */ > +/* { dg-require-effective-target avx } */ > +/* { dg-options "-O2 -mavx" } */ > + > +#define CHECK_H "avx-check.h" > +#define TEST avx_test > + > +#include "sse-ucomiss-pr106113-2.c" > diff --git a/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c > b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c > new file mode 100644 > index 00000000000..95621029bf6 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-1.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-msse -O2" } */ > +/* { dg-final { scan-assembler-times "comiss\[ > \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */ > +/* { dg-final { scan-assembler-times "jp" 2 } } */ > +#include <xmmintrin.h> > + > +volatile __m128 x1, x2; > +volatile int res; > + > +void extern > +sse_comi_test (void) > +{ > + res = _mm_comieq_ss (x1, x2); > + res = _mm_comilt_ss (x1, x2); > + res = _mm_comile_ss (x1, x2); > + res = _mm_comigt_ss (x1, x2); > + res = _mm_comige_ss (x1, x2); > + res = _mm_comineq_ss (x1, x2); > +} > diff --git a/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c > b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c > new file mode 100644 > index 00000000000..a90f3337034 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse-comiss-pr106113-2.c > @@ -0,0 +1,59 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -msse" } */ > +/* { dg-require-effective-target sse } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse_test > +#endif > + > +#include CHECK_H > + > +#include <xmmintrin.h> > + > +#define CMP(PRED, EXP) \ > + res = 
_mm_comi##PRED##_ss (__A, __B); \ > + if (res != EXP) \ > + abort (); > +static void > +__attribute__((noinline, unused)) > +do_check (float s1, float s2) > +{ > + __m128 __A = _mm_load_ss (&s1); > + __m128 __B = _mm_load_ss (&s2); > + int res; > + > + CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2)); > + CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2)); > + CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2)); > + CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2)); > + CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2)); > + CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2)); > +} > + > +static void > +TEST (void) > +{ > + struct > + { > + float x1; > + float x2; > + } > + inputs[] = > + { > + { 4.3, 2.18 }, > + { -4.3, 3.18 }, > + { __builtin_nanf (""), -5.8 }, > + { -4.8, __builtin_nansf ("") }, > + { 3.8, __builtin_nansf ("") }, > + { 4.2, 4.2 }, > + { __builtin_nanf (""), __builtin_nansf ("") }, > + }; > + int i; > + > + for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++) > + do_check (inputs[i].x1, inputs[i].x2); > +} > diff --git a/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c > b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c > new file mode 100644 > index 00000000000..e337e11a557 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-1.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-msse -O2" } */ > +/* { dg-final { scan-assembler-times "ucomiss\[ > \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */ > +/* { dg-final { scan-assembler-times "jp" 2 } } */ > +#include <xmmintrin.h> > + > +volatile __m128 x1, x2; > +volatile int res; > + > +void extern > +sse_ucomi_test (void) > +{ > + res = _mm_ucomieq_ss (x1, x2); > + res = _mm_ucomilt_ss (x1, x2); > + res = _mm_ucomile_ss (x1, x2); > + res = _mm_ucomigt_ss (x1, x2); > + res = _mm_ucomige_ss (x1, x2); > + res = _mm_ucomineq_ss (x1, x2); > +} > diff --git 
a/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c > b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c > new file mode 100644 > index 00000000000..37d845025c8 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse-ucomiss-pr106113-2.c > @@ -0,0 +1,59 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -msse" } */ > +/* { dg-require-effective-target sse } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse_test > +#endif > + > +#include CHECK_H > + > +#include <xmmintrin.h> > + > +#define CMP(PRED, EXP) \ > + res = _mm_ucomi##PRED##_ss (__A, __B); \ > + if (res != EXP) \ > + abort (); > +static void > +__attribute__((noinline, unused)) > +do_check (float s1, float s2) > +{ > + __m128 __A = _mm_load_ss (&s1); > + __m128 __B = _mm_load_ss (&s2); > + int res; > + > + CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2)); > + CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2)); > + CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2)); > + CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2)); > + CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2)); > + CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2)); > +} > + > +static void > +TEST (void) > +{ > + struct > + { > + float x1; > + float x2; > + } > + inputs[] = > + { > + { 4.3, 2.18 }, > + { -4.3, 3.18 }, > + { __builtin_nanf (""), -5.8 }, > + { -4.8, __builtin_nansf ("") }, > + { 3.8, __builtin_nansf ("") }, > + { 4.2, 4.2 }, > + { __builtin_nanf (""), __builtin_nansf ("") }, > + }; > + int i; > + > + for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++) > + do_check (inputs[i].x1, inputs[i].x2); > +} > diff --git a/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c > b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c > new file mode 100644 > index 00000000000..6268977d268 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-1.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options 
"-msse2 -O2" } */ > +/* { dg-final { scan-assembler-times "comisd\[ > \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */ > +/* { dg-final { scan-assembler-times "jp" 2 } } */ > +#include <xmmintrin.h> > + > +volatile __m128d x1, x2; > +volatile int res; > + > +void extern > +sse2_comisd_test (void) > +{ > + res = _mm_comieq_sd (x1, x2); > + res = _mm_comilt_sd (x1, x2); > + res = _mm_comile_sd (x1, x2); > + res = _mm_comigt_sd (x1, x2); > + res = _mm_comige_sd (x1, x2); > + res = _mm_comineq_sd (x1, x2); > +} > diff --git a/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c > b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c > new file mode 100644 > index 00000000000..f49771c9212 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse2-comisd-pr106113-2.c > @@ -0,0 +1,59 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -msse2" } */ > +/* { dg-require-effective-target sse2 } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse2-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse2_test > +#endif > + > +#include CHECK_H > + > +#include <emmintrin.h> > + > +#define CMP(PRED, EXP) \ > + res = _mm_comi##PRED##_sd (__A, __B); \ > + if (res != EXP) \ > + abort (); > +static void > +__attribute__((noinline, unused)) > +do_check (double s1, double s2) > +{ > + __m128d __A = _mm_load_sd (&s1); > + __m128d __B = _mm_load_sd (&s2); > + int res; > + > + CMP (eq, (!__builtin_isunordered (s1, s2) && s1 == s2)); > + CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2)); > + CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2)); > + CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2)); > + CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2)); > + CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2)); > +} > + > +static void > +TEST (void) > +{ > + struct > + { > + double x1; > + double x2; > + } > + inputs[] = > + { > + { 4.3, 2.18 }, > + { -4.3, 3.18 }, > + { __builtin_nan (""), -5.8 }, > + { -4.8, __builtin_nans ("") }, > + { 3.8, 
__builtin_nans ("") }, > + { 4.2, 4.2 }, > + { __builtin_nan (""), __builtin_nans ("") }, > + }; > + int i; > + > + for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++) > + do_check (inputs[i].x1, inputs[i].x2); > +} > diff --git a/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c > b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c > new file mode 100644 > index 00000000000..e64c0ace0cc > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-1.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-msse2 -O2" } */ > +/* { dg-final { scan-assembler-times "ucomisd\[ > \\t\]+\[^\n\]*\[^\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */ > +/* { dg-final { scan-assembler-times "jp" 2 } } */ > +#include <xmmintrin.h> > + > +volatile __m128d x1, x2; > +volatile int res; > + > +void extern > +sse2_ucomisd_test (void) > +{ > + res = _mm_ucomieq_sd (x1, x2); > + res = _mm_ucomilt_sd (x1, x2); > + res = _mm_ucomile_sd (x1, x2); > + res = _mm_ucomigt_sd (x1, x2); > + res = _mm_ucomige_sd (x1, x2); > + res = _mm_ucomineq_sd (x1, x2); > +} > diff --git a/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c > b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c > new file mode 100644 > index 00000000000..606a8971c26 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/sse2-ucomisd-pr106113-2.c > @@ -0,0 +1,59 @@ > +/* { dg-do run } */ > +/* { dg-options "-O2 -msse2" } */ > +/* { dg-require-effective-target sse2 } */ > + > +#ifndef CHECK_H > +#define CHECK_H "sse2-check.h" > +#endif > + > +#ifndef TEST > +#define TEST sse2_test > +#endif > + > +#include CHECK_H > + > +#include <emmintrin.h> > + > +#define CMP(PRED, EXP) \ > + res = _mm_ucomi##PRED##_sd (__A, __B); \ > + if (res != EXP) \ > + abort (); > +static void > +__attribute__((noinline, unused)) > +do_check (double s1, double s2) > +{ > + __m128d __A = _mm_load_sd (&s1); > + __m128d __B = _mm_load_sd (&s2); > + int res; > + > + CMP (eq, (!__builtin_isunordered (s1, s2) 
&& s1 == s2)); > + CMP (ge, (!__builtin_isunordered (s1, s2) && s1 >= s2)); > + CMP (gt, (!__builtin_isunordered (s1, s2) && s1 > s2)); > + CMP (lt, (!__builtin_isunordered (s1, s2) && s1 < s2)); > + CMP (le, (!__builtin_isunordered (s1, s2) && s1 <= s2)); > + CMP (neq, (__builtin_isunordered (s1, s2) || s1 != s2)); > +} > + > +static void > +TEST (void) > +{ > + struct > + { > + double x1; > + double x2; > + } > + inputs[] = > + { > + { 4.3, 2.18 }, > + { -4.3, 3.18 }, > + { __builtin_nan (""), -5.8 }, > + { -4.8, __builtin_nans ("") }, > + { 3.8, __builtin_nans ("") }, > + { 4.2, 4.2 }, > + { __builtin_nan (""), __builtin_nans ("") }, > + }; > + int i; > + > + for (i = 0; i < sizeof (inputs) / sizeof (inputs[0]); i++) > + do_check (inputs[i].x1, inputs[i].x2); > +} > -- > 2.18.2 >
-- BR, Hongtao