Re: [PATCH] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

Richard Biener Thu, 30 Jan 2025 23:05:34 -0800

On Fri, Jan 31, 2025 at 3:55 AM Michael Meissner <[email protected]> wrote:
>
> Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
>
> In bug PR target/118541 on power9, power10, and power11 systems, for the
> function:
>
>         extern double __ieee754_acos (double);
>
>         double
>         __acospi (double x)
>         {
>           double ret = __ieee754_acos (x) / 3.14;
>           return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
>         }
>
> GCC currently generates the following code:
>
>         Power9                          Power10 and Power11
>         ======                          ===================
>         bl __ieee754_acos               bl __ieee754_acos@notoc
>         nop                             plfd 0,.LC0@pcrel
>         addis 9,2,.LC2@toc@ha           xxspltidp 12,1065353216
>         addi 1,1,32                     addi 1,1,32
>         lfd 0,.LC2@toc@l(9)             ld 0,16(1)
>         addis 9,2,.LC0@toc@ha           fdiv 0,1,0
>         ld 0,16(1)                      mtlr 0
>         lfd 12,.LC0@toc@l(9)            xscmpgtdp 1,0,12
>         fdiv 0,1,0                      xxsel 1,0,12,1
>         mtlr 0                          blr
>         xscmpgtdp 1,0,12
>         xxsel 1,0,12,1
>         blr
>
> This is because ifcvt.c optimizes the conditional floating point move to use 
> the
> XSCMPGTDP instruction.
>
> However, the XSCMPGTDP instruction will generate an interrupt if one of the
> arguments is a signalling NaN and signalling NaNs can generate an interrupt.
> The IEEE comparison functions (isgreater, etc.) require that the comparison 
> not
> raise an interrupt.
>
> The following patch changes the PowerPC back end so that ifcvt.c will not 
> change
> the if/then test and move into a conditional move if the comparison is one of
> the comparisons that do not raise an error with signalling NaNs and -Ofast is
> not used.  If a normal comparison is used or -Ofast is used, GCC will continue
> to generate XSCMPGTDP and XXSEL.
>
> For the following code:
>
>         double
>         ordered_compare (double a, double b, double c, double d)
>         {
>           return __builtin_isgreater (a, b) ? c : d;
>         }
>
>         /* Verify normal > does generate xscmpgtdp.  */
>
>         double
>         normal_compare (double a, double b, double c, double d)
>         {
>           return a > b ? c : d;
>         }
>
> with the following patch, GCC generates the following for power9, power10, and
> power11:
>
>         ordered_compare:
>                 fcmpu 0,1,2
>                 fmr 1,4
>                 bnglr 0
>                 fmr 1,3
>                 blr
>
>         normal_compare:
>                 xscmpgtdp 1,1,2
>                 xxsel 1,4,3,1
>                 blr
>
> I have built bootstrap compilers on big endian power9 systems and little 
> endian
> power9/power10 systems and there were no regressions.  Can I check this patch
> into the GCC trunk, and after a waiting period, can I check this into the 
> active
> older branches?
>
> 2025-01-30  Michael Meissner  <[email protected]>
>
> gcc/
>
>         PR target/118541
>         * config/rs6000/rs6000-protos.h (REVERSE_COND_ORDERED_OK): New macro.
>         (REVERSE_COND_NO_ORDERED): Likewise.
>         (rs6000_reverse_condition): Add argument.
>         * config/rs6000/rs6000.cc (rs6000_reverse_condition): Do not allow
>         ordered comparisons to be reversed for floating point cmoves.
>         (rs6000_emit_sCOND): Adjust rs6000_reverse_condition call.
>         * config/rs6000/rs6000.h (REVERSE_CONDITION): Likewise.
>         * config/rs6000/rs6000.md (reverse_branch_comparison): Name insn.
>         Adjust rs6000_reverse_condition call.
>
> gcc/testsuite/
>
>         PR target/118541
>         * gcc.target/powerpc/pr118541.c: New test.
> ---
>  gcc/config/rs6000/rs6000-protos.h           |  6 ++-
>  gcc/config/rs6000/rs6000.cc                 | 23 ++++++++---
>  gcc/config/rs6000/rs6000.h                  | 10 ++++-
>  gcc/config/rs6000/rs6000.md                 | 24 +++++++-----
>  gcc/testsuite/gcc.target/powerpc/pr118541.c | 43 +++++++++++++++++++++
>  5 files changed, 89 insertions(+), 17 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr118541.c
>
> diff --git a/gcc/config/rs6000/rs6000-protos.h 
> b/gcc/config/rs6000/rs6000-protos.h
> index 4619142d197..112332660d3 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -114,8 +114,12 @@ extern const char *rs6000_sibcall_template (rtx *, 
> unsigned int);
>  extern const char *rs6000_indirect_call_template (rtx *, unsigned int);
>  extern const char *rs6000_indirect_sibcall_template (rtx *, unsigned int);
>  extern const char *rs6000_pltseq_template (rtx *, int);
> +
> +#define REVERSE_COND_ORDERED_OK        false
> +#define REVERSE_COND_NO_ORDERED        true
> +
>  extern enum rtx_code rs6000_reverse_condition (machine_mode,
> -                                              enum rtx_code);
> +                                              enum rtx_code, bool);
>  extern rtx rs6000_emit_eqne (machine_mode, rtx, rtx, rtx);
>  extern rtx rs6000_emit_fp_cror (rtx_code, machine_mode, rtx);
>  extern void rs6000_emit_sCOND (machine_mode, rtx[]);
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index f9f9a0b931d..eaf79435ec3 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -15360,15 +15360,25 @@ rs6000_print_patchable_function_entry (FILE *file,
>  }
>
>  enum rtx_code
> -rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
> +rs6000_reverse_condition (machine_mode mode,
> +                         enum rtx_code code,
> +                         bool no_ordered)
>  {
>    /* Reversal of FP compares takes care -- an ordered compare
> -     becomes an unordered compare and vice versa.  */
> +     becomes an unordered compare and vice versa.
> +
> +     However, this is not safe for ordered comparisons (i.e. for isgreater,
> +     etc.)  starting with the power9 because ifcvt.cc will want to create a 
> fp
> +     cmove, and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one 
> of
> +     the arguments is a signalling NaN.  */
> +
>    if (mode == CCFPmode
>        && (!flag_finite_math_only


I fail to see where -Ofast is allowed, but only see the pre-existing
flag check above
which checks for no NaN/Inf rather than sNaN - the latter would be checked with
HONOR_SNANS (mode).

I guess you really want

>           || code == UNLT || code == UNLE || code == UNGT || code == UNGE
>           || code == UNEQ || code == LTGT))
> -    return reverse_condition_maybe_unordered (code);
> +    return (no_ordered

    return (no_ordered && HONOR_SNANS (mode)

no?

> +           ? UNKNOWN
> +           : reverse_condition_maybe_unordered (code));
>    else
>      return reverse_condition (code);
>  }
> @@ -15980,11 +15990,14 @@ rs6000_emit_sCOND (machine_mode mode, rtx 
> operands[])
>        rtx not_result = gen_reg_rtx (CCEQmode);
>        rtx not_op, rev_cond_rtx;
>        machine_mode cc_mode;
> +      enum rtx_code rev;
>
>        cc_mode = GET_MODE (XEXP (condition_rtx, 0));
>
> -      rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, 
> cond_code),
> -                                    SImode, XEXP (condition_rtx, 0), 
> const0_rtx);
> +      rev = rs6000_reverse_condition (cc_mode, cond_code,
> +                                     REVERSE_COND_ORDERED_OK);
> +      rev_cond_rtx = gen_rtx_fmt_ee (rev, SImode, XEXP (condition_rtx, 0),
> +                                    const0_rtx);
>        not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
>        emit_insn (gen_rtx_SET (not_result, not_op));
>        condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index ec08c96d0f6..c595d7138bc 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -1812,11 +1812,17 @@ extern scalar_int_mode rs6000_pmode;
>
>  /* Can the condition code MODE be safely reversed?  This is safe in
>     all cases on this port, because at present it doesn't use the
> -   trapping FP comparisons (fcmpo).  */
> +   trapping FP comparisons (fcmpo).
> +
> +   However, this is not safe for ordered comparisons (i.e. for isgreater, 
> etc.)
> +   starting with the power9 because ifcvt.cc will want to create a fp cmove,
> +   and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one of the
> +   arguments is a signalling NaN.  */
>  #define REVERSIBLE_CC_MODE(MODE) 1
>
>  /* Given a condition code and a mode, return the inverse condition.  */
> -#define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE)
> +#define REVERSE_CONDITION(CODE, MODE) \
> +  rs6000_reverse_condition (MODE, CODE, REVERSE_COND_NO_ORDERED)
>
>
>  /* Target cpu costs.  */
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 65da0c65330..5f7ff36617d 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -13497,7 +13497,7 @@ (define_insn "@cceq_rev_compare_<mode>"
>  ;; If we are comparing the result of two comparisons, this can be done
>  ;; using creqv or crxor.
>
> -(define_insn_and_split ""
> +(define_insn_and_split "*reverse_branch_comparison"
>    [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y")
>         (compare:CCEQ (match_operator 1 "branch_comparison_operator"
>                               [(match_operand 2 "cc_reg_operand" "y")
> @@ -13519,19 +13519,25 @@ (define_insn_and_split ""
>                                                     GET_MODE (operands[3]));
>
>    if (! positive_1)
> -    operands[1] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE 
> (operands[2]),
> -                                                           GET_CODE 
> (operands[1])),
> -                                 SImode,
> -                                 operands[2], const0_rtx);
> +    {
> +      enum rtx_code rev = rs6000_reverse_condition (GET_MODE (operands[2]),
> +                                                   GET_CODE (operands[1]),
> +                                                   REVERSE_COND_ORDERED_OK);
> +      gcc_assert (rev != UNKNOWN);
> +      operands[1] = gen_rtx_fmt_ee (rev, SImode, operands[2], const0_rtx);
> +    }
>    else if (GET_MODE (operands[1]) != SImode)
>      operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), SImode,
>                                   operands[2], const0_rtx);
>
>    if (! positive_2)
> -    operands[3] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE 
> (operands[4]),
> -                                                           GET_CODE 
> (operands[3])),
> -                                 SImode,
> -                                 operands[4], const0_rtx);
> +    {
> +      enum rtx_code rev = rs6000_reverse_condition (GET_MODE (operands[4]),
> +                                                   GET_CODE (operands[3]),
> +                                                   REVERSE_COND_ORDERED_OK);
> +      gcc_assert (rev != UNKNOWN);
> +      operands[3] = gen_rtx_fmt_ee (rev, SImode, operands[4], const0_rtx);
> +    }
>    else if (GET_MODE (operands[3]) != SImode)
>      operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode,
>                                   operands[4], const0_rtx);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr118541.c 
> b/gcc/testsuite/gcc.target/powerpc/pr118541.c
> new file mode 100644
> index 00000000000..6ef67a2cf76
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr118541.c
> @@ -0,0 +1,43 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
> +/* { dg-require-effective-target powerpc_vsx } */
> +
> +/* PR target/118541 says that the ordered comparison functions like isgreater
> +   should not optimize floating point conditional moves to use
> +   x{s,v}cmp{eq,gt,ge}{dp,qp} and xxsel since that instruction can cause 
> traps
> +   if one of the arguments is a signaling NaN.  */
> +
> +/* Verify isgreater does not generate xscmpgtdp.  */
> +
> +double
> +ordered_compare (double a, double b, double c, double d)
> +{
> +  /*
> +   * fcmpu 0,1,2
> +   * fmr   1,4
> +   * bnglr 0
> +   * fmr   1,3
> +   * blr
> +   */
> +
> +  return __builtin_isgreater (a, b) ? c : d;
> +}
> +
> +/* Verify normal > does generate xscmpgtdp.  */
> +
> +double
> +normal_compare (double a, double b, double c, double d)
> +{
> +  /*
> +   * xscmpgtdp 1,1,2
> +   * xxsel     1,4,3,1
> +   * blr
> +   */
> +
> +  return a > b ? c : d;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxscmpg[te]dp\M}       1 } } */
> +/* { dg-final { scan-assembler-times {\mxxsel\M}              1 } } */
> +/* { dg-final { scan-assembler-times {\mxscmpudp\M|\mfcmpu\M} 1 } } */
> +
> --
> 2.48.1
>
>
> --
> Michael Meissner, IBM
> PO Box 98, Ayer, Massachusetts, USA, 01432
> email: [email protected]

Re: [PATCH] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

Reply via email to