On Fri, Jan 31, 2025 at 3:55 AM Michael Meissner <[email protected]> wrote:
>
> Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
>
> In bug PR target/118541 on power9, power10, and power11 systems, for the
> function:
>
> extern double __ieee754_acos (double);
>
> double
> __acospi (double x)
> {
> double ret = __ieee754_acos (x) / 3.14;
> return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
> }
>
> GCC currently generates the following code:
>
> Power9 Power10 and Power11
> ====== ===================
> bl __ieee754_acos bl __ieee754_acos@notoc
> nop plfd 0,.LC0@pcrel
> addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216
> addi 1,1,32 addi 1,1,32
> lfd 0,.LC2@toc@l(9) ld 0,16(1)
> addis 9,2,.LC0@toc@ha fdiv 0,1,0
> ld 0,16(1) mtlr 0
> lfd 12,.LC0@toc@l(9) xscmpgtdp 1,0,12
> fdiv 0,1,0 xxsel 1,0,12,1
> mtlr 0 blr
> xscmpgtdp 1,0,12
> xxsel 1,0,12,1
> blr
>
> This is because ifcvt.c optimizes the conditional floating point move to use
> the
> XSCMPGTDP instruction.
>
> However, the XSCMPGTDP instruction will generate an interrupt if one of the
> arguments is a signalling NaN and signalling NaNs can generate an interrupt.
> The IEEE comparison functions (isgreater, etc.) require that the comparison
> not
> raise an interrupt.
>
> The following patch changes the PowerPC back end so that ifcvt.c will not
> change
> the if/then test and move into a conditional move if the comparison is one of
> the comparisons that do not raise an error with signalling NaNs and -Ofast is
> not used. If a normal comparison is used or -Ofast is used, GCC will continue
> to generate XSCMPGTDP and XXSEL.
>
> For the following code:
>
> double
> ordered_compare (double a, double b, double c, double d)
> {
> return __builtin_isgreater (a, b) ? c : d;
> }
>
> /* Verify normal > does generate xscmpgtdp. */
>
> double
> normal_compare (double a, double b, double c, double d)
> {
> return a > b ? c : d;
> }
>
> with the following patch, GCC generates the following for power9, power10, and
> power11:
>
> ordered_compare:
> fcmpu 0,1,2
> fmr 1,4
> bnglr 0
> fmr 1,3
> blr
>
> normal_compare:
> xscmpgtdp 1,1,2
> xxsel 1,4,3,1
> blr
>
> I have built bootstrap compilers on big endian power9 systems and little
> endian
> power9/power10 systems and there were no regressions. Can I check this patch
> into the GCC trunk, and after a waiting period, can I check this into the
> active
> older branches?
>
> 2025-01-30 Michael Meissner <[email protected]>
>
> gcc/
>
> PR target/118541
> * config/rs6000/rs6000-protos.h (REVERSE_COND_ORDERED_OK): New macro.
> (REVERSE_COND_NO_ORDERED): Likewise.
> (rs6000_reverse_condition): Add argument.
> * config/rs6000/rs6000.cc (rs6000_reverse_condition): Do not allow
> ordered comparisons to be reversed for floating point cmoves.
> (rs6000_emit_sCOND): Adjust rs6000_reverse_condition call.
> * config/rs6000/rs6000.h (REVERSE_CONDITION): Likewise.
> * config/rs6000/rs6000.md (reverse_branch_comparison): Name insn.
> Adjust rs6000_reverse_condition call.
>
> gcc/testsuite/
>
> PR target/118541
> * gcc.target/powerpc/pr118541.c: New test.
> ---
> gcc/config/rs6000/rs6000-protos.h | 6 ++-
> gcc/config/rs6000/rs6000.cc | 23 ++++++++---
> gcc/config/rs6000/rs6000.h | 10 ++++-
> gcc/config/rs6000/rs6000.md | 24 +++++++-----
> gcc/testsuite/gcc.target/powerpc/pr118541.c | 43 +++++++++++++++++++++
> 5 files changed, 89 insertions(+), 17 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/powerpc/pr118541.c
>
> diff --git a/gcc/config/rs6000/rs6000-protos.h
> b/gcc/config/rs6000/rs6000-protos.h
> index 4619142d197..112332660d3 100644
> --- a/gcc/config/rs6000/rs6000-protos.h
> +++ b/gcc/config/rs6000/rs6000-protos.h
> @@ -114,8 +114,12 @@ extern const char *rs6000_sibcall_template (rtx *,
> unsigned int);
> extern const char *rs6000_indirect_call_template (rtx *, unsigned int);
> extern const char *rs6000_indirect_sibcall_template (rtx *, unsigned int);
> extern const char *rs6000_pltseq_template (rtx *, int);
> +
> +#define REVERSE_COND_ORDERED_OK false
> +#define REVERSE_COND_NO_ORDERED true
> +
> extern enum rtx_code rs6000_reverse_condition (machine_mode,
> - enum rtx_code);
> + enum rtx_code, bool);
> extern rtx rs6000_emit_eqne (machine_mode, rtx, rtx, rtx);
> extern rtx rs6000_emit_fp_cror (rtx_code, machine_mode, rtx);
> extern void rs6000_emit_sCOND (machine_mode, rtx[]);
> diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
> index f9f9a0b931d..eaf79435ec3 100644
> --- a/gcc/config/rs6000/rs6000.cc
> +++ b/gcc/config/rs6000/rs6000.cc
> @@ -15360,15 +15360,25 @@ rs6000_print_patchable_function_entry (FILE *file,
> }
>
> enum rtx_code
> -rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
> +rs6000_reverse_condition (machine_mode mode,
> + enum rtx_code code,
> + bool no_ordered)
> {
> /* Reversal of FP compares takes care -- an ordered compare
> - becomes an unordered compare and vice versa. */
> + becomes an unordered compare and vice versa.
> +
> + However, this is not safe for ordered comparisons (i.e. for isgreater,
> + etc.) starting with the power9 because ifcvt.cc will want to create a
> fp
> + cmove, and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one
> of
> + the arguments is a signalling NaN. */
> +
> if (mode == CCFPmode
> && (!flag_finite_math_only
I fail to see where -Ofast is allowed, but only see the pre-existing
flag check above
which checks for no NaN/Inf rather than sNaN - the latter would be checked with
HONOR_SNANS (mode).
I guess you really want
> || code == UNLT || code == UNLE || code == UNGT || code == UNGE
> || code == UNEQ || code == LTGT))
> - return reverse_condition_maybe_unordered (code);
> + return (no_ordered
return (no_ordered && HONOR_SNANS (mode)
no?
> + ? UNKNOWN
> + : reverse_condition_maybe_unordered (code));
> else
> return reverse_condition (code);
> }
> @@ -15980,11 +15990,14 @@ rs6000_emit_sCOND (machine_mode mode, rtx
> operands[])
> rtx not_result = gen_reg_rtx (CCEQmode);
> rtx not_op, rev_cond_rtx;
> machine_mode cc_mode;
> + enum rtx_code rev;
>
> cc_mode = GET_MODE (XEXP (condition_rtx, 0));
>
> - rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode,
> cond_code),
> - SImode, XEXP (condition_rtx, 0),
> const0_rtx);
> + rev = rs6000_reverse_condition (cc_mode, cond_code,
> + REVERSE_COND_ORDERED_OK);
> + rev_cond_rtx = gen_rtx_fmt_ee (rev, SImode, XEXP (condition_rtx, 0),
> + const0_rtx);
> not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
> emit_insn (gen_rtx_SET (not_result, not_op));
> condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
> diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
> index ec08c96d0f6..c595d7138bc 100644
> --- a/gcc/config/rs6000/rs6000.h
> +++ b/gcc/config/rs6000/rs6000.h
> @@ -1812,11 +1812,17 @@ extern scalar_int_mode rs6000_pmode;
>
> /* Can the condition code MODE be safely reversed? This is safe in
> all cases on this port, because at present it doesn't use the
> - trapping FP comparisons (fcmpo). */
> + trapping FP comparisons (fcmpo).
> +
> + However, this is not safe for ordered comparisons (i.e. for isgreater,
> etc.)
> + starting with the power9 because ifcvt.cc will want to create a fp cmove,
> + and the x{s,v}cmp{eq,gt,ge}{dp,qp} instructions will trap if one of the
> + arguments is a signalling NaN. */
> #define REVERSIBLE_CC_MODE(MODE) 1
>
> /* Given a condition code and a mode, return the inverse condition. */
> -#define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE)
> +#define REVERSE_CONDITION(CODE, MODE) \
> + rs6000_reverse_condition (MODE, CODE, REVERSE_COND_NO_ORDERED)
>
>
> /* Target cpu costs. */
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 65da0c65330..5f7ff36617d 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -13497,7 +13497,7 @@ (define_insn "@cceq_rev_compare_<mode>"
> ;; If we are comparing the result of two comparisons, this can be done
> ;; using creqv or crxor.
>
> -(define_insn_and_split ""
> +(define_insn_and_split "*reverse_branch_comparison"
> [(set (match_operand:CCEQ 0 "cc_reg_operand" "=y")
> (compare:CCEQ (match_operator 1 "branch_comparison_operator"
> [(match_operand 2 "cc_reg_operand" "y")
> @@ -13519,19 +13519,25 @@ (define_insn_and_split ""
> GET_MODE (operands[3]));
>
> if (! positive_1)
> - operands[1] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE
> (operands[2]),
> - GET_CODE
> (operands[1])),
> - SImode,
> - operands[2], const0_rtx);
> + {
> + enum rtx_code rev = rs6000_reverse_condition (GET_MODE (operands[2]),
> + GET_CODE (operands[1]),
> + REVERSE_COND_ORDERED_OK);
> + gcc_assert (rev != UNKNOWN);
> + operands[1] = gen_rtx_fmt_ee (rev, SImode, operands[2], const0_rtx);
> + }
> else if (GET_MODE (operands[1]) != SImode)
> operands[1] = gen_rtx_fmt_ee (GET_CODE (operands[1]), SImode,
> operands[2], const0_rtx);
>
> if (! positive_2)
> - operands[3] = gen_rtx_fmt_ee (rs6000_reverse_condition (GET_MODE
> (operands[4]),
> - GET_CODE
> (operands[3])),
> - SImode,
> - operands[4], const0_rtx);
> + {
> + enum rtx_code rev = rs6000_reverse_condition (GET_MODE (operands[4]),
> + GET_CODE (operands[3]),
> + REVERSE_COND_ORDERED_OK);
> + gcc_assert (rev != UNKNOWN);
> + operands[3] = gen_rtx_fmt_ee (rev, SImode, operands[4], const0_rtx);
> + }
> else if (GET_MODE (operands[3]) != SImode)
> operands[3] = gen_rtx_fmt_ee (GET_CODE (operands[3]), SImode,
> operands[4], const0_rtx);
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr118541.c
> b/gcc/testsuite/gcc.target/powerpc/pr118541.c
> new file mode 100644
> index 00000000000..6ef67a2cf76
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr118541.c
> @@ -0,0 +1,43 @@
> +/* { dg-do compile } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
> +/* { dg-require-effective-target powerpc_vsx } */
> +
> +/* PR target/118541 says that the ordered comparison functions like isgreater
> + should not optimize floating point conditional moves to use
> + x{s,v}cmp{eq,gt,ge}{dp,qp} and xxsel since that instruction can cause
> traps
> + if one of the arguments is a signaling NaN. */
> +
> +/* Verify isgreater does not generate xscmpgtdp. */
> +
> +double
> +ordered_compare (double a, double b, double c, double d)
> +{
> + /*
> + * fcmpu 0,1,2
> + * fmr 1,4
> + * bnglr 0
> + * fmr 1,3
> + * blr
> + */
> +
> + return __builtin_isgreater (a, b) ? c : d;
> +}
> +
> +/* Verify normal > does generate xscmpgtdp. */
> +
> +double
> +normal_compare (double a, double b, double c, double d)
> +{
> + /*
> + * xscmpgtdp 1,1,2
> + * xxsel 1,4,3,1
> + * blr
> + */
> +
> + return a > b ? c : d;
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxscmpg[te]dp\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mxxsel\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mxscmpudp\M|\mfcmpu\M} 1 } } */
> +
> --
> 2.48.1
>
>
> --
> Michael Meissner, IBM
> PO Box 98, Ayer, Massachusetts, USA, 01432
> email: [email protected]