https://gcc.gnu.org/g:33d453d937554d8d93861541ea8926647a674ea8
commit 33d453d937554d8d93861541ea8926647a674ea8 Author: Michael Meissner <meiss...@linux.ibm.com> Date: Wed May 21 14:01:38 2025 -0400 Fix PR 118541, do not generate unordered fp cmoves for IEEE compares. In bug PR target/118541 on power9, power10, and power11 systems, for the function: extern double __ieee754_acos (double); double __acospi (double x) { double ret = __ieee754_acos (x) / 3.14; return __builtin_isgreater (ret, 1.0) ? 1.0 : ret; } GCC currently generates the following code: Power9 Power10 and Power11 ====== =================== bl __ieee754_acos bl __ieee754_acos@notoc nop plfd 0,.LC0@pcrel addis 9,2,.LC2@toc@ha xxspltidp 12,1065353216 addi 1,1,32 addi 1,1,32 lfd 0,.LC2@toc@l(9) ld 0,16(1) addis 9,2,.LC0@toc@ha fdiv 0,1,0 ld 0,16(1) mtlr 0 lfd 12,.LC0@toc@l(9) xscmpgtdp 1,0,12 fdiv 0,1,0 xxsel 1,0,12,1 mtlr 0 blr xscmpgtdp 1,0,12 xxsel 1,0,12,1 blr This is because ifcvt.c optimizes the conditional floating point move to use the XSCMPGTDP instruction. However, the XSCMPGTDP instruction will generate an interrupt if one of the arguments is a signalling NaN and signalling NaNs can generate an interrupt. The IEEE comparison functions (isgreater, etc.) require that the comparison not raise an interrupt. The root cause of this is we allow floating point comparisons to be reversed (i.e. LT will be reversed to UNGE). Before power9, this was ok because we only generated the FCMPU or XSCMPUDP instructions. But with power9, we can generate the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP instructions. If NaNs are allowed, we no longer allow FP comparisons to be reversed. If FP comparisons can't be reversed, the machine independent portions of the compiler will generate the comparison with the arguments reversed. Since we do not support reversing FP comparisons, the code to support inverting fpmask operations on power9 has been removed. For the following code: double ordered_compare (double a, double b, double c, double d) { return __builtin_isgreater (a, b) ? c : d; } /* Verify normal > does generate xscmpgtdp. */ double normal_compare (double a, double b, double c, double d) { return a > b ? c : d; } with the following patch, GCC generates the following for power9, power10, and power11: ordered_compare: fcmpu 0,1,2 fmr 1,4 bnglr 0 fmr 1,3 blr normal_compare: xscmpgtdp 1,1,2 xxsel 1,4,3,1 blr I have built bootstrap compilers on big endian power9 systems and little endian power9/power10 systems and there were no regressions. Can I check this patch into the GCC trunk, and after a waiting period, can I check this into the active older branches? 2025-05-21 Michael Meissner <meiss...@linux.ibm.com> gcc/ PR target/118541 * config/rs6000/predicates.md (invert_fpmask_comparison_operator): Delete. * config/rs6000/rs6000.h (REVERSIBLE_CC_MODE): Do not allow floating point comparisons to be reversed unless -ffinite-math-only is used. (rs6000_reverse_condition): Add argument. * config/rs6000/rs6000.md (mov<SFDF:mode><SFDF2:mode>cc_p9): Add comment. (mov<SFDF:mode><SFDF2:mode>cc_invert_p9): Delete insns. (mov<mode>cc_invert_p10): Likewise. gcc/testsuite/ PR target/118541 * gcc.target/powerpc/pr118541.c: New test. Diff: --- gcc/config/rs6000/predicates.md | 6 -- gcc/config/rs6000/rs6000.h | 15 ++- gcc/config/rs6000/rs6000.md | 81 ++------------- gcc/testsuite/gcc.target/powerpc/pr118541.c | 147 ++++++++++++++++++++++++++++ 4 files changed, 165 insertions(+), 84 deletions(-) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 647e89afb6a7..02ba8fa6c9b0 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1463,12 +1463,6 @@ (define_predicate "fpmask_comparison_operator" (match_code "eq,gt,ge")) -;; Return 1 if OP is a comparison operator suitable for vector/scalar -;; comparisons that generate a 0/-1 mask (i.e. the inverse of -;; fpmask_comparison_operator). -(define_predicate "invert_fpmask_comparison_operator" - (match_code "ne,unlt,unle")) - ;; Return 1 if OP is a comparison operation suitable for integer vector/scalar ;; comparisons that generate a -1/0 mask. (define_predicate "vecint_comparison_operator" diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 9267612fbc9c..c8d9456e0912 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -1810,10 +1810,17 @@ extern scalar_int_mode rs6000_pmode; : (((OP) == EQ || (OP) == NE) && COMPARISON_P (X) \ ? CCEQmode : CCmode)) -/* Can the condition code MODE be safely reversed? This is safe in - all cases on this port, because at present it doesn't use the - trapping FP comparisons (fcmpo). */ -#define REVERSIBLE_CC_MODE(MODE) 1 +/* Can the condition code MODE be safely reversed? Don't allow floating point + comparisons to be reversed unless NaNs are not allowed. + + In the past, we used to allow reversing FP operations because we only + generated FCMPU comparisons and not FCMPO. However, starting with power9, + the XSCMPEQDP, XSCMPGTDP, and XSCMPGEDP instructions will trap if a + signalling NaN is used. If we allow reversing FP operations, we could wind + up converting a LT operation into UNGE and the instruction will trap. The + machine independent parts of the compiler will handle reversing the + arguments if the FP comparison cannot be reversed. */ +#define REVERSIBLE_CC_MODE(MODE) ((MODE) != CCFPmode || flag_finite_math_only) /* Given a condition code and a mode, return the inverse condition. */ #define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 65da0c653304..b9a91b0eab24 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5699,6 +5699,13 @@ "fsel %0,%1,%2,%3" [(set_attr "type" "fp")]) +;; On power9 and above generate the XSCMPEQFP, XSCMPGTDP, and XSCMPGEDP +;; instructions followed by XXSEL to do a floating point conditional move. In +;; the past, we provided patterns for inverting the comparison, converting a LE +;; into UNGT. However, the XSCMPEQDP, XSCMPGTDP, and XSCMPGEDP instructions +;; will trap if one of the arguments is a signalling NaN. Since we aren't +;; providing the inverted operation, the machine independent parts of the +;; compiler generate code with the arguments swapped. (define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_p9" [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa") (if_then_else:SFDF @@ -5730,43 +5737,6 @@ [(set_attr "length" "8") (set_attr "type" "vecperm")]) -;; Handle inverting the fpmask comparisons. -(define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_invert_p9" - [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa") - (if_then_else:SFDF - (match_operator:CCFP 1 "invert_fpmask_comparison_operator" - [(match_operand:SFDF2 2 "vsx_register_operand" "wa,wa") - (match_operand:SFDF2 3 "vsx_register_operand" "wa,wa")]) - (match_operand:SFDF 4 "vsx_register_operand" "wa,wa") - (match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) - (clobber (match_scratch:V2DI 6 "=0,&wa"))] - "TARGET_P9_MINMAX" - "#" - "&& 1" - [(set (match_dup 6) - (if_then_else:V2DI (match_dup 9) - (match_dup 7) - (match_dup 8))) - (set (match_dup 0) - (if_then_else:SFDF (ne (match_dup 6) - (match_dup 8)) - (match_dup 5) - (match_dup 4)))] -{ - rtx op1 = operands[1]; - enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); - - if (GET_CODE (operands[6]) == SCRATCH) - operands[6] = gen_reg_rtx (V2DImode); - - operands[7] = CONSTM1_RTX (V2DImode); - operands[8] = CONST0_RTX (V2DImode); - - operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); -} - [(set_attr "length" "8") - (set_attr "type" "vecperm")]) - (define_insn "*fpmask<mode>" [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa") (if_then_else:V2DI @@ -5835,43 +5805,6 @@ [(set_attr "length" "8") (set_attr "type" "vecperm")]) -;; Handle inverting the fpmask comparisons. -(define_insn_and_split "*mov<mode>cc_invert_p10" - [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v") - (if_then_else:IEEE128 - (match_operator:CCFP 1 "invert_fpmask_comparison_operator" - [(match_operand:IEEE128 2 "altivec_register_operand" "v,v") - (match_operand:IEEE128 3 "altivec_register_operand" "v,v")]) - (match_operand:IEEE128 4 "altivec_register_operand" "v,v") - (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) - (clobber (match_scratch:V2DI 6 "=0,&v"))] - "TARGET_POWER10 && TARGET_FLOAT128_HW" - "#" - "&& 1" - [(set (match_dup 6) - (if_then_else:V2DI (match_dup 9) - (match_dup 7) - (match_dup 8))) - (set (match_dup 0) - (if_then_else:IEEE128 (ne (match_dup 6) - (match_dup 8)) - (match_dup 5) - (match_dup 4)))] -{ - rtx op1 = operands[1]; - enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1)); - - if (GET_CODE (operands[6]) == SCRATCH) - operands[6] = gen_reg_rtx (V2DImode); - - operands[7] = CONSTM1_RTX (V2DImode); - operands[8] = CONST0_RTX (V2DImode); - - operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]); -} - [(set_attr "length" "8") - (set_attr "type" "vecperm")]) - (define_insn "*fpmask<mode>" [(set (match_operand:V2DI 0 "altivec_register_operand" "=v") (if_then_else:V2DI diff --git a/gcc/testsuite/gcc.target/powerpc/pr118541.c b/gcc/testsuite/gcc.target/powerpc/pr118541.c new file mode 100644 index 000000000000..a38c94c8b7ed --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr118541.c @@ -0,0 +1,147 @@ +/* { dg-do compile } */ +/* { dg-options "-mdejagnu-cpu=power9 -O2" } */ +/* { dg-require-effective-target powerpc_vsx } */ + +/* PR target/118541 says that the ordered comparison functions like isgreater + should not optimize floating point conditional moves to use + x{s,v}cmp{eq,gt,ge}{dp,qp} and xxsel since that instruction can cause traps + if one of the arguments is a signaling NaN. */ + +/* Verify isgreater, isgreaterequal, isless, and islessequal does not generate + xscmpltdp, xscmpgtdp, or xscmpeqdp. */ + +double +ordered_gt (double a, double b, double c, double d) +{ + /* + * fcmpu 0,1,2 + * fmr 1,4 + * bnglr 0 + * fmr 1,3 + * blr + */ + + return __builtin_isgreater (a, b) ? c : d; +} + +double +ordered_ge (double a, double b, double c, double d) +{ + /* + * fcmpu 0,1,2 + * fmr 1,4 + * cror 2,0,3 + * beqlr 0 + * fmr 1,3 + * blr + */ + + return __builtin_isgreaterequal (a, b) ? c : d; +} + +double +ordered_lt (double a, double b, double c, double d) +{ + /* + * fcmpu 0,1,2 + * fmr 1,4 + * bnllr 0 + * fmr 1,3 + * blr + */ + + return __builtin_isless (a, b) ? c : d; +} + +double +ordered_le (double a, double b, double c, double d) +{ + /* + * fcmpu 0,1,2 + * fmr 1,4 + * cror 2,1,3 + * beqlr 0 + * fmr 1,3 + * blr + */ + + return __builtin_islessequal (a, b) ? c : d; +} + +double +normal_gt (double a, double b, double c, double d) +{ + /* + * xscmpgtdp 1,1,2 + * xxsel 1,4,3,1 + * blr + */ + + return a > b ? c : d; +} + +double +normal_ge (double a, double b, double c, double d) +{ + /* + * xscmpgedp 1,1,2 + * xxsel 1,4,3,1 + * blr + */ + + return a >= b ? c : d; +} + +double +normal_lt (double a, double b, double c, double d) +{ + /* + * xscmpgtdp 1,2,1 + * xxsel 1,4,3,1 + * blr + */ + + return a < b ? c : d; +} + +double +normal_le (double a, double b, double c, double d) +{ + /* + * xscmpgedp 1,2,1 + * xxsel 1,4,3,1 + * blr + */ + + return a <= b ? c : d; +} + +double +normal_eq (double a, double b, double c, double d) +{ + /* + * xscmpeqdp 1,1,2 + * xxsel 1,4,3,1 + * blr + */ + + return a == b ? c : d; +} + +double +normal_ne (double a, double b, double c, double d) +{ + /* + * xscmpeqdp 1,1,2 + * xxsel 1,3,4,1 + * blr + */ + + return a != b ? c : d; +} + +/* { dg-final { scan-assembler-times {\mxscmpudp\M|\mfcmpu\M} 4 } } */ +/* { dg-final { scan-assembler-times {\mxscmpeqdp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxscmpgedp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxscmpgtdp\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mxxsel\M} 6 } } */