https://gcc.gnu.org/g:7fdca3d4dcc96e50e8369634edb89bb95c99a3ea
commit 7fdca3d4dcc96e50e8369634edb89bb95c99a3ea
Author: Michael Meissner <meiss...@linux.ibm.com>
Date: Thu May 22 15:48:09 2025 -0400

    Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

    In bug PR target/118541 on power9, power10, and power11 systems, for the
    function:

        extern double __ieee754_acos (double);

        double
        __acospi (double x)
        {
          double ret = __ieee754_acos (x) / 3.14;
          return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
        }

    GCC currently generates the following code:

        Power9                          Power10 and Power11
        ======                          ===================
        bl __ieee754_acos               bl __ieee754_acos@notoc
        nop                             plfd 0,.LC0@pcrel
        addis 9,2,.LC2@toc@ha           xxspltidp 12,1065353216
        addi 1,1,32                     addi 1,1,32
        lfd 0,.LC2@toc@l(9)             ld 0,16(1)
        addis 9,2,.LC0@toc@ha           fdiv 0,1,0
        ld 0,16(1)                      mtlr 0
        lfd 12,.LC0@toc@l(9)            xscmpgtdp 1,0,12
        fdiv 0,1,0                      xxsel 1,0,12,1
        mtlr 0                          blr
        xscmpgtdp 1,0,12
        xxsel 1,0,12,1
        blr

    This is because ifcvt.c optimizes the conditional floating point move to
    use the XSCMPGTDP instruction.

    However, the XSCMPGTDP instruction traps if one of the arguments is a
    signaling NaN.

    This patch disables generating XSCMP{EQ,GT,GE}{DP,QP} instructions unless
    -ffinite-math-only is in effect so that we do not get a trap.

    2025-05-22 Michael Meissner <meiss...@linux.ibm.com>

    gcc/

        PR target/118541
        * config/rs6000/rs6000.md (mov<SFDF:mode><SFDF2:mode>cc_p9): Disable
        generating XSCMP{EQ,GT,GE}{DP,QP} unless -ffinite-math-only is in
        effect.
        (mov<SFDF:mode><SFDF2:mode>cc_invert_p9): Likewise.
        (fpmask<mode>, SFDF iterator): Likewise.
        (xxsel<mode>, SFDF iterator): Likewise.
        (mov<mode>cc, IEEE128 iterator): Likewise.
        (mov<mode>cc_p10): Likewise.
        (mov<mode>cc_invert_p10): Likewise.
        (fpmask<mode>, IEEE128 iterator): Likewise.
        (xxsel<mode>, IEEE128 iterator): Likewise.

    gcc/testsuite/

        PR target/118541
        * gcc.target/powerpc/float128-cmove.c: Change optimization flag to
        -Ofast instead of -O2.
Diff: --- gcc/config/rs6000/rs6000.md | 27 +++++++++++++++-------- gcc/testsuite/gcc.target/powerpc/float128-cmove.c | 6 ++++- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 65da0c653304..1f8cfcf0d255 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -5699,6 +5699,10 @@ "fsel %0,%1,%2,%3" [(set_attr "type" "fp")]) +;; On power9, we can generate XSCMP{EQ,GT,GE}DP and XXSEL to do a floating +;; point conditional move. However, these instructions trap if one of the +;; arguments is a signalling NaN. Therefore we can only do this optimize if +;; NaNs are not expected in the code. (define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_p9" [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa") (if_then_else:SFDF @@ -5708,7 +5712,7 @@ (match_operand:SFDF 4 "vsx_register_operand" "wa,wa") (match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) (clobber (match_scratch:V2DI 6 "=0,&wa"))] - "TARGET_P9_MINMAX" + "TARGET_P9_MINMAX && flag_finite_math_only" "#" "&& 1" [(set (match_dup 6) @@ -5740,7 +5744,7 @@ (match_operand:SFDF 4 "vsx_register_operand" "wa,wa") (match_operand:SFDF 5 "vsx_register_operand" "wa,wa"))) (clobber (match_scratch:V2DI 6 "=0,&wa"))] - "TARGET_P9_MINMAX" + "TARGET_P9_MINMAX && flag_finite_math_only" "#" "&& 1" [(set (match_dup 6) @@ -5775,7 +5779,7 @@ (match_operand:SFDF 3 "vsx_register_operand" "wa")]) (match_operand:V2DI 4 "all_ones_constant" "") (match_operand:V2DI 5 "zero_constant" "")))] - "TARGET_P9_MINMAX" + "TARGET_P9_MINMAX && flag_finite_math_only" "xscmp%V1dp %x0,%x2,%x3" [(set_attr "type" "fpcompare")]) @@ -5785,18 +5789,23 @@ (match_operand:V2DI 2 "zero_constant" "")) (match_operand:SFDF 3 "vsx_register_operand" "wa") (match_operand:SFDF 4 "vsx_register_operand" "wa")))] - "TARGET_P9_MINMAX" + "TARGET_P9_MINMAX && flag_finite_math_only" "xxsel %x0,%x4,%x3,%x1" [(set_attr "type" "vecmove")]) ;; Support for ISA 3.1 IEEE 128-bit 
conditional move. The mode used in the ;; comparison must be the same as used in the move. +;; +;; On power10, we can generate XSCMP{EQ,GT,GE}QP and XXSEL to do a floating +;; point conditional move for IEEE 128-bit values. However, these instructions +;; trap if one of the arguments is a signalling NaN. Therefore we can only do +;; this optimize if NaNs are not expected in the code. (define_expand "mov<mode>cc" [(set (match_operand:IEEE128 0 "gpc_reg_operand") (if_then_else:IEEE128 (match_operand 1 "comparison_operator") (match_operand:IEEE128 2 "gpc_reg_operand") (match_operand:IEEE128 3 "gpc_reg_operand")))] - "TARGET_POWER10 && TARGET_FLOAT128_HW" + "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" { if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3])) DONE; @@ -5813,7 +5822,7 @@ (match_operand:IEEE128 4 "altivec_register_operand" "v,v") (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) (clobber (match_scratch:V2DI 6 "=0,&v"))] - "TARGET_POWER10 && TARGET_FLOAT128_HW" + "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" "#" "&& 1" [(set (match_dup 6) @@ -5845,7 +5854,7 @@ (match_operand:IEEE128 4 "altivec_register_operand" "v,v") (match_operand:IEEE128 5 "altivec_register_operand" "v,v"))) (clobber (match_scratch:V2DI 6 "=0,&v"))] - "TARGET_POWER10 && TARGET_FLOAT128_HW" + "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" "#" "&& 1" [(set (match_dup 6) @@ -5880,7 +5889,7 @@ (match_operand:IEEE128 3 "altivec_register_operand" "v")]) (match_operand:V2DI 4 "all_ones_constant" "") (match_operand:V2DI 5 "zero_constant" "")))] - "TARGET_POWER10 && TARGET_FLOAT128_HW" + "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" "xscmp%V1qp %0,%2,%3" [(set_attr "type" "fpcompare")]) @@ -5891,7 +5900,7 @@ (match_operand:V2DI 2 "zero_constant" "")) (match_operand:IEEE128 3 "altivec_register_operand" "v") (match_operand:IEEE128 4 "altivec_register_operand" "v")))] - "TARGET_POWER10 && 
TARGET_FLOAT128_HW" + "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only" "xxsel %x0,%x4,%x3,%x1" [(set_attr "type" "vecmove")]) diff --git a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c index 2fae8dc23bcf..496fe29740c8 100644 --- a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c +++ b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c @@ -1,7 +1,11 @@ /* { dg-do compile } */ /* { dg-require-effective-target ppc_float128_hw } */ /* { dg-require-effective-target power10_ok } */ -/* { dg-options "-mdejagnu-cpu=power10 -O2" } */ +/* { dg-options "-mdejagnu-cpu=power10 -Ofast" } */ + +/* The XSCMP{EQ,GT,GE}QP instructions will trap if a signaling NaN is one of + the arguments, so this code is now only generated if -Ofast or + -ffinite-math-only is used. */ #ifndef TYPE #ifdef __LONG_DOUBLE_IEEE128__