[gcc(refs/users/meissner/heads/work206-bugs)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

Michael Meissner via Gcc-cvs Thu, 22 May 2025 12:48:52 -0700

https://gcc.gnu.org/g:7fdca3d4dcc96e50e8369634edb89bb95c99a3ea


commit 7fdca3d4dcc96e50e8369634edb89bb95c99a3ea
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Thu May 22 15:48:09 2025 -0400

    Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
    
    In bug PR target/118541 on power9, power10, and power11 systems, for the
    function:
    
            extern double __ieee754_acos (double);
    
            double
            __acospi (double x)
            {
              double ret = __ieee754_acos (x) / 3.14;
              return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
            }
    
    GCC currently generates the following code:
    
            Power9                          Power10 and Power11
            ======                          ===================
            bl __ieee754_acos               bl __ieee754_acos@notoc
            nop                             plfd 0,.LC0@pcrel
            addis 9,2,.LC2@toc@ha           xxspltidp 12,1065353216
            addi 1,1,32                     addi 1,1,32
            lfd 0,.LC2@toc@l(9)             ld 0,16(1)
            addis 9,2,.LC0@toc@ha           fdiv 0,1,0
            ld 0,16(1)                      mtlr 0
            lfd 12,.LC0@toc@l(9)            xscmpgtdp 1,0,12
            fdiv 0,1,0                      xxsel 1,0,12,1
            mtlr 0                          blr
            xscmpgtdp 1,0,12
            xxsel 1,0,12,1
            blr
    
    This is because ifcvt.c optimizes the conditional floating point move to use
    the XSCMPGTDP instruction.
    
    However, the XSCMPGTDP instruction traps if one of the arguments is a signaling
    NaN.  This patch disables generating XSCMP{EQ,GT,GE}{DP,QP} instructions unless
    -ffinite-math-only is in effect so that we do not get a trap.
    
    2025-05-22  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            PR target/118541
            * config/rs6000/rs6000.md (mov<SFDF:mode><SFDF2:mode>cc_p9): Disable
            generating XSCMP{EQ,GT,GE}{DP,QP} unless -ffinite-math-only is in
            effect.
            (mov<SFDF:mode><SFDF2:mode>cc_invert_p9): Likewise.
            (fpmask<mode>, SFDF iterator): Likewise.
            (xxsel<mode>, SFDF iterator): Likewise.
            (mov<mode>cc, IEEE128 iterator): Likewise.
            (mov<mode>cc_p10): Likewise.
            (mov<mode>cc_invert_p10): Likewise.
            (fpmask<mode>, IEEE128 iterator): Likewise.
            (xxsel<mode>, IEEE128 iterator): Likewise.
    
    gcc/testsuite/
    
            PR target/118541
            * gcc.target/powerpc/float128-cmove.c: Change optimization flag to
            -Ofast instead of -O2.

Diff:
---
 gcc/config/rs6000/rs6000.md                       | 27 +++++++++++++++--------
 gcc/testsuite/gcc.target/powerpc/float128-cmove.c |  6 ++++-
 2 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 65da0c653304..1f8cfcf0d255 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5699,6 +5699,10 @@
   "fsel %0,%1,%2,%3"
   [(set_attr "type" "fp")])
 
+;; On power9, we can generate XSCMP{EQ,GT,GE}DP and XXSEL to do a floating
+;; point conditional move.  However, these instructions trap if one of the
+;; arguments is a signalling NaN.  Therefore we can only do this optimization
+;; if NaNs are not expected in the code.
 (define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_p9"
   [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa")
        (if_then_else:SFDF
@@ -5708,7 +5712,7 @@
         (match_operand:SFDF 4 "vsx_register_operand" "wa,wa")
         (match_operand:SFDF 5 "vsx_register_operand" "wa,wa")))
    (clobber (match_scratch:V2DI 6 "=0,&wa"))]
-  "TARGET_P9_MINMAX"
+  "TARGET_P9_MINMAX && flag_finite_math_only"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5740,7 +5744,7 @@
         (match_operand:SFDF 4 "vsx_register_operand" "wa,wa")
         (match_operand:SFDF 5 "vsx_register_operand" "wa,wa")))
    (clobber (match_scratch:V2DI 6 "=0,&wa"))]
-  "TARGET_P9_MINMAX"
+  "TARGET_P9_MINMAX && flag_finite_math_only"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5775,7 +5779,7 @@
                 (match_operand:SFDF 3 "vsx_register_operand" "wa")])
         (match_operand:V2DI 4 "all_ones_constant" "")
         (match_operand:V2DI 5 "zero_constant" "")))]
-  "TARGET_P9_MINMAX"
+  "TARGET_P9_MINMAX && flag_finite_math_only"
   "xscmp%V1dp %x0,%x2,%x3"
   [(set_attr "type" "fpcompare")])
 
@@ -5785,18 +5789,23 @@
                               (match_operand:V2DI 2 "zero_constant" ""))
                           (match_operand:SFDF 3 "vsx_register_operand" "wa")
                           (match_operand:SFDF 4 "vsx_register_operand" "wa")))]
-  "TARGET_P9_MINMAX"
+  "TARGET_P9_MINMAX && flag_finite_math_only"
   "xxsel %x0,%x4,%x3,%x1"
   [(set_attr "type" "vecmove")])
 
 ;; Support for ISA 3.1 IEEE 128-bit conditional move.  The mode used in the
 ;; comparison must be the same as used in the move.
+;;
+;; On power10, we can generate XSCMP{EQ,GT,GE}QP and XXSEL to do a floating
+;; point conditional move for IEEE 128-bit values.  However, these instructions
+;; trap if one of the arguments is a signalling NaN.  Therefore we can only do
+;; this optimization if NaNs are not expected in the code.
 (define_expand "mov<mode>cc"
    [(set (match_operand:IEEE128 0 "gpc_reg_operand")
         (if_then_else:IEEE128 (match_operand 1 "comparison_operator")
                               (match_operand:IEEE128 2 "gpc_reg_operand")
                               (match_operand:IEEE128 3 "gpc_reg_operand")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only"
 {
   if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3]))
     DONE;
@@ -5813,7 +5822,7 @@
         (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
         (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
    (clobber (match_scratch:V2DI 6 "=0,&v"))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5845,7 +5854,7 @@
         (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
         (match_operand:IEEE128 5 "altivec_register_operand" "v,v")))
    (clobber (match_scratch:V2DI 6 "=0,&v"))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only"
   "#"
   "&& 1"
   [(set (match_dup 6)
@@ -5880,7 +5889,7 @@
                 (match_operand:IEEE128 3 "altivec_register_operand" "v")])
         (match_operand:V2DI 4 "all_ones_constant" "")
         (match_operand:V2DI 5 "zero_constant" "")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only"
   "xscmp%V1qp %0,%2,%3"
   [(set_attr "type" "fpcompare")])
 
@@ -5891,7 +5900,7 @@
             (match_operand:V2DI 2 "zero_constant" ""))
         (match_operand:IEEE128 3 "altivec_register_operand" "v")
         (match_operand:IEEE128 4 "altivec_register_operand" "v")))]
-  "TARGET_POWER10 && TARGET_FLOAT128_HW"
+  "TARGET_POWER10 && TARGET_FLOAT128_HW && flag_finite_math_only"
   "xxsel %x0,%x4,%x3,%x1"
   [(set_attr "type" "vecmove")])
 
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c
index 2fae8dc23bcf..496fe29740c8 100644
--- a/gcc/testsuite/gcc.target/powerpc/float128-cmove.c
+++ b/gcc/testsuite/gcc.target/powerpc/float128-cmove.c
@@ -1,7 +1,11 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target ppc_float128_hw } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -Ofast" } */
+
+/* The XSCMP{EQ,GT,GE}QP instructions will trap if a signaling NaN is one of
+   the arguments, so this code is now only generated if -Ofast or
+   -ffinite-math-only is used.  */
 
 #ifndef TYPE
 #ifdef __LONG_DOUBLE_IEEE128__

[gcc(refs/users/meissner/heads/work206-bugs)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

Reply via email to