work206-bugs)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

Michael Meissner via Gcc-cvs Wed, 21 May 2025 17:03:54 -0700

https://gcc.gnu.org/g:d88170074249387a79537291b3548cb115712d86


commit d88170074249387a79537291b3548cb115712d86
Author: Michael Meissner <meiss...@linux.ibm.com>
Date:   Wed May 21 20:03:02 2025 -0400

    Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.
    
    In bug PR target/118541 on power9, power10, and power11 systems, for the
    function:
    
            extern double __ieee754_acos (double);
    
            double
            __acospi (double x)
            {
              double ret = __ieee754_acos (x) / 3.14;
              return __builtin_isgreater (ret, 1.0) ? 1.0 : ret;
            }
    
    GCC currently generates the following code:
    
            Power9                          Power10 and Power11
            ======                          ===================
            bl __ieee754_acos               bl __ieee754_acos@notoc
            nop                             plfd 0,.LC0@pcrel
            addis 9,2,.LC2@toc@ha           xxspltidp 12,1065353216
            addi 1,1,32                     addi 1,1,32
            lfd 0,.LC2@toc@l(9)             ld 0,16(1)
            addis 9,2,.LC0@toc@ha           fdiv 0,1,0
            ld 0,16(1)                      mtlr 0
            lfd 12,.LC0@toc@l(9)            xscmpgtdp 1,0,12
            fdiv 0,1,0                      xxsel 1,0,12,1
            mtlr 0                          blr
            xscmpgtdp 1,0,12
            xxsel 1,0,12,1
            blr
    
    This is because ifcvt.c optimizes the conditional floating point move to 
use the
    XSCMPGTDP instruction.
    
    However, the XSCMPGTDP instruction will generate an interrupt if one of the
    arguments is a signalling NaN and signalling NaNs can generate an interrupt.
    The IEEE comparison functions (isgreater, etc.) require that the comparison 
not
    raise an interrupt.
    
    The root cause of this is we allow floating point comparisons to be reversed
    (i.e. LT will be reversed to UNGE).  Before power9, this was ok because we 
only
    generated the FCMPU or XSCMPUDP instructions.
    
    But with power9, we can generate the XSCMPEQDP, XSCMPGTDP, or XSCMPGEDP
    instructions.  This code now does not convert an unordered compare into an
    ordered compare.  Instead, it does the opposite comparison and swaps the
    arguments.  I.e. it converts:
    
            r = (a < b) ? c : d;
    
    into:
    
            r = (b >= a) ? c : d;
    
    For the following code:
    
            double
            ordered_compare (double a, double b, double c, double d)
            {
              return __builtin_isgreater (a, b) ? c : d;
            }
    
            /* Verify normal > does generate xscmpgtdp.  */
    
            double
            normal_compare (double a, double b, double c, double d)
            {
              return a > b ? c : d;
            }
    
    with the following patch, GCC generates the following for power9, power10, 
and
    power11:
    
            ordered_compare:
                    fcmpu 0,1,2
                    fmr 1,4
                    bnglr 0
                    fmr 1,3
                    blr
    
            normal_compare:
                    xscmpgtdp 1,1,2
                    xxsel 1,4,3,1
                    blr
    
    I have built bootstrap compilers on big endian power9 systems and little 
endian
    power9/power10 systems and there were no regressions.  Can I check this 
patch
    into the GCC trunk, and after a waiting period, can I check this into the 
active
    older branches?
    
    2025-05-21  Michael Meissner  <meiss...@linux.ibm.com>
    
    gcc/
    
            PR target/118541
            * config/rs6000/predicates.md (invert_fpmask_comparison_operator):
            Delete.
            (fpmask_reverse_args_comparison_operator): New predicate.
            * config/rs6000/rs6000-proto.h (rs6000_fpmask_reverse_args): New
            declaration.
            * config/rs6000/rs6000.cc (rs6000_fpmask_reverse_args): New 
function.
            * config/rs6000/rs6000.h (REVERSIBLE_CC_MODE): Do not allow floating
            point comparisons to be reversed unless -ffinite-math-only is used.
            * config/rs6000/rs6000.md (mov<SFDF:mode><SFDF2:mode>cc_p9): Add
            comment.
            (mov<SFDF:mode><SFDF2:mode>cc_invert_p9): Reverse the argument 
order for
            the comparison, and use an unordered comparison, instead of ordered
            comparison.
            (mov<mode>cc_invert_p10): Likewise.
    
    gcc/testsuite/
    
            PR target/118541
            * gcc.target/powerpc/pr118541.c: New test.

Diff:
---
 gcc/config/rs6000/predicates.md             |   6 +-
 gcc/config/rs6000/rs6000-protos.h           |   1 +
 gcc/config/rs6000/rs6000.cc                 |  24 +++++
 gcc/config/rs6000/rs6000.h                  |  15 ++-
 gcc/config/rs6000/rs6000.md                 |  12 +--
 gcc/testsuite/gcc.target/powerpc/pr118541.c | 147 ++++++++++++++++++++++++++++
 6 files changed, 192 insertions(+), 13 deletions(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 647e89afb6a7..3a82113833c3 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -1465,9 +1465,9 @@
 
 ;; Return 1 if OP is a comparison operator suitable for vector/scalar
 ;; comparisons that generate a 0/-1 mask (i.e. the inverse of
-;; fpmask_comparison_operator).
-(define_predicate "invert_fpmask_comparison_operator"
-  (match_code "ne,unlt,unle"))
+;; fpmask_comparison_operator when the arguments are swapped).
+(define_predicate "fpmask_reverse_args_comparison_operator"
+  (match_code "ne,lt,le"))
 
 ;; Return 1 if OP is a comparison operation suitable for integer vector/scalar
 ;; comparisons that generate a -1/0 mask.
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 4619142d197b..f3cbfbfe5a13 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -116,6 +116,7 @@ extern const char *rs6000_indirect_sibcall_template (rtx *, 
unsigned int);
 extern const char *rs6000_pltseq_template (rtx *, int);
 extern enum rtx_code rs6000_reverse_condition (machine_mode,
                                               enum rtx_code);
+extern enum rtx_code rs6000_fpmask_reverse_args (enum rtx_code);
 extern rtx rs6000_emit_eqne (machine_mode, rtx, rtx, rtx);
 extern rtx rs6000_emit_fp_cror (rtx_code, machine_mode, rtx);
 extern void rs6000_emit_sCOND (machine_mode, rtx[]);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 11dfde7f288b..8eaca7988d80 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -15379,6 +15379,30 @@ rs6000_reverse_condition (machine_mode mode, enum 
rtx_code code)
     return reverse_condition (code);
 }
 
+/* Like reverse_condition, but return the condition code where the arguments
+   are reversed.  I.e. do not convert an unordered compare into an ordered
+   compare.  This is used for the fpmask operations in power9 to implement
+   floating point conditional moves.  */
+
+enum rtx_code
+rs6000_fpmask_reverse_args (enum rtx_code code)
+{
+  switch (code)
+    {
+    case NE:
+      return EQ;
+
+    case LE:
+      return GT;
+
+    case LT:
+      return GE;
+
+    default:
+      gcc_unreachable ();
+    }
+}
+
 /* Check if C (as 64bit integer) can be rotated to a constant which constains
    nonzero bits at the LOWBITS low bits only.
 
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 9267612fbc9c..c8d9456e0912 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1810,10 +1810,17 @@ extern scalar_int_mode rs6000_pmode;
    : (((OP) == EQ || (OP) == NE) && COMPARISON_P (X)                     \
       ? CCEQmode : CCmode))
 
-/* Can the condition code MODE be safely reversed?  This is safe in
-   all cases on this port, because at present it doesn't use the
-   trapping FP comparisons (fcmpo).  */
-#define REVERSIBLE_CC_MODE(MODE) 1
+/* Can the condition code MODE be safely reversed?  Don't allow floating point
+   comparisons to be reversed unless NaNs are not allowed.
+
+   In the past, we used to allow reversing FP operations because we only
+   generated FCMPU comparisons and not FCMPO.  However, starting with power9,
+   the XSCMPEQDP, XSCMPGTDP, and XSCMPGEDP instructions will trap if a
+   signalling NaN is used.  If we allow reversing FP operations, we could wind
+   up converting a LT operation into UNGE and the instruction will trap.  The
+   machine independent parts of the compiler will handle reversing the
+   arguments if the FP comparison cannot be reversed.  */
+#define REVERSIBLE_CC_MODE(MODE) ((MODE) != CCFPmode || flag_finite_math_only)
 
 /* Given a condition code and a mode, return the inverse condition.  */
 #define REVERSE_CONDITION(CODE, MODE) rs6000_reverse_condition (MODE, CODE)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 65da0c653304..b5cc98bcb0eb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -5734,7 +5734,7 @@
 (define_insn_and_split "*mov<SFDF:mode><SFDF2:mode>cc_invert_p9"
   [(set (match_operand:SFDF 0 "vsx_register_operand" "=&wa,wa")
        (if_then_else:SFDF
-        (match_operator:CCFP 1 "invert_fpmask_comparison_operator"
+        (match_operator:CCFP 1 "fpmask_reverse_args_comparison_operator"
                [(match_operand:SFDF2 2 "vsx_register_operand" "wa,wa")
                 (match_operand:SFDF2 3 "vsx_register_operand" "wa,wa")])
         (match_operand:SFDF 4 "vsx_register_operand" "wa,wa")
@@ -5754,7 +5754,7 @@
                           (match_dup 4)))]
 {
   rtx op1 = operands[1];
-  enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1));
+  enum rtx_code cond = rs6000_fpmask_reverse_args (GET_CODE (op1));
 
   if (GET_CODE (operands[6]) == SCRATCH)
     operands[6] = gen_reg_rtx (V2DImode);
@@ -5762,7 +5762,7 @@
   operands[7] = CONSTM1_RTX (V2DImode);
   operands[8] = CONST0_RTX (V2DImode);
 
-  operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]);
+  operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[3], operands[2]);
 }
  [(set_attr "length" "8")
   (set_attr "type" "vecperm")])
@@ -5839,7 +5839,7 @@
 (define_insn_and_split "*mov<mode>cc_invert_p10"
   [(set (match_operand:IEEE128 0 "altivec_register_operand" "=&v,v")
        (if_then_else:IEEE128
-        (match_operator:CCFP 1 "invert_fpmask_comparison_operator"
+        (match_operator:CCFP 1 "fpmask_reverse_args_comparison_operator"
                [(match_operand:IEEE128 2 "altivec_register_operand" "v,v")
                 (match_operand:IEEE128 3 "altivec_register_operand" "v,v")])
         (match_operand:IEEE128 4 "altivec_register_operand" "v,v")
@@ -5859,7 +5859,7 @@
                              (match_dup 4)))]
 {
   rtx op1 = operands[1];
-  enum rtx_code cond = reverse_condition_maybe_unordered (GET_CODE (op1));
+  enum rtx_code cond = rs6000_fpmask_reverse_args (GET_CODE (op1));
 
   if (GET_CODE (operands[6]) == SCRATCH)
     operands[6] = gen_reg_rtx (V2DImode);
@@ -5867,7 +5867,7 @@
   operands[7] = CONSTM1_RTX (V2DImode);
   operands[8] = CONST0_RTX (V2DImode);
 
-  operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[2], operands[3]);
+  operands[9] = gen_rtx_fmt_ee (cond, CCFPmode, operands[3], operands[2]);
 }
  [(set_attr "length" "8")
   (set_attr "type" "vecperm")])
diff --git a/gcc/testsuite/gcc.target/powerpc/pr118541.c 
b/gcc/testsuite/gcc.target/powerpc/pr118541.c
new file mode 100644
index 000000000000..a38c94c8b7ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr118541.c
@@ -0,0 +1,147 @@
+/* { dg-do compile } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_vsx } */
+
+/* PR target/118541 says that the ordered comparison functions like isgreater
+   should not optimize floating point conditional moves to use
+   x{s,v}cmp{eq,gt,ge}{dp,qp} and xxsel since that instruction can cause traps
+   if one of the arguments is a signaling NaN.  */
+
+/* Verify isgreater, isgreaterequal, isless, and islessequal does not generate
+   xscmpltdp, xscmpgtdp, or xscmpeqdp.  */
+
+double
+ordered_gt (double a, double b, double c, double d)
+{
+  /*
+   * fcmpu 0,1,2
+   * fmr 1,4
+   * bnglr 0
+   * fmr 1,3
+   * blr
+   */
+
+  return __builtin_isgreater (a, b) ? c : d;
+}
+
+double
+ordered_ge (double a, double b, double c, double d)
+{
+  /*
+   * fcmpu 0,1,2
+   * fmr 1,4
+   * cror 2,0,3
+   * beqlr 0
+   * fmr 1,3
+   * blr
+   */
+
+  return __builtin_isgreaterequal (a, b) ? c : d;
+}
+
+double
+ordered_lt (double a, double b, double c, double d)
+{
+  /*
+   * fcmpu 0,1,2
+   * fmr 1,4
+   * bnllr 0
+   * fmr 1,3
+   * blr
+   */
+
+  return __builtin_isless (a, b) ? c : d;
+}
+
+double
+ordered_le (double a, double b, double c, double d)
+{
+  /*
+   * fcmpu 0,1,2
+   * fmr 1,4
+   * cror 2,1,3
+   * beqlr 0
+   * fmr 1,3
+   * blr
+   */
+
+  return __builtin_islessequal (a, b) ? c : d;
+}
+
+double
+normal_gt (double a, double b, double c, double d)
+{
+  /*
+   * xscmpgtdp 1,1,2
+   * xxsel 1,4,3,1
+   * blr
+   */
+
+  return a > b ? c : d;
+}
+
+double
+normal_ge (double a, double b, double c, double d)
+{
+  /*
+   * xscmpgedp 1,1,2
+   * xxsel 1,4,3,1
+   * blr
+   */
+
+  return a >= b ? c : d;
+}
+
+double
+normal_lt (double a, double b, double c, double d)
+{
+  /*
+   * xscmpgtdp 1,2,1
+   * xxsel 1,4,3,1
+   * blr
+   */
+
+  return a < b ? c : d;
+}
+
+double
+normal_le (double a, double b, double c, double d)
+{
+  /*
+   * xscmpgedp 1,2,1
+   * xxsel 1,4,3,1
+   * blr
+   */
+
+  return a <= b ? c : d;
+}
+
+double
+normal_eq (double a, double b, double c, double d)
+{
+  /*
+   * xscmpeqdp 1,1,2
+   * xxsel 1,4,3,1
+   * blr
+   */
+
+  return a == b ? c : d;
+}
+
+double
+normal_ne (double a, double b, double c, double d)
+{
+  /*
+   * xscmpeqdp 1,1,2
+   * xxsel 1,3,4,1
+   * blr
+   */
+
+  return a != b ? c : d;
+}
+
+/* { dg-final { scan-assembler-times {\mxscmpudp\M|\mfcmpu\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxscmpeqdp\M}          2 } } */
+/* { dg-final { scan-assembler-times {\mxscmpgedp\M}          2 } } */
+/* { dg-final { scan-assembler-times {\mxscmpgtdp\M}          2 } } */
+/* { dg-final { scan-assembler-times {\mxxsel\M}              6 } } */

[gcc(refs/users/meissner/heads/work206-bugs)] Fix PR 118541, do not generate unordered fp cmoves for IEEE compares.

Reply via email to