https://gcc.gnu.org/g:6f8475e63684e55a6c25a683f57d53c51854bbf9

commit 6f8475e63684e55a6c25a683f57d53c51854bbf9
Author: Aurelien Jarno <[email protected]>
Date:   Thu Oct 2 09:05:34 2025 -0600

    [PATCH v2] RISC-V: fix __builtin_round NaN handling [PR target/121652]
    
    __builtin_round() fails to correctly generate invalid exceptions for NaN
    inputs when -ftrapping-math is used (which is the default). According to
    the specification, an invalid exception should be raised for sNaN, but
    not for qNaN.
    
    Commit f12a27216952 ("RISC-V: fix __builtin_round clobbering FP...")
    attempted to avoid raising an invalid exception for qNaN by saving and
    restoring the FP exception flags. However this inadvertently suppressed
    the invalid exception for sNaN as well.
    
    Instead of saving/restoring fflags, this patch uses the same approach
    than the well tested GLIBC round implementation. When flag_trapping_math
    is enabled, it first checks whether the input is a NaN using feq.s/d. In
    that case it adds the input value with itself to possibly convert sNaN
    into qNaN. With this change, the glibc testsuite passes again.
    
    The generated code with -ftrapping-math now looks like:
    
    convert_float_to_float_round
      feq.s       a5,fa0,fa0
      beqz        a5,.L6
      auipc       a5,0x0
      flw         fa4,42(a5)
      fabs.s      fa5,fa0
      flt.s       a5,fa5,fa4
      beqz        a5,.L5
      fcvt.w.s    a5,fa0,rmm
      fcvt.s.w    fa5,a5
      fsgnj.s     fa0,fa5,fa0
      ret
    .L6:
      fadd.s      fa0,fa0,fa0
    .L5:
      ret
    
    With -fno-trapping-math, the additional checks are omitted so the
    resulting code is unchanged.
    
    In addition this fixes the following failures:
    FAIL: gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c -O3 
-ftree-vectorize -mrvv-vector-bits=scalable  scan-assembler-times 
frflags\\s+[atx][0-9]+ 32
    FAIL: gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c -O3 
-ftree-vectorize -mrvv-vector-bits=scalable  scan-assembler-times 
fsflags\\s+[atx][0-9]+ 32
    
    Fixes: f652a35877e3 ("This is almost exclusively Jivan's work....")
    Fixes: f12a27216952 ("RISC-V: fix __builtin_round clobbering FP...")
    
            PR target/121652
    
    gcc/ChangeLog:
    
            * config/riscv/riscv.md (round_pattern): special case NaN input
            instead of saving/restoring fflags.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c: Adjust
            scan pattern for fewer instances of frflags/fsrflags.
    
    (cherry picked from commit 60c6f92fb192352d784ab3153a9e11c19a3216a0)

Diff:
---
 gcc/config/riscv/riscv.md                          | 39 ++++++++++++++--------
 .../riscv/rvv/autovec/vls/math-nearbyint-1.c       |  4 +--
 2 files changed, 28 insertions(+), 15 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 843a048e9d8c..78a01ef30c75 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2322,27 +2322,38 @@
   else
     {
       rtx reg;
-      rtx label = gen_label_rtx ();
+      rtx label1 = gen_label_rtx ();
+      rtx label2 = gen_label_rtx ();
+      rtx label3 = gen_label_rtx ();
       rtx end_label = gen_label_rtx ();
       rtx abs_reg = gen_reg_rtx (<ANYF:MODE>mode);
       rtx coeff_reg = gen_reg_rtx (<ANYF:MODE>mode);
       rtx tmp_reg = gen_reg_rtx (<ANYF:MODE>mode);
-      rtx fflags = gen_reg_rtx (SImode);
 
       riscv_emit_move (tmp_reg, operands[1]);
+
+      if (flag_trapping_math)
+       {
+         /* Check if the input is a NaN.  */
+         riscv_expand_conditional_branch (label1, EQ,
+                                          operands[1], operands[1]);
+
+         emit_jump_insn (gen_jump (label3));
+         emit_barrier ();
+
+         emit_label (label1);
+       }
+
       riscv_emit_move (coeff_reg,
                       riscv_vector::get_fp_rounding_coefficient 
(<ANYF:MODE>mode));
       emit_insn (gen_abs<ANYF:mode>2 (abs_reg, operands[1]));
 
-      /* fp compare can set invalid flag for NaN, so backup fflags.  */
-      if (flag_trapping_math)
-        emit_insn (gen_riscv_frflags (fflags));
-      riscv_expand_conditional_branch (label, LT, abs_reg, coeff_reg);
+      riscv_expand_conditional_branch (label2, LT, abs_reg, coeff_reg);
 
       emit_jump_insn (gen_jump (end_label));
       emit_barrier ();
 
-      emit_label (label);
+      emit_label (label2);
       switch (<ANYF:MODE>mode)
        {
        case SFmode:
@@ -2361,15 +2372,17 @@
 
       emit_insn (gen_copysign<ANYF:mode>3 (tmp_reg, abs_reg, operands[1]));
 
-      emit_label (end_label);
+      emit_jump_insn (gen_jump (end_label));
+      emit_barrier ();
 
-      /* Restore fflags, but after label.  This is slightly different
-         than glibc implementation which only needs to restore under
-         the label, since it checks for NaN first, meaning following fp
-         compare can't raise fp exceptons and thus not clobber fflags.  */
       if (flag_trapping_math)
-        emit_insn (gen_riscv_fsflags (fflags));
+       {
+         emit_label (label3);
+         /* Generate a qNaN from an sNaN if needed.  */
+         emit_insn (gen_add<ANYF:mode>3 (tmp_reg, operands[1], operands[1]));
+       }
 
+      emit_label (end_label);
       riscv_emit_move (operands[0], tmp_reg);
     }
 
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c
index 89af160112c9..bb62ce2ef8a8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c
@@ -54,5 +54,5 @@ DEF_OP_V (nearbyint, 512, double, __builtin_nearbyint)
 /* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
 /* { dg-final { scan-assembler-times 
{vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
 /* { dg-final { scan-assembler-times 
{vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
-/* { dg-final { scan-assembler-times {frflags\s+[atx][0-9]+} 32 } } */
-/* { dg-final { scan-assembler-times {fsflags\s+[atx][0-9]+} 32 } } */
+/* { dg-final { scan-assembler-times {frflags\s+[atx][0-9]+} 30 } } */
+/* { dg-final { scan-assembler-times {fsflags\s+[atx][0-9]+} 30 } } */

Reply via email to