Changes since v1:
  - Fix scan pattern for math-nearbyint-1.c
  - Per Andrew's suggestion, gate flag save/restore on -ftrapping-math
---
__builtin_round() fails to save/restore the FP exception flags around the
FP compare insn, which can potentially clobber them.

Worth noting that the fflags restore bracketing is slightly different
from the glibc implementation. With this patch, gcc generates the
following, where fsflags is executed even if the fcvt* insns were not,
because the prior flt can also clobber the flags. The glibc
implementation, due to its early NaN check (before the flt.s), only
needs to restore fflags inside of the branch.

| convert_float_to_float_round
| ...
|   frflags     a5
|   fabs.s      fa5,fa0
|   flt.s       a4,fa5,fa4    <--- can clobber fflags
|   beq a4,zero,.L3
|     fcvt.w.s a4,fa0,rmm     <--- also
|     fcvt.s.w  fa5,a4
|     fsgnj.s   fa0,fa5,fa0
| .L3:
|    fsflags    a5            <-- both code paths

Fixes: f652a35877e3 ("This is almost exclusively Jivan's work....")

        PR target/121534

gcc/ChangeLog:

        * config/riscv/riscv.md (round_pattern): save/restore fflags.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/round_64.c: Scan for frflags and fsflags.
        * gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c: Adjust
        scan pattern for instances of frflags/fsflags.

Signed-off-by: Vineet Gupta <vine...@rivosinc.com>
---
 gcc/config/riscv/riscv.md                            | 12 ++++++++++++
 gcc/testsuite/gcc.target/riscv/round_64.c            |  2 ++
 .../riscv/rvv/autovec/vls/math-nearbyint-1.c         |  4 ++--
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index a72604e29218..82a7e2ff6792 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2313,12 +2313,16 @@ (define_expand "<round_pattern><ANYF:mode>2"
       rtx abs_reg = gen_reg_rtx (<ANYF:MODE>mode);
       rtx coeff_reg = gen_reg_rtx (<ANYF:MODE>mode);
       rtx tmp_reg = gen_reg_rtx (<ANYF:MODE>mode);
+      rtx fflags = gen_reg_rtx (SImode);
 
       riscv_emit_move (tmp_reg, operands[1]);
       riscv_emit_move (coeff_reg,
                       riscv_vector::get_fp_rounding_coefficient 
(<ANYF:MODE>mode));
       emit_insn (gen_abs<ANYF:mode>2 (abs_reg, operands[1]));
 
+      /* fp compare can set invalid flag for NaN, so backup fflags.  */
+      if (flag_trapping_math)
+        emit_insn (gen_riscv_frflags (fflags));
       riscv_expand_conditional_branch (label, LT, abs_reg, coeff_reg);
 
       emit_jump_insn (gen_jump (end_label));
@@ -2344,6 +2348,14 @@ (define_expand "<round_pattern><ANYF:mode>2"
       emit_insn (gen_copysign<ANYF:mode>3 (tmp_reg, abs_reg, operands[1]));
 
       emit_label (end_label);
+
+      /* Restore fflags, but after the label.  This is slightly different
+         from the glibc implementation, which only needs to restore under
+         the label, since it checks for NaN first, meaning the following fp
+         compare can't raise fp exceptions and thus can't clobber fflags.  */
+      if (flag_trapping_math)
+        emit_insn (gen_riscv_fsflags (fflags));
+
       riscv_emit_move (operands[0], tmp_reg);
     }
 
diff --git a/gcc/testsuite/gcc.target/riscv/round_64.c 
b/gcc/testsuite/gcc.target/riscv/round_64.c
index 5e13bccdcd2a..ab6ef0431413 100644
--- a/gcc/testsuite/gcc.target/riscv/round_64.c
+++ b/gcc/testsuite/gcc.target/riscv/round_64.c
@@ -14,6 +14,8 @@
 /* { dg-final { scan-assembler-times {,rmm} 6 } } */
 /* { dg-final { scan-assembler-times {,rdn} 6 } } */
 /* { dg-final { scan-assembler-times {,rtz} 6 } } */
+/* { dg-final { scan-assembler-times {\mfrflags} 10 } } */
+/* { dg-final { scan-assembler-times {\mfsflags} 10 } } */
 /* { dg-final { scan-assembler-not "\\sceil\\s" } } */
 /* { dg-final { scan-assembler-not "\\sfloor\\s" } } */
 /* { dg-final { scan-assembler-not "\\sround\\s" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c
index bb62ce2ef8a8..89af160112c9 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/math-nearbyint-1.c
@@ -54,5 +54,5 @@ DEF_OP_V (nearbyint, 512, double, __builtin_nearbyint)
 /* { dg-final { scan-tree-dump-not "4096,4096" "optimized" } } */
 /* { dg-final { scan-assembler-times 
{vfcvt\.x\.f\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
 /* { dg-final { scan-assembler-times 
{vfcvt\.f\.x\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0\.t} 30 } } */
-/* { dg-final { scan-assembler-times {frflags\s+[atx][0-9]+} 30 } } */
-/* { dg-final { scan-assembler-times {fsflags\s+[atx][0-9]+} 30 } } */
+/* { dg-final { scan-assembler-times {frflags\s+[atx][0-9]+} 32 } } */
+/* { dg-final { scan-assembler-times {fsflags\s+[atx][0-9]+} 32 } } */
-- 
2.43.0

Reply via email to