Firstly, thanks to Haochen Gui for recently adding optab support for
isfinite and isnormal to the middle-end.  This patch adds define_expand
for both these functions to the nvptx backend, which conveniently has
special instructions to simplify their implementation.  As this patch
adds UNSPEC_ISFINITE and UNSPEC_ISNORMAL, I've also taken the opportunity
to include/repost my tweak to clean-up/eliminate UNSPEC_COPYSIGN.

Previously, for isfinite, GCC on nvptx-none with -O2 would generate:

                mov.f64 %r26, %ar0;
                abs.f64 %r28, %r26;
                setp.gtu.f64    %r31, %r28, 0d7fefffffffffffff;
                selp.u32        %value, 0, 1, %r31;

and with this patch, we now generate:

                mov.f64 %r23, %ar0;
                testp.finite.f64        %r24, %r23;
                selp.u32        %value, 1, 0, %r24;

Previously, for isnormal, GCC -O2 would generate:

                mov.f64 %r28, %ar0;
                abs.f64 %r22, %r28;
                setp.gtu.f64    %r32, %r22, 0d7fefffffffffffff;
                setp.ltu.f64    %r35, %r22, 0d0010000000000000;
                or.pred %r43, %r35, %r32;
                selp.u32        %value, 0, 1, %r43;

and with this patch becomes:

                mov.f64 %r23, %ar0;
                setp.neu.f64    %r24, %r23, 0d0000000000000000;
                testp.normal.f64        %r25, %r23;
                and.pred        %r26, %r24, %r25;
                selp.u32        %value, 1, 0, %r26;

Notice that although nvptx provides a testp.normal.f{32,64} instruction,
the semantics don't quite match those required of libm [+0.0 and -0.0
are considered normal by this instruction, but need to return false
for __builtin_isnormal, hence the additional logic, which is still
better than the original].

This patch has been tested on nvptx-none hosted by x86_64-pc-linux-gnu
using make and make -k check, with only one new failure in the testsuite.
The test case g++.dg/opt/pr107569.C exposes a latent bug in the middle-end
(actually a missed optimization) as evrp fails to bound the results of
isfinite.  This issue is independent of the back-end, as the tree-ssa
evrp pass is run long before __builtin_finite is expanded by the backend,
and the existence of an (any) isfinite optab is sufficient to expose it.
Fortunately, Haochen Gui has already posted/proposed a fix at
https://gcc.gnu.org/pipermail/gcc-patches/2024-July/657881.html
[which I'm sad to see is taking a while to review/get approved].

Ok for mainline?


2024-07-27  Roger Sayle  <ro...@nextmovesoftware.com>

gcc/ChangeLog
        * config/nvptx/nvptx.md (UNSPEC_COPYSIGN): No longer required.
        (UNSPEC_ISFINITE): New UNSPEC.
        (UNSPEC_ISNORMAL): Likewise.
        (*cmp<mode>): Rename to...
        (cmp<mode>): Remove '*' prefix to generate gen_cmp{s,d}f.
        (copysign<mode>3): Replace UNSPEC_COPYSIGN with copysign RTX.
        (setcc_isfinite<mode>): New define_insn using UNSPEC_ISFINITE.
        (isfinite<mode>2): Expand isfinite.
        (setcc_isnormal<mode>): New define_insn using UNSPEC_ISNORMAL.
        (isnormal<mode>2): Expand isnormal.

gcc/testsuite/ChangeLog
        * gcc.target/nvptx/isfinite.c: New test case.
        * gcc.target/nvptx/isnormal.c: Likewise.


Thanks in advance (p.s. don't forget the nvptx_rtx_costs patch),
Roger
--

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 7878a3b..ae711bb 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -21,13 +21,14 @@
 (define_c_enum "unspec" [
    UNSPEC_ARG_REG
 
-   UNSPEC_COPYSIGN
    UNSPEC_LOG2
    UNSPEC_EXP2
    UNSPEC_SIN
    UNSPEC_COS
    UNSPEC_TANH
    UNSPEC_ISINF
+   UNSPEC_ISFINITE
+   UNSPEC_ISNORMAL
 
    UNSPEC_FPINT_FLOOR
    UNSPEC_FPINT_BTRUNC
@@ -888,7 +889,7 @@
   ""
   "%.\\tsetp%c1\\t%0, %2, %3;")
 
-(define_insn "*cmp<mode>"
+(define_insn "cmp<mode>"
   [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (match_operator:BI 1 "nvptx_float_comparison_operator"
           [(match_operand:SDFM 2 "nvptx_register_operand" "R")
@@ -1253,9 +1254,8 @@
 
 (define_insn "copysign<mode>3"
   [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
-       (unspec:SDFM [(match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF")
-                     (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")]
-                     UNSPEC_COPYSIGN))]
+       (copysign:SDFM (match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF")
+                      (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
   ""
   "%.\\tcopysign%t0\\t%0, %2, %1;")
 
@@ -1330,6 +1330,8 @@
   "flag_unsafe_math_optimizations"
   "%.\\tex2.approx%t0\\t%0, %1;")
 
+;; FP classify predicates
+
 (define_insn "setcc_isinf<mode>"
   [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
        (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
@@ -1349,6 +1351,50 @@
   DONE;
 })
 
+(define_insn "setcc_isfinite<mode>"
+  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
+       (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
+                  UNSPEC_ISFINITE))]
+  ""
+  "%.\\ttestp.finite%t1\\t%0, %1;")
+
+(define_expand "isfinite<mode>2"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+       (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
+                  UNSPEC_ISFINITE))]
+  ""
+{
+  rtx pred = gen_reg_rtx (BImode);
+  emit_insn (gen_setcc_isfinite<mode> (pred, operands[1]));
+  emit_insn (gen_setccsi_from_bi (operands[0], pred));
+  DONE;
+})
+
+(define_insn "setcc_isnormal<mode>"
+  [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
+       (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
+                  UNSPEC_ISNORMAL))]
+  ""
+  "%.\\ttestp.normal%t1\\t%0, %1;")
+
+(define_expand "isnormal<mode>2"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+       (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
+                  UNSPEC_ISNORMAL))]
+  ""
+{
+  rtx pred1 = gen_reg_rtx (BImode);
+  rtx pred2 = gen_reg_rtx (BImode);
+  rtx pred3 = gen_reg_rtx (BImode);
+  rtx zero = CONST0_RTX (<MODE>mode);
+  rtx cmp = gen_rtx_fmt_ee (NE, BImode, operands[1], zero);
+  emit_insn (gen_cmp<mode> (pred1, cmp, operands[1], zero));
+  emit_insn (gen_setcc_isnormal<mode> (pred2, operands[1]));
+  emit_insn (gen_andbi3 (pred3, pred1, pred2));
+  emit_insn (gen_setccsi_from_bi (operands[0], pred3));
+  DONE;
+})
+
 ;; HFmode floating point arithmetic.
 
 (define_insn "addhf3"
diff --git a/gcc/testsuite/gcc.target/nvptx/isfinite.c b/gcc/testsuite/gcc.target/nvptx/isfinite.c
new file mode 100644
index 0000000..83099fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/isfinite.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int foo(double x)
+{
+  return __builtin_isfinite(x);
+}
+
+/* { dg-final { scan-assembler-times "testp.finite.f64" 1 } } */
diff --git a/gcc/testsuite/gcc.target/nvptx/isnormal.c b/gcc/testsuite/gcc.target/nvptx/isnormal.c
new file mode 100644
index 0000000..83c4fb6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/isnormal.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int isnormal(double x)
+{
+  return __builtin_isnormal(x);
+}
+
+/* { dg-final { scan-assembler-times "testp.normal.f64" 1 } } */

Reply via email to