Firstly, thanks to Haochen Gui for recently adding optab support for isfinite and isnormal to the middle-end. This patch adds define_expand for both these functions to the nvptx backend, which conveniently has special instructions to simplify their implementation. As this patch adds UNSPEC_ISFINITE and UNSPEC_ISNORMAL, I've also taken the opportunity to include/repost my tweak to clean-up/eliminate UNSPEC_COPYSIGN.
Previously, for isfinite, GCC on nvptx-none with -O2 would generate: mov.f64 %r26, %ar0; abs.f64 %r28, %r26; setp.gtu.f64 %r31, %r28, 0d7fefffffffffffff; selp.u32 %value, 0, 1, %r31; and with this patch, we now generate: mov.f64 %r23, %ar0; testp.finite.f64 %r24, %r23; selp.u32 %value, 1, 0, %r24; Previously, for isnormal, GCC -O2 would generate: mov.f64 %r28, %ar0; abs.f64 %r22, %r28; setp.gtu.f64 %r32, %r22, 0d7fefffffffffffff; setp.ltu.f64 %r35, %r22, 0d0010000000000000; or.pred %r43, %r35, %r32; selp.u32 %value, 0, 1, %r43; and with this patch becomes: mov.f64 %r23, %ar0; setp.neu.f64 %r24, %r23, 0d0000000000000000; testp.normal.f64 %r25, %r23; and.pred %r26, %r24, %r25; selp.u32 %value, 1, 0, %r26; Notice that although nvptx provides a testp.normal.f{32,64} instruction, the semantics don't quite match those required of libm [+0.0 and -0.0 are considered normal by this instruction, but need to return false for __builtin_isnormal, hence the additional logic, which is still better than the original]. This patch has been tested on nvptx-none hosted by x86_64-pc-linux-gnu using make and make -k check, with only one new failure in the testsuite. The test case g++.dg/opt/pr107569.C exposes a latent bug in the middle-end (actually a missed optimization) as evrp fails to bound the results of isfinite. This issue is independent of the back-end, as the tree-ssa evrp pass is run long before __builtin_finite is expanded by the backend, and the existence of an (any) isfinite optab is sufficient to expose it. Fortunately, Haochen Gui has already posted/proposed a fix at https://gcc.gnu.org/pipermail/gcc-patches/2024-July/657881.html [which I'm sad to see is taking a while to review/get approved]. Ok for mainline? 2024-07-27 Roger Sayle <ro...@nextmovesoftware.com> gcc/ChangeLog * config/nvptx/nvptx.md (UNSPEC_COPYSIGN): No longer required. (UNSPEC_ISFINITE): New UNSPEC. (UNSPEC_ISNORMAL): Likewise. (*cmp<mode>): Rename to... 
(cmp<mode>): Remove '*' prefix to generate gen_cmp{s,d}f. (copysign<mode>3): Replace UNSPEC_COPYSIGN with copysign RTX. (setcc_isfinite<mode>): New define_insn using UNSPEC_ISFINITE. (isfinite<mode>2): Expand isfinite. (setcc_isnormal<mode>): New define_insn using UNSPEC_ISNORMAL. (isnormal<mode>2): Expand isnormal. gcc/testsuite/ChangeLog * gcc.target/nvptx/isfinite.c: New test case. * gcc.target/nvptx/isnormal.c: Likewise. Thanks in advance (p.s. don't forget the nvptx_rtx_costs patch), Roger --
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 7878a3b..ae711bb 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -21,13 +21,14 @@ (define_c_enum "unspec" [ UNSPEC_ARG_REG - UNSPEC_COPYSIGN UNSPEC_LOG2 UNSPEC_EXP2 UNSPEC_SIN UNSPEC_COS UNSPEC_TANH UNSPEC_ISINF + UNSPEC_ISFINITE + UNSPEC_ISNORMAL UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC @@ -888,7 +889,7 @@ "" "%.\\tsetp%c1\\t%0, %2, %3;") -(define_insn "*cmp<mode>" +(define_insn "cmp<mode>" [(set (match_operand:BI 0 "nvptx_register_operand" "=R") (match_operator:BI 1 "nvptx_float_comparison_operator" [(match_operand:SDFM 2 "nvptx_register_operand" "R") @@ -1253,9 +1254,8 @@ (define_insn "copysign<mode>3" [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") - (unspec:SDFM [(match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF") - (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")] - UNSPEC_COPYSIGN))] + (copysign:SDFM (match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF") + (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))] "" "%.\\tcopysign%t0\\t%0, %2, %1;") @@ -1330,6 +1330,8 @@ "flag_unsafe_math_optimizations" "%.\\tex2.approx%t0\\t%0, %1;") +;; FP classify predicates + (define_insn "setcc_isinf<mode>" [(set (match_operand:BI 0 "nvptx_register_operand" "=R") (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] @@ -1349,6 +1351,50 @@ DONE; }) +(define_insn "setcc_isfinite<mode>" + [(set (match_operand:BI 0 "nvptx_register_operand" "=R") + (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] + UNSPEC_ISFINITE))] + "" + "%.\\ttestp.finite%t1\\t%0, %1;") + +(define_expand "isfinite<mode>2" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] + UNSPEC_ISFINITE))] + "" +{ + rtx pred = gen_reg_rtx (BImode); + emit_insn (gen_setcc_isfinite<mode> (pred, operands[1])); + emit_insn (gen_setccsi_from_bi (operands[0], pred)); + DONE; +}) + +(define_insn 
"setcc_isnormal<mode>" + [(set (match_operand:BI 0 "nvptx_register_operand" "=R") + (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] + UNSPEC_ISNORMAL))] + "" + "%.\\ttestp.normal%t1\\t%0, %1;") + +(define_expand "isnormal<mode>2" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] + UNSPEC_ISNORMAL))] + "" +{ + rtx pred1 = gen_reg_rtx (BImode); + rtx pred2 = gen_reg_rtx (BImode); + rtx pred3 = gen_reg_rtx (BImode); + rtx zero = CONST0_RTX (<MODE>mode); + rtx cmp = gen_rtx_fmt_ee (NE, BImode, operands[1], zero); + emit_insn (gen_cmp<mode> (pred1, cmp, operands[1], zero)); + emit_insn (gen_setcc_isnormal<mode> (pred2, operands[1])); + emit_insn (gen_andbi3 (pred3, pred1, pred2)); + emit_insn (gen_setccsi_from_bi (operands[0], pred3)); + DONE; +}) + ;; HFmode floating point arithmetic. (define_insn "addhf3" diff --git a/gcc/testsuite/gcc.target/nvptx/isfinite.c b/gcc/testsuite/gcc.target/nvptx/isfinite.c new file mode 100644 index 0000000..83099fc --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/isfinite.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int foo(double x) +{ + return __builtin_isfinite(x); +} + +/* { dg-final { scan-assembler-times "testp.finite.f64" 1 } } */ diff --git a/gcc/testsuite/gcc.target/nvptx/isnormal.c b/gcc/testsuite/gcc.target/nvptx/isnormal.c new file mode 100644 index 0000000..83c4fb6 --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/isnormal.c @@ -0,0 +1,9 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int isnormal(double x) +{ + return __builtin_isnormal(x); +} + +/* { dg-final { scan-assembler-times "testp.normal.f64" 1 } } */