On Oct 23, 2020, Richard Biener <richard.guent...@gmail.com> wrote: > Can you move it one pass further after sink please?
I did, but it didn't solve the recip regressions that my first attempt brought about. > Also I don't > remember exactly but does pass_sincos only handle sin/cos unifying? It rearranges some powi computations, and that's what breaks recip. It adds a copy of y*y in extract_recip_[34], we'd need a forwprop or similar to get rid of the trivial copy before recip could do its job properly again. So I figured I'd try to cse type conversions before sincos, and that turned out to be pretty easy, and it didn't regress anything. Regstrapped on x86_64-linux-gnu. Ok to install? CSE conversions within sincos From: Alexandre Oliva <ol...@adacore.com> On platforms in which Aux_[Real_Type] involves non-NOP conversions (e.g., between single- and double-precision, or between short float and float), the conversions before the calls are CSEd too late for sincos to combine calls. This patch enables the sincos pass to CSE type casts used as arguments to eligible calls before looking for other calls using the same operand. for gcc/ChangeLog * tree-ssa-math-opts.c (sincos_stats): Rename inserted to sincos_inserted. Add conv_inserted. (maybe_record_sincos): Rename to... (maybe_record_stmt): ... this. (execute_cse_conv_1): New. (execute_cse_sincos_1): Call it. Adjust. (pass_cse_sincos::execute): Adjust. Report conv_inserted. for gcc/testsuite/ChangeLog * gnat.dg/sin_cos.ads: New. * gnat.dg/sin_cos.adb: New. * gcc.dg/sin_cos.c: New. --- gcc/testsuite/gcc.dg/sin_cos.c | 41 +++++++++++++ gcc/testsuite/gnat.dg/sin_cos.adb | 14 ++++ gcc/testsuite/gnat.dg/sin_cos.ads | 4 + gcc/tree-ssa-math-opts.c | 119 +++++++++++++++++++++++++++++++++++-- 4 files changed, 171 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/sin_cos.c create mode 100644 gcc/testsuite/gnat.dg/sin_cos.adb create mode 100644 gcc/testsuite/gnat.dg/sin_cos.ads diff --git a/gcc/testsuite/gcc.dg/sin_cos.c b/gcc/testsuite/gcc.dg/sin_cos.c new file mode 100644 index 00000000..aa71dca --- /dev/null +++ b/gcc/testsuite/gcc.dg/sin_cos.c @@ -0,0 +1,41 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +/* This maps to essentially the same gimple that is generated for + gnat.dg/sin_cos.adb, on platforms that use the wraplf variant of + Ada.Numerics.Aux_Float. The value of EPSILON is not relevant to + the test, but the test must be there to keep the conversions in + different BBs long enough to trigger the problem that prevented the + sincos optimization, because the arguments passed to sin and cos + didn't get unified into a single SSA_NAME in time for sincos. */ + +#include <math.h> + +#define EPSILON 3.4526697709225118160247802734375e-4 + +static float my_sinf(float x) { + return (float) sin ((double) x); +} + +static float wrap_sinf(float x) { + if (fabs (x) < EPSILON) + return 0; + return my_sinf (x); +} + +static float my_cosf(float x) { + return (float) cos ((double) x); +} + +static float wrap_cosf(float x) { + if (fabs (x) < EPSILON) + return 1; + return my_cosf (x); +} + +float my_sin_cos(float x, float *s, float *c) { + *s = wrap_sinf (x); + *c = wrap_cosf (x); +} + +/* { dg-final { scan-assembler "sincos\|cexp" { target *-linux-gnu* *-w64-mingw* *-*-vxworks* } } } */ diff --git a/gcc/testsuite/gnat.dg/sin_cos.adb b/gcc/testsuite/gnat.dg/sin_cos.adb new file mode 100644 index 00000000..6e18df9 --- /dev/null +++ b/gcc/testsuite/gnat.dg/sin_cos.adb @@ -0,0 +1,14 @@ +-- { dg-do compile } +-- { dg-options "-O2 -gnatn" } + +with Ada.Numerics.Elementary_Functions; +use Ada.Numerics.Elementary_Functions; +package body Sin_Cos is + procedure Sin_Cos (Angle : T; SinA, CosA : out T) is + begin + SinA := Sin (Angle); + CosA := Cos (Angle); + end; +end Sin_Cos; + +-- { dg-final { scan-assembler "sincos\|cexp" { target *-linux-gnu* *-w64-mingw* *-*-vxworks* } } } diff --git a/gcc/testsuite/gnat.dg/sin_cos.ads b/gcc/testsuite/gnat.dg/sin_cos.ads new file mode 100644 index 00000000..a0eff3d --- /dev/null +++ b/gcc/testsuite/gnat.dg/sin_cos.ads @@ -0,0 +1,4 @@ +package Sin_Cos is + subtype T is Float; + procedure Sin_Cos (Angle : T; SinA, CosA : out T); +end Sin_Cos; diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c index 90dfb98..a32f5ca 100644 --- a/gcc/tree-ssa-math-opts.c +++ b/gcc/tree-ssa-math-opts.c @@ -186,7 +186,11 @@ static struct static struct { /* Number of cexpi calls inserted. */ - int inserted; + int sincos_inserted; + + /* Number of conversions inserted. */ + int conv_inserted; + } sincos_stats; static struct @@ -1106,7 +1110,7 @@ make_pass_cse_reciprocals (gcc::context *ctxt) statements in the vector. */ static bool -maybe_record_sincos (vec<gimple *> *stmts, +maybe_record_stmt (vec<gimple *> *stmts, basic_block *top_bb, gimple *use_stmt) { basic_block use_bb = gimple_bb (use_stmt); @@ -1126,6 +1130,103 @@ maybe_record_sincos (vec<gimple *> *stmts, return true; } +/* If NAME is the result of a type conversion, look for other + conversions from the same source to the same type and CSE them. + Replace results of conversions in sin, cos and cexpi calls with the + CSEd SSA_NAME. Return a SSA_NAME that holds the result of the + conversion to be used instead of NAME. */ + +static tree +execute_cse_conv_1 (tree name) +{ + if (SSA_NAME_IS_DEFAULT_DEF (name)) + return name; + + gimple *def_stmt = SSA_NAME_DEF_STMT (name); + + if (!gimple_assign_cast_p (def_stmt)) + return name; + + tree src = gimple_assign_rhs1 (def_stmt); + int count = 0; + imm_use_iterator use_iter; + gimple *use_stmt; + auto_vec<gimple *> stmts; + basic_block top_bb = NULL; + + /* Record equivalent conversions from the same source. */ + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, src) + { + if (!gimple_assign_cast_p (use_stmt)) + continue; + + if (!types_compatible_p (TREE_TYPE (name), + TREE_TYPE (gimple_assign_lhs (use_stmt)))) + continue; + + gcc_checking_assert (gimple_assign_rhs1 (use_stmt) + == gimple_assign_rhs1 (def_stmt)); + + if (maybe_record_stmt (&stmts, &top_bb, use_stmt)) + count++; + } + + if (count <= 1) + return name; + + /* Build and insert a new conversion. */ + name = make_temp_ssa_name (TREE_TYPE (name), def_stmt, "convtmp"); + gimple *stmt = gimple_build_assign (name, + gimple_assign_rhs_code (def_stmt), + gimple_assign_rhs1 (def_stmt)); + + gimple_stmt_iterator gsi; + if (!SSA_NAME_IS_DEFAULT_DEF (name) + && gimple_code (def_stmt) != GIMPLE_PHI + && gimple_bb (def_stmt) == top_bb) + gsi = gsi_for_stmt (def_stmt); + else + gsi = gsi_after_labels (top_bb); + gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); + update_stmt (stmt); + sincos_stats.conv_inserted++; + + /* And adjust the recorded old conversion sites. */ + for (int i = 0; stmts.iterate (i, &use_stmt); ++i) + { + tree lhs = gimple_assign_lhs (use_stmt); + gimple_assign_set_rhs1 (use_stmt, name); + gimple_assign_set_rhs_code (use_stmt, SSA_NAME); + update_stmt (use_stmt); + + /* Replace uses of LHS with NAME in sin, cos and cexpi calls + right away, so that we can recognize them as taking the same + arguments. */ + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs) + { + if (gimple_code (use_stmt) != GIMPLE_CALL + || !gimple_call_lhs (use_stmt)) + continue; + + switch (gimple_call_combined_fn (use_stmt)) + { + CASE_CFN_COS: + CASE_CFN_SIN: + CASE_CFN_CEXPI: + gcc_checking_assert (gimple_call_arg (use_stmt, 0) == lhs); + gimple_call_set_arg (use_stmt, 0, name); + update_stmt (use_stmt); + break; + + default: + continue; + } + } + } + + return name; +} + /* Look for sin, cos and cexpi calls with the same argument NAME and create a single call to cexpi CSEing the result in this case. We first walk over all immediate uses of the argument collecting @@ -1147,6 +1248,8 @@ execute_cse_sincos_1 (tree name) int i; bool cfg_changed = false; + name = execute_cse_conv_1 (name); + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, name) { if (gimple_code (use_stmt) != GIMPLE_CALL @@ -1156,15 +1259,15 @@ execute_cse_sincos_1 (tree name) switch (gimple_call_combined_fn (use_stmt)) { CASE_CFN_COS: - seen_cos |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; + seen_cos |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0; break; CASE_CFN_SIN: - seen_sin |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; + seen_sin |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0; break; CASE_CFN_CEXPI: - seen_cexpi |= maybe_record_sincos (&stmts, &top_bb, use_stmt) ? 1 : 0; + seen_cexpi |= maybe_record_stmt (&stmts, &top_bb, use_stmt) ? 1 : 0; break; default:; @@ -1208,7 +1311,7 @@ execute_cse_sincos_1 (tree name) gsi = gsi_after_labels (top_bb); gsi_insert_before (&gsi, stmt, GSI_SAME_STMT); } - sincos_stats.inserted++; + sincos_stats.sincos_inserted++; /* And adjust the recorded old call sites. */ for (i = 0; stmts.iterate (i, &use_stmt); ++i) @@ -2295,7 +2398,9 @@ pass_cse_sincos::execute (function *fun) } statistics_counter_event (fun, "sincos statements inserted", - sincos_stats.inserted); + sincos_stats.sincos_inserted); + statistics_counter_event (fun, "conv statements inserted", + sincos_stats.conv_inserted); return cfg_changed ? TODO_cleanup_cfg : 0; } -- Alexandre Oliva, happy hacker https://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer