On Mon, Jul 1, 2024 at 11:48 PM Andrew Pinski <quic_apin...@quicinc.com> wrote: > > Expanding cabs in powcabs might be too late, as forwprop might > recombine the load from memory with the complex expr. Moving the > expansion to complex lowering instead allows us to directly use the real/imag > components from the loads. This allows for vectorization too. > > Bootstrapped and tested on x86_64-linux-gnu with no regressions.
OK. Thanks, Richard. > PR tree-optimization/115710 > > gcc/ChangeLog: > > * tree-complex.cc (init_dont_simulate_again): Handle CABS. > (gimple_expand_builtin_cabs): New function, moved mostly > from tree-ssa-math-opts.cc. > (expand_complex_operations_1): Call gimple_expand_builtin_cabs. > * tree-ssa-math-opts.cc (gimple_expand_builtin_cabs): Remove. > (build_and_insert_binop): Remove. > (pass_data_expand_powcabs): Update comment. > (pass_expand_powcabs::execute): Don't handle CABS. > > gcc/testsuite/ChangeLog: > > * gcc.dg/tree-ssa/cabs-1.c: New test. > * gcc.dg/tree-ssa/cabs-2.c: New test. > * gfortran.dg/vect/pr115710.f90: New test. > > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c | 14 +++++ > gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c | 13 ++++ > gcc/testsuite/gfortran.dg/vect/pr115710.f90 | 18 ++++++ > gcc/tree-complex.cc | 68 +++++++++++++++++++- > gcc/tree-ssa-math-opts.cc | 70 +-------------------- > 5 files changed, 113 insertions(+), 70 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c > create mode 100644 gcc/testsuite/gfortran.dg/vect/pr115710.f90 > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c > b/gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c > new file mode 100644 > index 00000000000..12ff6049e63 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-1.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile { target sqrt_insn } } */ > +/* { dg-options "-Ofast -fdump-tree-cplxlower1" } */ > +/* { dg-add-options sqrt_insn } */ > + > + > +double f(_Complex double a) > +{ > + a+= 1.0f; > + return __builtin_cabs(a); > +} > + > +/* Check that cabs is expanded during complex lowering. 
*/ > +/* { dg-final { scan-tree-dump-not "__builtin_cabs " "cplxlower1" } } */ > +/* { dg-final { scan-tree-dump "__builtin_sqrt " "cplxlower1" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c > b/gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c > new file mode 100644 > index 00000000000..efe3de90cba > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/cabs-2.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -fdump-tree-cplxlower1" } */ > + > + > +double f(_Complex double a) > +{ > + a+= 1.0f; > + return __builtin_cabs(a); > +} > + > +/* Check that cabs is not expanded during complex lowering. */ > +/* { dg-final { scan-tree-dump "__builtin_cabs " "cplxlower1" } } */ > +/* { dg-final { scan-tree-dump-not "__builtin_sqrt " "cplxlower1" } } */ > diff --git a/gcc/testsuite/gfortran.dg/vect/pr115710.f90 > b/gcc/testsuite/gfortran.dg/vect/pr115710.f90 > new file mode 100644 > index 00000000000..3749210ac80 > --- /dev/null > +++ b/gcc/testsuite/gfortran.dg/vect/pr115710.f90 > @@ -0,0 +1,18 @@ > +! { dg-do compile } > +! { dg-additional-options "-Ofast" } > +! { dg-require-effective-target vect_float } > +! { dg-require-effective-target vect_call_sqrtf } > + > +! { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } > +! CABS expansion should allow for the vectorization to happen. > + > +subroutine foo(a,b,n) > + complex(kind(1.0))::a(*) > + real(kind(1.0))::b(*) > + integer::i,n > + > + do i=1,n > + b(i)=abs(a(i))**2 > + end do > + > +end subroutine foo > diff --git a/gcc/tree-complex.cc b/gcc/tree-complex.cc > index dfebec18ec3..d1276dc1c2f 100644 > --- a/gcc/tree-complex.cc > +++ b/gcc/tree-complex.cc > @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see > #include "system.h" > #include "coretypes.h" > #include "backend.h" > +#include "target.h" > #include "rtl.h" > #include "tree.h" > #include "gimple.h" > @@ -42,7 +43,9 @@ along with GCC; see the file COPYING3. 
If not see > #include "cfganal.h" > #include "gimple-fold.h" > #include "diagnostic-core.h" > - > +#include "case-cfn-macros.h" > +#include "builtins.h" > +#include "optabs-tree.h" > > /* For each complex ssa name, a lattice value. We're interested in finding > out whether a complex number is degenerate in some way, having only real > @@ -238,7 +241,18 @@ init_dont_simulate_again (void) > { > case GIMPLE_CALL: > if (gimple_call_lhs (stmt)) > - sim_again_p = is_complex_reg (gimple_call_lhs (stmt)); > + { > + sim_again_p = is_complex_reg (gimple_call_lhs (stmt)); > + switch (gimple_call_combined_fn (stmt)) > + { > + CASE_CFN_CABS: > + /* Expand cabs only if unsafe math and optimizing. */ > + if (optimize && flag_unsafe_math_optimizations) > + saw_a_complex_op = true; > + break; > + default:; > + } > + } > break; > > case GIMPLE_ASSIGN: > @@ -1686,6 +1700,46 @@ expand_complex_asm (gimple_stmt_iterator *gsi) > } > } > > + > +/* ARG is the argument to a cabs builtin call in GSI with location info > + LOC. Create a sequence of statements prior to GSI that calculates > + sqrt(R*R + I*I), where R and I are the real and imaginary components > + of ARG, respectively. 
*/ > + > +static void > +gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, gimple *old_stmt) > +{ > + tree real_part, imag_part, addend1, addend2, sum; > + tree arg = gimple_call_arg (old_stmt, 0); > + tree type = TREE_TYPE (TREE_TYPE (arg)); > + tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); > + machine_mode mode = TYPE_MODE (type); > + gimple *new_stmt; > + > + if (!flag_unsafe_math_optimizations > + || !optimize_bb_for_speed_p (gimple_bb (old_stmt)) > + || !sqrtfn > + || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing) > + return; > + > + real_part = extract_component (gsi, arg, false, true); > + imag_part = extract_component (gsi, arg, true, true); > + location_t loc = gimple_location (old_stmt); > + > + gimple_seq stmts = NULL; > + addend1 = gimple_build (&stmts, loc, MULT_EXPR, type, real_part, > real_part); > + addend2 = gimple_build (&stmts, loc, MULT_EXPR, type, imag_part, > imag_part); > + sum = gimple_build (&stmts, loc, PLUS_EXPR, type, addend1, addend2); > + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); > + > + /* Build the sqrt call. */ > + new_stmt = gimple_build_call (sqrtfn, 1, sum); > + gimple_set_location (new_stmt, loc); > + tree lhs = gimple_call_lhs (old_stmt); > + gimple_call_set_lhs (new_stmt, lhs); > + gsi_replace (gsi, new_stmt, true); > +} > + > /* Process one statement. If we identify a complex operation, expand it. 
*/ > > static void > @@ -1696,6 +1750,16 @@ expand_complex_operations_1 (gimple_stmt_iterator *gsi) > tree ac, ar, ai, bc, br, bi; > complex_lattice_t al, bl; > enum tree_code code; > + if (gimple_code (stmt) == GIMPLE_CALL) > + { > + switch (gimple_call_combined_fn (stmt)) > + { > + CASE_CFN_CABS: > + gimple_expand_builtin_cabs (gsi, stmt); > + return; > + default:; > + } > + } > > if (gimple_code (stmt) == GIMPLE_ASM) > { > diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc > index 3b5433ec000..71f896a9790 100644 > --- a/gcc/tree-ssa-math-opts.cc > +++ b/gcc/tree-ssa-math-opts.cc > @@ -1622,22 +1622,6 @@ build_and_insert_binop (gimple_stmt_iterator *gsi, > location_t loc, > return result; > } > > -/* Build a gimple reference operation with the given CODE and argument > - ARG, assigning the result to a new SSA name of TYPE with NAME. > - Insert the statement prior to GSI's current position, and return > - the fresh SSA name. */ > - > -static inline tree > -build_and_insert_ref (gimple_stmt_iterator *gsi, location_t loc, tree type, > - const char *name, enum tree_code code, tree arg0) > -{ > - tree result = make_temp_ssa_name (type, NULL, name); > - gimple *stmt = gimple_build_assign (result, build1 (code, type, arg0)); > - gimple_set_location (stmt, loc); > - gsi_insert_before (gsi, stmt, GSI_SAME_STMT); > - return result; > -} > - > /* Build a gimple assignment to cast VAL to TYPE. Insert the statement > prior to GSI's current position, and return the fresh SSA name. */ > > @@ -2193,39 +2177,6 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *gsi, > location_t loc, > return NULL_TREE; > } > > -/* ARG is the argument to a cabs builtin call in GSI with location info > - LOC. Create a sequence of statements prior to GSI that calculates > - sqrt(R*R + I*I), where R and I are the real and imaginary components > - of ARG, respectively. Return an expression holding the result. 
*/ > - > -static tree > -gimple_expand_builtin_cabs (gimple_stmt_iterator *gsi, location_t loc, tree > arg) > -{ > - tree real_part, imag_part, addend1, addend2, sum, result; > - tree type = TREE_TYPE (TREE_TYPE (arg)); > - tree sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT); > - machine_mode mode = TYPE_MODE (type); > - > - if (!flag_unsafe_math_optimizations > - || !optimize_bb_for_speed_p (gimple_bb (gsi_stmt (*gsi))) > - || !sqrtfn > - || optab_handler (sqrt_optab, mode) == CODE_FOR_nothing) > - return NULL_TREE; > - > - real_part = build_and_insert_ref (gsi, loc, type, "cabs", > - REALPART_EXPR, arg); > - addend1 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, > - real_part, real_part); > - imag_part = build_and_insert_ref (gsi, loc, type, "cabs", > - IMAGPART_EXPR, arg); > - addend2 = build_and_insert_binop (gsi, loc, "cabs", MULT_EXPR, > - imag_part, imag_part); > - sum = build_and_insert_binop (gsi, loc, "cabs", PLUS_EXPR, addend1, > addend2); > - result = build_and_insert_call (gsi, loc, sqrtfn, sum); > - > - return result; > -} > - > /* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1 > on the SSA_NAME argument of each of them. */ > > @@ -2322,7 +2273,8 @@ make_pass_cse_sincos (gcc::context *ctxt) > } > > /* Expand powi(x,n) into an optimal number of multiplies, when n is a > constant. > - Also expand CABS. */ > + Note the name is powcabs but cabs expansion was moved to the lower complex > + pass. 
*/ > namespace { > > const pass_data pass_data_expand_powcabs = > @@ -2455,24 +2407,6 @@ pass_expand_powcabs::execute (function *fun) > } > break; > > - CASE_CFN_CABS: > - arg0 = gimple_call_arg (stmt, 0); > - loc = gimple_location (stmt); > - result = gimple_expand_builtin_cabs (&gsi, loc, arg0); > - > - if (result) > - { > - tree lhs = gimple_get_lhs (stmt); > - gassign *new_stmt = gimple_build_assign (lhs, result); > - gimple_set_location (new_stmt, loc); > - unlink_stmt_vdef (stmt); > - gsi_replace (&gsi, new_stmt, true); > - cleanup_eh = true; > - if (gimple_vdef (stmt)) > - release_ssa_name (gimple_vdef (stmt)); > - } > - break; > - > default:; > } > } > -- > 2.43.0 >