On Thu, Oct 5, 2023 at 11:22 AM Tamar Christina <tamar.christ...@arm.com> wrote:
>
> Hi All,
>
> copysign (x, -1) is effectively fneg (abs (x)), which on AArch64 can be
> done most efficiently with an OR of the sign bit.
>
> The middle-end will now optimize fneg (abs (x)) to copysign as the
> canonical form, and so this optimizes the expansion.
>
> If the target has an inclusive-OR that takes an immediate, then the
> transformed instruction is both shorter and faster.  For targets that
> don't, the immediate has to be constructed separately, but this still
> ends up being faster as the immediate construction is not on the
> critical path.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Note that this is part of another patch series; the additional testcases
> are mutually dependent on the match.pd patch.  As such the tests are
> added there instead of here.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         PR tree-optimization/109154
>         * config/aarch64/aarch64.md (copysign<GPF:mode>3): Handle
>         copysign (x, -1).
>         * config/aarch64/aarch64-simd.md (copysign<mode>3): Likewise.
>         * config/aarch64/aarch64-sve.md (copysign<mode>3): Likewise.
>
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 25a1e4e8ecf767636c0ff3cdab6cad6e1482f73e..a78e77dcc3473445108b06c50f9c28a8369f3e3f 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -754,15 +754,33 @@ (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><
>  (define_expand "copysign<mode>3"
>    [(match_operand:VHSDF 0 "register_operand")
>     (match_operand:VHSDF 1 "register_operand")
> -   (match_operand:VHSDF 2 "register_operand")]
> +   (match_operand:VHSDF 2 "nonmemory_operand")]
>    "TARGET_SIMD"
>  {
> -  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
> +  machine_mode int_mode = <V_INT_EQUIV>mode;
> +  rtx v_bitmask = gen_reg_rtx (int_mode);
>    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
>
>    emit_move_insn (v_bitmask,
>                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
>                                                      HOST_WIDE_INT_M1U << bits));
> +
> +  /* copysign (x, -1) should instead be expanded as orr with the sign
> +     bit.  */
> +  if (!REG_P (operands[2]))
> +    {
> +      auto r0
> +       = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[2]));
> +      if (-1 == real_to_integer (r0))

I think this should be REAL_VALUE_NEGATIVE (r0) instead -- only the sign
needs to be copied here, right?  It also seems like this should double-check
that operands[2] really is a vec_duplicate of a constant and that the
constant is a CONST_DOUBLE.

> +       {
> +         emit_insn (gen_ior<v_int_equiv>3 (
> +           lowpart_subreg (int_mode, operands[0], <MODE>mode),
> +           lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
> +         DONE;
> +       }
> +    }
> +
> +  operands[2] = force_reg (<MODE>mode, operands[2]);
>    emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
>                                          operands[2], operands[1]));
>    DONE;
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 5a652d8536a0ef9461f40da7b22834e683e73ceb..071400c820a5b106ddf9dc9faebb117975d74ea0 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -6387,7 +6387,7 @@ (define_insn "*<optab><mode>3"
>  (define_expand "copysign<mode>3"
>    [(match_operand:SVE_FULL_F 0 "register_operand")
>     (match_operand:SVE_FULL_F 1 "register_operand")
> -   (match_operand:SVE_FULL_F 2 "register_operand")]
> +   (match_operand:SVE_FULL_F 2 "nonmemory_operand")]
>    "TARGET_SVE"
>  {
>    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
> @@ -6398,11 +6398,26 @@ (define_expand "copysign<mode>3"
>    rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
>    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
>
> -  emit_insn (gen_and<v_int_equiv>3
> -            (sign, arg2,
> -             aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> -                                                HOST_WIDE_INT_M1U
> -                                                << bits)));
> +  rtx v_sign_bitmask
> +    = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> +                                        HOST_WIDE_INT_M1U << bits);
> +
> +  /* copysign (x, -1) should instead be expanded as orr with the sign
> +     bit.  */
> +  if (!REG_P (operands[2]))
> +    {
> +      auto r0
> +       = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[2]));
> +      if (-1 == real_to_integer (r0))

Likewise.

> +       {
> +         emit_insn (gen_ior<v_int_equiv>3 (int_res, arg1, v_sign_bitmask));
> +         emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
> +         DONE;
> +       }
> +    }
> +
> +  operands[2] = force_reg (<MODE>mode, operands[2]);
> +  emit_insn (gen_and<v_int_equiv>3 (sign, arg2, v_sign_bitmask));
>    emit_insn (gen_and<v_int_equiv>3
>              (mant, arg1,
>               aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 24349ecdbbab875f21975f116732a9e53762d4c1..d6c581ad81615b4feb095391cbcf4f5b78fa72f1 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -6940,12 +6940,25 @@ (define_expand "lrint<GPF:mode><GPI:mode>2"
>  (define_expand "copysign<GPF:mode>3"
>    [(match_operand:GPF 0 "register_operand")
>     (match_operand:GPF 1 "register_operand")
> -   (match_operand:GPF 2 "register_operand")]
> +   (match_operand:GPF 2 "nonmemory_operand")]
>    "TARGET_SIMD"
>  {
> -  rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
> +  machine_mode int_mode = <V_INT_EQUIV>mode;
> +  rtx bitmask = gen_reg_rtx (int_mode);
>    emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
>                                     << (GET_MODE_BITSIZE (<MODE>mode) - 1)));
> +  /* copysign (x, -1) should instead be expanded as orr with the sign
> +     bit.  */
> +  auto r0 = CONST_DOUBLE_REAL_VALUE (operands[2]);
> +  if (-1 == real_to_integer (r0))

Likewise.
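
To sketch what I mean, for the simd case (completely untested; CONST_DOUBLE_P
plus the explicit negativity test are just how I'd spell the checks):

  if (!REG_P (operands[2]))
    {
      rtx elt = unwrap_const_vec_duplicate (operands[2]);
      /* Any negative constant can take the orr path, since copysign only
         uses the sign of the second operand; everything else falls
         through to the generic BSL-based expansion.  */
      if (CONST_DOUBLE_P (elt)
          && REAL_VALUE_NEGATIVE (*CONST_DOUBLE_REAL_VALUE (elt)))
        {
          emit_insn (gen_ior<v_int_equiv>3 (
            lowpart_subreg (int_mode, operands[0], <MODE>mode),
            lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
          DONE;
        }
    }

real_isneg (CONST_DOUBLE_REAL_VALUE (elt)) would work just as well if you
prefer the function over the macro.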

Thanks,
Andrew

> +    {
> +      emit_insn (gen_ior<v_int_equiv>3 (
> +       lowpart_subreg (int_mode, operands[0], <MODE>mode),
> +       lowpart_subreg (int_mode, operands[1], <MODE>mode), bitmask));
> +      DONE;
> +    }
> +
> +  operands[2] = force_reg (<MODE>mode, operands[2]);
>    emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
>                                        bitmask));
>    DONE;
>
>
>
> --
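
P.S. The identity the patch relies on -- copysign (x, c) for negative c
really is just an OR of the sign bit -- is easy to sanity-check in plain C
(illustrative only, not part of the patch):

  #include <assert.h>
  #include <math.h>
  #include <stdint.h>
  #include <string.h>

  int
  main (void)
  {
    float vals[] = { 0.0f, -0.0f, 1.5f, -2.25f, 3.0e38f, -1.0e-38f };
    for (unsigned i = 0; i < sizeof vals / sizeof vals[0]; i++)
      {
        uint32_t bits;
        memcpy (&bits, &vals[i], sizeof bits);  /* Get the IEEE bit pattern.  */
        bits |= (uint32_t) 1 << 31;             /* orr in the sign bit.  */
        float via_or;
        memcpy (&via_or, &bits, sizeof via_or);
        assert (via_or == copysignf (vals[i], -1.0f));
      }
    return 0;
  }

This is also why e.g. a V4SF copysign (x, -1.0) can become a single
ORR (vector, immediate) of #0x80, LSL #24, while 64-bit elements have no
such immediate form and need the mask materialized separately, as the
description says.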