On Thu, Oct 5, 2023 at 11:22 AM Tamar Christina <tamar.christ...@arm.com> wrote:
>
> Hi All,
>
> copysign (x, -1) is effectively fneg (abs (x)), which on AArch64 can be
> done most efficiently with an OR of the sign bit.
>
> The middle-end will now optimize fneg (abs (x)) to copysign as the
> canonical form, and so this optimizes the expansion.
>
> If the target has an inclusive-OR that takes an immediate, then the
> transformed instruction is both shorter and faster.  For targets that
> don't, the immediate has to be constructed separately, but this still
> ends up being faster as the immediate construction is not on the
> critical path.
>
> Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.
>
> Note that this is part of another patch series; the additional testcases
> are mutually dependent on the match.pd patch.  As such the tests are
> added there instead of here.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
>         PR tree-optimization/109154
>         * config/aarch64/aarch64.md (copysign<GPF:mode>3): Handle
>         copysign (x, -1).
>         * config/aarch64/aarch64-simd.md (copysign<mode>3): Likewise.
>         * config/aarch64/aarch64-sve.md (copysign<mode>3): Likewise.
>
> --- inline copy of patch --
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 25a1e4e8ecf767636c0ff3cdab6cad6e1482f73e..a78e77dcc3473445108b06c50f9c28a8369f3e3f 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -754,15 +754,33 @@ (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><
>  (define_expand "copysign<mode>3"
>    [(match_operand:VHSDF 0 "register_operand")
>     (match_operand:VHSDF 1 "register_operand")
> -   (match_operand:VHSDF 2 "register_operand")]
> +   (match_operand:VHSDF 2 "nonmemory_operand")]
>    "TARGET_SIMD"
>  {
> -  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
> +  machine_mode int_mode = <V_INT_EQUIV>mode;
> +  rtx v_bitmask = gen_reg_rtx (int_mode);
>    int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
>
>    emit_move_insn (v_bitmask,
>                   aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
>                                                      HOST_WIDE_INT_M1U << bits));
> +
> +  /* copysign (x, -1) should instead be expanded as orr with the sign
> +     bit.  */
> +  if (!REG_P (operands[2]))
> +    {
> +      auto r0
> +       = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[2]));
> +      if (-1 == real_to_integer (r0))

I think this should be REAL_VALUE_NEGATIVE (r0) instead -- only the sign
needs to be copied here, right?  It also seems like this should double-check
that operands[2] really is a vec_duplicate of a constant and that the
constant is a CONST_DOUBLE.

> +       {
> +         emit_insn (gen_ior<v_int_equiv>3 (
> +           lowpart_subreg (int_mode, operands[0], <MODE>mode),
> +           lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
> +         DONE;
> +       }
> +    }
> +
> +  operands[2] = force_reg (<MODE>mode, operands[2]);
>    emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
>                                          operands[2], operands[1]));
>    DONE;
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 5a652d8536a0ef9461f40da7b22834e683e73ceb..071400c820a5b106ddf9dc9faebb117975d74ea0 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -6387,7 +6387,7 @@ (define_insn "*<optab><mode>3"
>  (define_expand "copysign<mode>3"
>    [(match_operand:SVE_FULL_F 0 "register_operand")
>     (match_operand:SVE_FULL_F 1 "register_operand")
> -   (match_operand:SVE_FULL_F 2 "register_operand")]
> +   (match_operand:SVE_FULL_F 2 "nonmemory_operand")]
>    "TARGET_SVE"
>  {
>    rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
> @@ -6398,11 +6398,26 @@ (define_expand "copysign<mode>3"
>    rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
>    rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
>
> -  emit_insn (gen_and<v_int_equiv>3
> -            (sign, arg2,
> -             aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> -                                                HOST_WIDE_INT_M1U
> -                                                << bits)));
> +  rtx v_sign_bitmask
> +    = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> +                                        HOST_WIDE_INT_M1U << bits);
> +
> +  /* copysign (x, -1) should instead be expanded as orr with the sign
> +     bit.  */
> +  if (!REG_P (operands[2]))
> +    {
> +      auto r0
> +       = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[2]));
> +      if (-1 == real_to_integer (r0))

Likewise.

> +       {
> +         emit_insn (gen_ior<v_int_equiv>3 (int_res, arg1, v_sign_bitmask));
> +         emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
> +         DONE;
> +       }
> +    }
> +
> +  operands[2] = force_reg (<MODE>mode, operands[2]);
> +  emit_insn (gen_and<v_int_equiv>3 (sign, arg2, v_sign_bitmask));
>    emit_insn (gen_and<v_int_equiv>3
>              (mant, arg1,
>               aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 24349ecdbbab875f21975f116732a9e53762d4c1..d6c581ad81615b4feb095391cbcf4f5b78fa72f1 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -6940,12 +6940,25 @@ (define_expand "lrint<GPF:mode><GPI:mode>2"
>  (define_expand "copysign<GPF:mode>3"
>    [(match_operand:GPF 0 "register_operand")
>     (match_operand:GPF 1 "register_operand")
> -   (match_operand:GPF 2 "register_operand")]
> +   (match_operand:GPF 2 "nonmemory_operand")]
>    "TARGET_SIMD"
>  {
> -  rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
> +  machine_mode int_mode = <V_INT_EQUIV>mode;
> +  rtx bitmask = gen_reg_rtx (int_mode);
>    emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
>                                     << (GET_MODE_BITSIZE (<MODE>mode) - 1)));
> +  /* copysign (x, -1) should instead be expanded as orr with the sign
> +     bit.  */
> +  auto r0 = CONST_DOUBLE_REAL_VALUE (operands[2]);
> +  if (-1 == real_to_integer (r0))

Likewise.
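
To sketch what I mean, for the simd case (completely untested; CONST_DOUBLE_P
plus the explicit negativity test are just how I'd spell the checks):

  if (!REG_P (operands[2]))
    {
      rtx elt = unwrap_const_vec_duplicate (operands[2]);
      /* Any negative constant can take the orr path, since copysign only
         uses the sign of the second operand; everything else falls
         through to the generic BSL-based expansion.  */
      if (CONST_DOUBLE_P (elt)
          && REAL_VALUE_NEGATIVE (*CONST_DOUBLE_REAL_VALUE (elt)))
        {
          emit_insn (gen_ior<v_int_equiv>3 (
            lowpart_subreg (int_mode, operands[0], <MODE>mode),
            lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
          DONE;
        }
    }

real_isneg (CONST_DOUBLE_REAL_VALUE (elt)) would work just as well if you
prefer the function over the macro.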

Thanks,
Andrew

> +    {
> +      emit_insn (gen_ior<v_int_equiv>3 (
> +       lowpart_subreg (int_mode, operands[0], <MODE>mode),
> +       lowpart_subreg (int_mode, operands[1], <MODE>mode), bitmask));
> +      DONE;
> +    }
> +
> +  operands[2] = force_reg (<MODE>mode, operands[2]);
>    emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
>                                        bitmask));
>    DONE;
>
>
>
> --
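
P.S. The identity the patch relies on -- copysign (x, c) for negative c
really is just an OR of the sign bit -- is easy to sanity-check in plain C
(illustrative only, not part of the patch):

  #include <assert.h>
  #include <math.h>
  #include <stdint.h>
  #include <string.h>

  int
  main (void)
  {
    float vals[] = { 0.0f, -0.0f, 1.5f, -2.25f, 3.0e38f, -1.0e-38f };
    for (unsigned i = 0; i < sizeof vals / sizeof vals[0]; i++)
      {
        uint32_t bits;
        memcpy (&bits, &vals[i], sizeof bits);  /* Get the IEEE bit pattern.  */
        bits |= (uint32_t) 1 << 31;             /* orr in the sign bit.  */
        float via_or;
        memcpy (&via_or, &bits, sizeof via_or);
        assert (via_or == copysignf (vals[i], -1.0f));
      }
    return 0;
  }

This is also why e.g. a V4SF copysign (x, -1.0) can become a single
ORR (vector, immediate) of #0x80, LSL #24, while 64-bit elements have no
such immediate form and need the mask materialized separately, as the
description says.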