Hi All,

copysign (x, -1) is effectively fneg (abs (x)), which on AArch64 can be
done most efficiently with an OR of the sign bit.
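
As a minimal scalar sketch of the identity (a hypothetical helper, not
part of this patch):

  /* Unconditionally setting the sign bit of the IEEE encoding negates
     the value, i.e. neg_abs (x) == fneg (abs (x)) == copysign (x, -1).  */
  float
  neg_abs (float x)
  {
    unsigned int i;
    __builtin_memcpy (&i, &x, sizeof (i));
    i |= 0x80000000u;  /* OR in the sign bit.  */
    __builtin_memcpy (&x, &i, sizeof (x));
    return x;
  }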

The middle-end now canonicalizes fneg (abs (x)) to copysign (x, -1), so
this patch optimizes the expansion of that form.

If the target has an inclusive-OR that takes an immediate, the transformed
instruction is both shorter and faster.  For targets that don't, the
immediate has to be constructed separately, but this still ends up faster
because the immediate construction is not on the critical path.
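
For instance, a testcase-style loop of the kind this helps (illustrative
only, not one of the testcases from the series):

  /* For 32-bit elements the sign-bit mask fits an ORR (vector, immediate),
     so the loop body needs just the one ORR; for 64-bit elements the mask
     is materialized separately, off the critical path.  */
  void
  neg_abs_vec (float *x, int n)
  {
    for (int i = 0; i < n; i++)
      x[i] = __builtin_copysignf (x[i], -1.0f);
  }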

Bootstrapped and regtested on aarch64-none-linux-gnu with no issues.

Note that this is part of another patch series; the additional testcases
are mutually dependent on the match.pd patch.  As such, the tests are
added there instead of here.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

        PR tree-optimization/109154
        * config/aarch64/aarch64.md (copysign<GPF:mode>3): Handle
        copysign (x, -1).
        * config/aarch64/aarch64-simd.md (copysign<mode>3): Likewise.
        * config/aarch64/aarch64-sve.md (copysign<mode>3): Likewise.

--- inline copy of patch -- 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 25a1e4e8ecf767636c0ff3cdab6cad6e1482f73e..a78e77dcc3473445108b06c50f9c28a8369f3e3f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -754,15 +754,33 @@ (define_insn "aarch64_<DOTPROD_I8MM:sur>dot_lane<VB:isquadop><VS:vsi2qi><vczle><
 (define_expand "copysign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand")
-   (match_operand:VHSDF 2 "register_operand")]
+   (match_operand:VHSDF 2 "nonmemory_operand")]
   "TARGET_SIMD"
 {
-  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
+  machine_mode int_mode = <V_INT_EQUIV>mode;
+  rtx v_bitmask = gen_reg_rtx (int_mode);
   int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
 
   emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
+
+  /* copysign (x, -1) should instead be expanded as orr with the sign
+     bit.  */
+  if (!REG_P (operands[2]))
+    {
+      auto r0
+       = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[2]));
+      if (-1 == real_to_integer (r0))
+       {
+         emit_insn (gen_ior<v_int_equiv>3 (
+           lowpart_subreg (int_mode, operands[0], <MODE>mode),
+           lowpart_subreg (int_mode, operands[1], <MODE>mode), v_bitmask));
+         DONE;
+       }
+    }
+
+  operands[2] = force_reg (<MODE>mode, operands[2]);
   emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
   DONE;
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 5a652d8536a0ef9461f40da7b22834e683e73ceb..071400c820a5b106ddf9dc9faebb117975d74ea0 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -6387,7 +6387,7 @@ (define_insn "*<optab><mode>3"
 (define_expand "copysign<mode>3"
   [(match_operand:SVE_FULL_F 0 "register_operand")
    (match_operand:SVE_FULL_F 1 "register_operand")
-   (match_operand:SVE_FULL_F 2 "register_operand")]
+   (match_operand:SVE_FULL_F 2 "nonmemory_operand")]
   "TARGET_SVE"
   {
     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
@@ -6398,11 +6398,26 @@ (define_expand "copysign<mode>3"
     rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
 
-    emit_insn (gen_and<v_int_equiv>3
-              (sign, arg2,
-               aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
-                                                  HOST_WIDE_INT_M1U
-                                                  << bits)));
+    rtx v_sign_bitmask
+      = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+                                          HOST_WIDE_INT_M1U << bits);
+
+    /* copysign (x, -1) should instead be expanded as orr with the sign
+       bit.  */
+    if (!REG_P (operands[2]))
+      {
+       auto r0
+         = CONST_DOUBLE_REAL_VALUE (unwrap_const_vec_duplicate (operands[2]));
+       if (-1 == real_to_integer (r0))
+         {
+           emit_insn (gen_ior<v_int_equiv>3 (int_res, arg1, v_sign_bitmask));
+           emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
+           DONE;
+         }
+      }
+
+    operands[2] = force_reg (<MODE>mode, operands[2]);
+    emit_insn (gen_and<v_int_equiv>3 (sign, arg2, v_sign_bitmask));
     emit_insn (gen_and<v_int_equiv>3
               (mant, arg1,
                aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 24349ecdbbab875f21975f116732a9e53762d4c1..d6c581ad81615b4feb095391cbcf4f5b78fa72f1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6940,12 +6940,28 @@ (define_expand "lrint<GPF:mode><GPI:mode>2"
 (define_expand "copysign<GPF:mode>3"
   [(match_operand:GPF 0 "register_operand")
    (match_operand:GPF 1 "register_operand")
-   (match_operand:GPF 2 "register_operand")]
+   (match_operand:GPF 2 "nonmemory_operand")]
   "TARGET_SIMD"
 {
-  rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
+  machine_mode int_mode = <V_INT_EQUIV>mode;
+  rtx bitmask = gen_reg_rtx (int_mode);
   emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
                                    << (GET_MODE_BITSIZE (<MODE>mode) - 1)));
+  /* copysign (x, -1) should instead be expanded as orr with the sign
+     bit.  */
+  if (!REG_P (operands[2]))
+    {
+      auto r0 = CONST_DOUBLE_REAL_VALUE (operands[2]);
+      if (-1 == real_to_integer (r0))
+       {
+         emit_insn (gen_ior<v_int_equiv>3 (
+           lowpart_subreg (int_mode, operands[0], <MODE>mode),
+           lowpart_subreg (int_mode, operands[1], <MODE>mode), bitmask));
+         DONE;
+       }
+    }
+
+  operands[2] = force_reg (<MODE>mode, operands[2]);
   emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
                                       bitmask));
   DONE;



