Hi All, 

This fixes a bug in the scalar version of copysign where, due to a subreg, we
were generating less efficient code than necessary.

For code such as

  return x * __builtin_copysignf (150.0f, y);

which used to generate

        adrp    x1, .LC1
        mov     x0, 2147483648
        ins     v3.d[0], x0
        ldr     s2, [x1, #:lo12:.LC1]
        bsl     v3.8b, v1.8b, v2.8b
        fmul    s0, s0, s3
        ret

.LC1:
        .word   1125515264

we now generate
        mov     x0, 1125515264
        movi    v2.2s, 0x80, lsl 24
        fmov    d3, x0
        bit     v3.8b, v1.8b, v2.8b
        fmul    s0, s0, s3
        ret

eliminating the unnecessary GP-to-SIMD ins and the load from the literal pool.
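
For reference, a testcase along these lines could guard the new code
generation (the file name, options and scan patterns below are only a
suggestion and are not part of this patch):

  /* Hypothetical test, e.g. gcc.target/aarch64/copysign_bsl_1.c.  */
  /* { dg-do compile } */
  /* { dg-options "-O2" } */

  float
  f (float x, float y)
  {
    return x * __builtin_copysignf (150.0f, y);
  }

  /* The mask should now be built directly in a vector register.  */
  /* { dg-final { scan-assembler {\tmovi\t} } } */
  /* { dg-final { scan-assembler-not {\tins\t} } } */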

Regression tested on aarch64-none-linux-gnu with no regressions.

OK for trunk?

Thanks,
Tamar

gcc/
2017-03-15  Tamar Christina  <tamar.christ...@arm.com>

        * config/aarch64/aarch64.md
        (copysignsf3): Fix mask generation.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..435c8f50c0e521b3057c26a482315c5a82574711 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5030,14 +5030,16 @@
    (match_operand:SF 2 "register_operand")]
   "TARGET_FLOAT && TARGET_SIMD"
 {
-  rtx mask = gen_reg_rtx (DImode);
+  rtx v_bitmask = gen_reg_rtx (V2SImode);
 
   /* Juggle modes to get us in to a vector mode for BSL.  */
-  rtx op1 = lowpart_subreg (V2SFmode, operands[1], SFmode);
+  rtx op1 = lowpart_subreg (DImode, operands[1], SFmode);
   rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode);
   rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 31));
-  emit_insn (gen_aarch64_simd_bslv2sf (tmp, mask, op2, op1));
+  emit_move_insn (v_bitmask,
+		  aarch64_simd_gen_const_vector_dup (V2SImode,
+						     HOST_WIDE_INT_M1U << 31));
+  emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1));
   emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));
   DONE;
 }
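
For what it is worth, the selection that BSL/BIT performs per lane is just
the usual copysign bit-select; a rough scalar illustration of what the
expander arranges (copysign_select is a made-up name, not part of the patch):

  #include <stdint.h>
  #include <string.h>

  /* Per-lane view of what the vector BSL computes: take the sign bit
     from y (operand 2) and all other bits from the magnitude c
     (operand 1), using the 0x80000000 mask the movi now builds.  */
  static float
  copysign_select (float c, float y)
  {
    uint32_t cb, yb, rb, mask = 0x80000000u;
    memcpy (&cb, &c, sizeof cb);
    memcpy (&yb, &y, sizeof yb);
    rb = (yb & mask) | (cb & ~mask);
    memcpy (&c, &rb, sizeof c);
    return c;
  }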
