Hi All,
This patch fixes a bug in the scalar version of copysign where, due to a subreg, we were
generating less efficient code than necessary.
This patch changes the code generated for

return x * __builtin_copysignf (150.0f, y);

from:

adrp x1, .LC1
mov x0, 2147483648
ins v3.d[0], x0
ldr s2, [x1, #:lo12:.LC1]
bsl v3.8b, v1.8b, v2.8b
fmul s0, s0, s3
ret
.LC1:
.word 1125515264

to:

mov x0, 1125515264
movi v2.2s, 0x80, lsl 24
fmov d3, x0
bit v3.8b, v1.8b, v2.8b
fmul s0, s0, s3
ret

This removes the incorrect ins; the 150.0f constant (bit pattern 1125515264) is now
materialised with a mov/fmov pair instead of being loaded from the literal pool.
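
For reference, a standalone snippet along these lines (the function name is just for
illustration, not part of the patch) shows the difference at -O2:

/* Illustrative testcase only, not part of the patch.  */
float
foo (float x, float y)
{
  return x * __builtin_copysignf (150.0f, y);
}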
Regression tested on aarch64-none-linux-gnu with no regressions.
OK for trunk?
Thanks,
Tamar
gcc/
2017-03-15  Tamar Christina  <tamar.christ...@arm.com>

	* config/aarch64/aarch64.md (copysignsf3): Fix mask generation.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..435c8f50c0e521b3057c26a482315c5a82574711 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5030,14 +5030,16 @@
    (match_operand:SF 2 "register_operand")]
   "TARGET_FLOAT && TARGET_SIMD"
 {
-  rtx mask = gen_reg_rtx (DImode);
+  rtx v_bitmask = gen_reg_rtx (V2SImode);
 
   /* Juggle modes to get us in to a vector mode for BSL.  */
-  rtx op1 = lowpart_subreg (V2SFmode, operands[1], SFmode);
+  rtx op1 = lowpart_subreg (DImode, operands[1], SFmode);
   rtx op2 = lowpart_subreg (V2SFmode, operands[2], SFmode);
   rtx tmp = gen_reg_rtx (V2SFmode);
-  emit_move_insn (mask, GEN_INT (HOST_WIDE_INT_1U << 31));
-  emit_insn (gen_aarch64_simd_bslv2sf (tmp, mask, op2, op1));
+  emit_move_insn (v_bitmask,
+                  aarch64_simd_gen_const_vector_dup (V2SImode,
+                                                     HOST_WIDE_INT_M1U << 31));
+  emit_insn (gen_aarch64_simd_bslv2sf (tmp, v_bitmask, op2, op1));
   emit_move_insn (operands[0], lowpart_subreg (SFmode, tmp, V2SFmode));
   DONE;
 }
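
As a side note on the new mask constant (my own sanity check, not part of the patch):
assuming the V2SI dup keeps only the low 32 bits of HOST_WIDE_INT_M1U << 31 in each lane,
every lane ends up as 0x80000000, which is exactly what the movi v2.2s, 0x80, lsl 24 in
the new sequence materialises:

/* Standalone sketch, not part of the patch.  */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint64_t wide = ~UINT64_C (0) << 31;  /* HOST_WIDE_INT_M1U << 31 */
  uint32_t lane = (uint32_t) wide;      /* per-lane value of the V2SI dup */
  uint32_t movi = 0x80u << 24;          /* movi v2.2s, 0x80, lsl 24 */
  printf ("lane = 0x%08" PRIx32 ", movi = 0x%08" PRIx32 "\n", lane, movi);
  return 0;
}

Both print as 0x80000000, i.e. just the sign bit of a 32-bit float.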