On Wed, Jun 07, 2017 at 12:38:37PM +0100, Tamar Christina wrote:
> Hi All,
>
>
> This patch adds support for creating floating point constants
> using mov immediate instructions. The movi SIMD instruction can
> be used for HFmode and SFmode constants, e.g. for -0.0f we generate:
>
> movi v0.2s, 0x80, lsl 24
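
(Aside, not part of the patch: a small standalone C check of why that
movi immediate reproduces -0.0f. The single-precision bits of -0.0f are
just the sign bit, i.e. 0x80 shifted left by 24, which is exactly what
the movi writes into each 32-bit lane.)

  #include <stdio.h>
  #include <stdint.h>
  #include <string.h>

  int
  main (void)
  {
    /* IEEE-754 single-precision bits of -0.0f: only the sign bit set.  */
    float f = -0.0f;
    uint32_t bits;
    memcpy (&bits, &f, sizeof bits);

    /* movi v0.2s, 0x80, lsl 24 writes (0x80 << 24) into each 32-bit lane.  */
    uint32_t movi = (uint32_t) 0x80 << 24;

    printf ("-0.0f bits: 0x%08x, movi immediate: 0x%08x (%s)\n",
            (unsigned) bits, (unsigned) movi,
            bits == movi ? "match" : "mismatch");
    return 0;
  }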
>
> More complex constants can be generated using an integer MOV or
> MOV+MOVK:
>
> mov w0, 48128
> movk w0, 0x47f0, lsl 16
> fmov s0, w0
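
(Again as an aside: a standalone C sketch of how the constant's bit
pattern splits into the two immediates above. 48128 is 0xbc00, the low
halfword set by the MOV, and 0x47f0 is the high halfword inserted by the
MOVK with LSL 16; the FMOV then transfers the completed word to the
FP/SIMD register.)

  #include <stdio.h>
  #include <stdint.h>

  int
  main (void)
  {
    /* The word built by the example above: MOV sets the low halfword,
       MOVK ..., lsl 16 inserts the high halfword.  */
    uint32_t bits = 0x47f0bc00;

    uint16_t mov_imm  = bits & 0xffff;  /* immediate for MOV  */
    uint16_t movk_imm = bits >> 16;     /* immediate for MOVK ..., lsl 16  */

    printf ("mov w0, %u\t\t// 0x%04x\n", (unsigned) mov_imm, (unsigned) mov_imm);
    printf ("movk w0, 0x%04x, lsl 16\n", (unsigned) movk_imm);
    return 0;
  }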
>
> We allow up to 3 instructions, since this lets all HF, SF and most DF
> constants be generated without a literal load, and is overall best
> for codesize.
>
>
> Regression tested on aarch64-none-linux-gnu with no regressions.
>
> OK for trunk?
>
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 5adc5edb8dde9c30450b04932a37c41f84cc5ed1..7f107672882b13809be01355ffafbc2807cc5adb 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -1167,66 +1167,120 @@
> }
> )
>
> -(define_insn "*movhf_aarch64"
> - [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w,m,r,m ,r")
> - (match_operand:HF 1 "general_operand" "Y ,?rY, w,w,m,w,m,rY,r"))]
> +(define_insn_and_split "*movhf_aarch64"
> +  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r")
> +	(match_operand:HF 1 "general_operand"      "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r"))]
> "TARGET_FLOAT && (register_operand (operands[0], HFmode)
> - || aarch64_reg_or_fp_zero (operands[1], HFmode))"
> + || aarch64_reg_or_fp_float (operands[1], HFmode))"
> "@
> movi\\t%0.4h, #0
> - mov\\t%0.h[0], %w1
> + fmov\\t%s0, %w1
Should this not be %h0?
> umov\\t%w0, %1.h[0]
> mov\\t%0.h[0], %1.h[0]
> + fmov\\t%s0, %1
Likewise here, and this one matters much more for correctness, as it
changes the way the bit pattern ends up in the register (see table C2-1
in release B.a of the ARM Architecture Reference Manual for ARMv8-A).
> + * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
> ldr\\t%h0, %1
> str\\t%h1, %0
> ldrh\\t%w0, %1
> strh\\t%w1, %0
> mov\\t%w0, %w1"
> - [(set_attr "type" "neon_move,neon_from_gp,neon_to_gp,neon_move,\
> - f_loads,f_stores,load1,store1,mov_reg")
> - (set_attr "simd" "yes,yes,yes,yes,*,*,*,*,*")]
> + "&& can_create_pseudo_p ()
> + && !aarch64_can_const_movi_rtx_p (operands[1], HFmode)
> + && !aarch64_float_const_representable_p (operands[1])
> + && aarch64_float_const_rtx_p (operands[1])"
> + [(const_int 0)]
> + "{
> + unsigned HOST_WIDE_INT ival;
> + if (!aarch64_reinterpret_float_as_int (operands[1], &ival))
> + FAIL;
> +
> + rtx tmp = gen_reg_rtx (SImode);
> + aarch64_expand_mov_immediate (tmp, GEN_INT (ival));
> + tmp = simplify_gen_subreg (HImode, tmp, SImode, 0);
> + emit_move_insn (operands[0], gen_lowpart (HFmode, tmp));
> + DONE;
> + }"
> + [(set_attr "type" "neon_move,f_mcr,neon_to_gp,neon_move,fconsts, \
> + neon_move,f_loads,f_stores,load1,store1,mov_reg")
> + (set_attr "simd" "yes,*,yes,yes,*,yes,*,*,*,*,*")]
> )
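
One more aside for readers following the new split: below is a conceptual
C-level analogue (not the GCC internals themselves; materialise_hf is a
hypothetical name, and __fp16 is the AArch64 storage-only half-precision
type) of what the split does for an HFmode constant that has no direct
FP/SIMD immediate encoding: reinterpret the constant as its 16-bit
pattern, materialise that pattern through an integer register, and view
the low halfword as HFmode again.

  #include <stdio.h>
  #include <stdint.h>
  #include <string.h>

  static __fp16
  materialise_hf (__fp16 c)
  {
    uint16_t bits;
    memcpy (&bits, &c, sizeof bits);        /* aarch64_reinterpret_float_as_int  */
    uint32_t scratch = bits;                /* value built in an SImode pseudo (mov/movk)  */
    uint16_t low = (uint16_t) scratch;      /* narrow back to HImode  */
    __fp16 result;
    memcpy (&result, &low, sizeof result);  /* low halfword viewed as HFmode  */
    return result;
  }

  int
  main (void)
  {
    __fp16 c = (__fp16) 1.5;
    uint16_t before, after;
    memcpy (&before, &c, sizeof before);
    __fp16 r = materialise_hf (c);
    memcpy (&after, &r, sizeof after);
    printf ("0x%04x -> 0x%04x\n", (unsigned) before, (unsigned) after);
    return 0;
  }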
Thanks,
James