Hi,

This patch clears up some remaining confusion in the vector lane orderings for the two intrinsics mentioned in the title.
Bootstrapped on aarch64-none-linux-gnu and regression tested for aarch64_be-none-elf with no issues.

OK?

Thanks,
James

---
2015-09-09  James Greenhalgh  <james.greenha...@arm.com>

	* config/aarch64/aarch64-simd.md (vec_unpacks_lo_v4sf): Rewrite
	as an expand.
	(vec_unpacks_hi_v4sf): Likewise.
	(aarch64_float_extend_lo_v2df): Rename to...
	(aarch64_fcvtl_v4sf): This.
	(aarch64_fcvtl2_v4sf): New.
	(aarch64_float_truncate_hi_v4sf): Rewrite as an expand.
	(aarch64_float_truncate_hi_v4sf_le): New.
	(aarch64_float_truncate_hi_v4sf_be): Likewise.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 75fa0ab..c7ae956 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1691,39 +1691,65 @@ ;; Float widening operations. -(define_insn "vec_unpacks_lo_v4sf" +(define_insn "aarch64_float_extend_lo_v2df" [(set (match_operand:V2DF 0 "register_operand" "=w") (float_extend:V2DF - (vec_select:V2SF - (match_operand:V4SF 1 "register_operand" "w") - (parallel [(const_int 0) (const_int 1)]) - )))] + (match_operand:V2SF 1 "register_operand" "w")))] "TARGET_SIMD" "fcvtl\\t%0.2d, %1.2s" [(set_attr "type" "neon_fp_cvt_widen_s")] ) -(define_insn "aarch64_float_extend_lo_v2df" +(define_insn "aarch64_fcvtl_v4sf" [(set (match_operand:V2DF 0 "register_operand" "=w") (float_extend:V2DF - (match_operand:V2SF 1 "register_operand" "w")))] + (vec_select:V2SF + (match_operand:V4SF 1 "register_operand" "w") + (match_operand:V4SF 2 "vect_par_cnst_lo_half" ""))))] "TARGET_SIMD" "fcvtl\\t%0.2d, %1.2s" [(set_attr "type" "neon_fp_cvt_widen_s")] ) -(define_insn "vec_unpacks_hi_v4sf" +(define_insn "aarch64_fcvtl2_v4sf" [(set (match_operand:V2DF 0 "register_operand" "=w") (float_extend:V2DF (vec_select:V2SF (match_operand:V4SF 1 "register_operand" "w") - (parallel [(const_int 2) (const_int 3)]) - )))] + (match_operand:V4SF 2 "vect_par_cnst_hi_half" ""))))] "TARGET_SIMD" "fcvtl2\\t%0.2d, %1.4s" [(set_attr "type" "neon_fp_cvt_widen_s")] ) +(define_expand "vec_unpacks_lo_v4sf" + [(match_operand:V2DF 0 "register_operand" "=w") + (match_operand:V4SF 1 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (V4SFmode, false); + rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN + ? 
gen_aarch64_fcvtl2_v4sf + : gen_aarch64_fcvtl_v4sf; + emit_insn (gen (operands[0], operands[1], p)); + DONE; +} +) + +(define_expand "vec_unpacks_hi_v4sf" + [(match_operand:V2DF 0 "register_operand" "=w") + (match_operand:V4SF 1 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx p = aarch64_simd_vect_par_cnst_half (V4SFmode, true); + rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN + ? gen_aarch64_fcvtl_v4sf + : gen_aarch64_fcvtl2_v4sf; + emit_insn (gen (operands[0], operands[1], p)); + DONE; +} +) + ;; Float narrowing operations. (define_insn "aarch64_float_truncate_lo_v2sf" @@ -1735,17 +1761,42 @@ [(set_attr "type" "neon_fp_cvt_narrow_d_q")] ) -(define_insn "aarch64_float_truncate_hi_v4sf" +(define_insn "aarch64_float_truncate_hi_v4sf_le" [(set (match_operand:V4SF 0 "register_operand" "=w") (vec_concat:V4SF (match_operand:V2SF 1 "register_operand" "0") (float_truncate:V2SF (match_operand:V2DF 2 "register_operand" "w"))))] - "TARGET_SIMD" + "TARGET_SIMD && !BYTES_BIG_ENDIAN" "fcvtn2\\t%0.4s, %2.2d" [(set_attr "type" "neon_fp_cvt_narrow_d_q")] ) +(define_insn "aarch64_float_truncate_hi_v4sf_be" + [(set (match_operand:V4SF 0 "register_operand" "=w") + (vec_concat:V4SF + (float_truncate:V2SF + (match_operand:V2DF 2 "register_operand" "w")) + (match_operand:V2SF 1 "register_operand" "0")))] + "TARGET_SIMD && BYTES_BIG_ENDIAN" + "fcvtn2\\t%0.4s, %2.2d" + [(set_attr "type" "neon_fp_cvt_narrow_d_q")] +) + +(define_expand "aarch64_float_truncate_hi_v4sf" + [(match_operand:V4SF 0 "register_operand" "=w") + (match_operand:V2SF 1 "register_operand" "0") + (match_operand:V2DF 2 "register_operand" "w")] + "TARGET_SIMD" +{ + rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN + ? gen_aarch64_float_truncate_hi_v4sf_be + : gen_aarch64_float_truncate_hi_v4sf_le; + emit_insn (gen (operands[0], operands[1], operands[2])); + DONE; +} +) + (define_expand "vec_pack_trunc_v2df" [(set (match_operand:V4SF 0 "register_operand") (vec_concat:V4SF