Hi,
This patch clears up some remaining confusion in the vector lane orderings
for the two intrinsics mentioned in the title.
Bootstrapped on aarch64-none-linux-gnu and regression tested for
aarch64_be-none-elf with no issues.
OK?
Thanks,
James
---
2015-09-09 James Greenhalgh <[email protected]>
* config/aarch64/aarch64-simd.md (vec_unpacks_lo_v4sf): Rewrite
as an expand.
(vec_unpacks_hi_v4sf): Likewise.
(aarch64_fcvtl_v4sf): New.
(aarch64_fcvtl2_v4sf): Likewise.
(aarch64_float_truncate_hi_v4sf): Rewrite as an expand.
(aarch64_float_truncate_hi_v4sf_le): New.
(aarch64_float_truncate_hi_v4sf_be): Likewise.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 75fa0ab..c7ae956 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1691,39 +1691,65 @@
;; Float widening operations.
+;; FCVTL on a whole V2SF register: float_extend operand 1 (V2SF) into
+;; operand 0 (V2DF).  No vec_select is involved.
-(define_insn "vec_unpacks_lo_v4sf"
+(define_insn "aarch64_float_extend_lo_v2df"
[(set (match_operand:V2DF 0 "register_operand" "=w")
(float_extend:V2DF
- (vec_select:V2SF
- (match_operand:V4SF 1 "register_operand" "w")
- (parallel [(const_int 0) (const_int 1)])
- )))]
+ (match_operand:V2SF 1 "register_operand" "w")))]
"TARGET_SIMD"
"fcvtl\\t%0.2d, %1.2s"
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
+;; FCVTL on a V4SF register: float_extend to V2DF the V2SF half of operand 1
+;; selected by operand 2, which must satisfy vect_par_cnst_lo_half.
-(define_insn "aarch64_float_extend_lo_v2df"
+(define_insn "aarch64_fcvtl_v4sf"
[(set (match_operand:V2DF 0 "register_operand" "=w")
(float_extend:V2DF
- (match_operand:V2SF 1 "register_operand" "w")))]
+ (vec_select:V2SF
+ (match_operand:V4SF 1 "register_operand" "w")
+ (match_operand:V4SF 2 "vect_par_cnst_lo_half" ""))))]
"TARGET_SIMD"
"fcvtl\\t%0.2d, %1.2s"
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
+;; FCVTL2 on a V4SF register: float_extend to V2DF the V2SF half of operand 1
+;; selected by operand 2, which must satisfy vect_par_cnst_hi_half.
-(define_insn "vec_unpacks_hi_v4sf"
+(define_insn "aarch64_fcvtl2_v4sf"
[(set (match_operand:V2DF 0 "register_operand" "=w")
(float_extend:V2DF
(vec_select:V2SF
(match_operand:V4SF 1 "register_operand" "w")
- (parallel [(const_int 2) (const_int 3)])
- )))]
+ (match_operand:V4SF 2 "vect_par_cnst_hi_half" ""))))]
"TARGET_SIMD"
"fcvtl2\\t%0.2d, %1.4s"
[(set_attr "type" "neon_fp_cvt_widen_s")]
)
+;; Widen the "lo" half of V4SF operand 1 into V2DF operand 0 using FCVTL.
+(define_expand "vec_unpacks_lo_v4sf"
+  [(match_operand:V2DF 0 "register_operand" "=w")
+   (match_operand:V4SF 1 "register_operand" "w")]
+  "TARGET_SIMD"
+{
+  /* aarch64_simd_vect_par_cnst_half already allows for BYTES_BIG_ENDIAN, so
+     swapping to FCVTL2 here would emit an insn whose mask fails recog.  */
+  rtx p = aarch64_simd_vect_par_cnst_half (V4SFmode, false);
+  emit_insn (gen_aarch64_fcvtl_v4sf (operands[0], operands[1], p));
+  DONE;
+}
+)
+
+;; Widen the "hi" half of V4SF operand 1 into V2DF operand 0 using FCVTL2.
+(define_expand "vec_unpacks_hi_v4sf"
+  [(match_operand:V2DF 0 "register_operand" "=w")
+   (match_operand:V4SF 1 "register_operand" "w")]
+  "TARGET_SIMD"
+{
+  /* aarch64_simd_vect_par_cnst_half already allows for BYTES_BIG_ENDIAN, so
+     swapping to FCVTL here would emit an insn whose mask fails recog.  */
+  rtx p = aarch64_simd_vect_par_cnst_half (V4SFmode, true);
+  emit_insn (gen_aarch64_fcvtl2_v4sf (operands[0], operands[1], p));
+  DONE;
+}
+)
+
;; Float narrowing operations.
(define_insn "aarch64_float_truncate_lo_v2sf"
@@ -1735,17 +1761,42 @@
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
+;; FCVTN2, form enabled only when !BYTES_BIG_ENDIAN: operand 1 (tied to the
+;; destination by the "0" constraint) is the first half of the vec_concat and
+;; the float_truncate of operand 2 is the second half.
-(define_insn "aarch64_float_truncate_hi_v4sf"
+(define_insn "aarch64_float_truncate_hi_v4sf_le"
[(set (match_operand:V4SF 0 "register_operand" "=w")
(vec_concat:V4SF
(match_operand:V2SF 1 "register_operand" "0")
(float_truncate:V2SF
(match_operand:V2DF 2 "register_operand" "w"))))]
- "TARGET_SIMD"
+ "TARGET_SIMD && !BYTES_BIG_ENDIAN"
"fcvtn2\\t%0.4s, %2.2d"
[(set_attr "type" "neon_fp_cvt_narrow_d_q")]
)
+;; FCVTN2, form enabled only when BYTES_BIG_ENDIAN: the vec_concat halves are
+;; swapped relative to the _le pattern, so the float_truncate of operand 2
+;; comes first and operand 1 (tied to the destination) comes second.
+(define_insn "aarch64_float_truncate_hi_v4sf_be"
+ [(set (match_operand:V4SF 0 "register_operand" "=w")
+ (vec_concat:V4SF
+ (float_truncate:V2SF
+ (match_operand:V2DF 2 "register_operand" "w"))
+ (match_operand:V2SF 1 "register_operand" "0")))]
+ "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "fcvtn2\\t%0.4s, %2.2d"
+ [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
+)
+
+;; Expander keeping the original pattern name: picks the _be or _le insn
+;; according to BYTES_BIG_ENDIAN and passes operands 0-2 through unchanged.
+(define_expand "aarch64_float_truncate_hi_v4sf"
+ [(match_operand:V4SF 0 "register_operand" "=w")
+ (match_operand:V2SF 1 "register_operand" "0")
+ (match_operand:V2DF 2 "register_operand" "w")]
+ "TARGET_SIMD"
+{
+ rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
+ ? gen_aarch64_float_truncate_hi_v4sf_be
+ : gen_aarch64_float_truncate_hi_v4sf_le;
+ emit_insn (gen (operands[0], operands[1], operands[2]));
+ DONE;
+}
+)
+
(define_expand "vec_pack_trunc_v2df"
[(set (match_operand:V4SF 0 "register_operand")
(vec_concat:V4SF