[AArch64] Fix vcvt_high_f64_f32 and vcvt_high_f32_f64 intrinsics.

James Greenhalgh Wed, 09 Sep 2015 01:32:49 -0700

Hi,

This patch clears up some remaining confusion in the vector lane orderings
for the two intrinsics mentioned in the title.


Bootstrapped on aarch64-none-linux-gnu and regression tested for
aarch64_be-none-elf with no issues.

OK?

Thanks,
James

---
2015-09-09  James Greenhalgh  <[email protected]>

        * config/aarch64/aarch64-simd.md (vec_unpacks_lo_v4sf): Rewrite
        as an expand.
        (vec_unpacks_hi_v4sf): Likewise.
        (aarch64_float_extend_lo_v2df): Rename to...
        (aarch64_fcvtl_v4sf): This.
        (aarch64_fcvtl2_v4sf): New.
        (aarch64_float_truncate_hi_v4sf): Rewrite as an expand.
        (aarch64_float_truncate_hi_v4sf_le): New.
        (aarch64_float_truncate_hi_v4sf_be): Likewise.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 75fa0ab..c7ae956 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1691,39 +1691,65 @@
 
 ;; Float widening operations.
 
-(define_insn "vec_unpacks_lo_v4sf"
+(define_insn "aarch64_float_extend_lo_v2df"
   [(set (match_operand:V2DF 0 "register_operand" "=w")
 	(float_extend:V2DF
-	  (vec_select:V2SF
-	    (match_operand:V4SF 1 "register_operand" "w")
-	    (parallel [(const_int 0) (const_int 1)])
-	  )))]
+	  (match_operand:V2SF 1 "register_operand" "w")))]
   "TARGET_SIMD"
   "fcvtl\\t%0.2d, %1.2s"
   [(set_attr "type" "neon_fp_cvt_widen_s")]
 )
 
-(define_insn "aarch64_float_extend_lo_v2df"
+(define_insn "aarch64_fcvtl_v4sf"
   [(set (match_operand:V2DF 0 "register_operand" "=w")
 	(float_extend:V2DF
-	  (match_operand:V2SF 1 "register_operand" "w")))]
+	  (vec_select:V2SF
+	    (match_operand:V4SF 1 "register_operand" "w")
+	    (match_operand:V4SF 2 "vect_par_cnst_lo_half" ""))))]
   "TARGET_SIMD"
   "fcvtl\\t%0.2d, %1.2s"
   [(set_attr "type" "neon_fp_cvt_widen_s")]
 )
 
-(define_insn "vec_unpacks_hi_v4sf"
+(define_insn "aarch64_fcvtl2_v4sf"
   [(set (match_operand:V2DF 0 "register_operand" "=w")
 	(float_extend:V2DF
 	  (vec_select:V2SF
 	    (match_operand:V4SF 1 "register_operand" "w")
-	    (parallel [(const_int 2) (const_int 3)])
-	  )))]
+	    (match_operand:V4SF 2 "vect_par_cnst_hi_half" ""))))]
   "TARGET_SIMD"
   "fcvtl2\\t%0.2d, %1.4s"
   [(set_attr "type" "neon_fp_cvt_widen_s")]
 )
 
+(define_expand "vec_unpacks_lo_v4sf"
+  [(match_operand:V2DF 0 "register_operand" "=w")
+   (match_operand:V4SF 1 "register_operand" "w")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (V4SFmode, false);
+  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
+			     ? gen_aarch64_fcvtl2_v4sf
+			     : gen_aarch64_fcvtl_v4sf;
+  emit_insn (gen (operands[0], operands[1], p));
+  DONE;
+}
+)
+
+(define_expand "vec_unpacks_hi_v4sf"
+  [(match_operand:V2DF 0 "register_operand" "=w")
+   (match_operand:V4SF 1 "register_operand" "w")]
+  "TARGET_SIMD"
+{
+  rtx p = aarch64_simd_vect_par_cnst_half (V4SFmode, true);
+  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
+			     ? gen_aarch64_fcvtl_v4sf
+			     : gen_aarch64_fcvtl2_v4sf;
+  emit_insn (gen (operands[0], operands[1], p));
+  DONE;
+}
+)
+
 ;; Float narrowing operations.
 
 (define_insn "aarch64_float_truncate_lo_v2sf"
@@ -1735,17 +1761,42 @@
   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
 )
 
-(define_insn "aarch64_float_truncate_hi_v4sf"
+(define_insn "aarch64_float_truncate_hi_v4sf_le"
   [(set (match_operand:V4SF 0 "register_operand" "=w")
     (vec_concat:V4SF
       (match_operand:V2SF 1 "register_operand" "0")
       (float_truncate:V2SF
 	(match_operand:V2DF 2 "register_operand" "w"))))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "fcvtn2\\t%0.4s, %2.2d"
   [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
 )
 
+(define_insn "aarch64_float_truncate_hi_v4sf_be"
+  [(set (match_operand:V4SF 0 "register_operand" "=w")
+    (vec_concat:V4SF
+      (float_truncate:V2SF
+	(match_operand:V2DF 2 "register_operand" "w"))
+      (match_operand:V2SF 1 "register_operand" "0")))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
+  "fcvtn2\\t%0.4s, %2.2d"
+  [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
+)
+
+(define_expand "aarch64_float_truncate_hi_v4sf"
+  [(match_operand:V4SF 0 "register_operand" "=w")
+   (match_operand:V2SF 1 "register_operand" "0")
+   (match_operand:V2DF 2 "register_operand" "w")]
+  "TARGET_SIMD"
+{
+  rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
+			     ? gen_aarch64_float_truncate_hi_v4sf_be
+			     : gen_aarch64_float_truncate_hi_v4sf_le;
+  emit_insn (gen (operands[0], operands[1], operands[2]));
+  DONE;
+}
+)
+
 (define_expand "vec_pack_trunc_v2df"
   [(set (match_operand:V4SF 0 "register_operand")
       (vec_concat:V4SF

[AArch64] Fix vcvt_high_f64_f32 and vcvt_high_f32_f64 intrinsics.

Reply via email to