Hi all,
The included testcase currently ICEs at -O0. vget_lane_f64 is implemented as a
function, so even when it is called correctly with a constant lane argument,
without constant propagation the lane is not recognised as a compile-time
constant by the time it reaches the lane-extract builtin, which triggers the ICE.
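To illustrate the mechanism, here is a simplified sketch with a hypothetical
name (get_lane_fn stands in for the intrinsic; this is not the arm_neon.h
code): the wrapper receives the lane as an ordinary parameter, so at -O0
nothing tells the compiler that it is really a literal.

__extension__ static __inline double __attribute__ ((__always_inline__))
get_lane_fn (const double *__vec, const int __lane)
{
  /* At -O0 no constant propagation runs, so even for a call such as
     get_lane_fn (v, 0) the value of __lane is not a constant
     expression here; a builtin demanding an immediate lane operand
     would therefore ICE.  */
  return __vec[__lane];
}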
This patch changes these intrinsics to use the macro version directly.
I believe work is under way to fix this properly as part of a more general
rework, but until that lands this patch implements the affected intrinsics
using the __aarch64_vget_lane* macros, like the other lane intrinsics around
them.
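The macro form avoids the problem because the literal lane argument is
substituted textually and so reaches the underlying operation as a genuine
constant expression even without optimisation. Again a simplified, hypothetical
sketch (GET_LANE_MACRO is an illustration, not the real macro):

/* GET_LANE_MACRO (v, 0) expands to ((v)[(0)]), so the constant 0 is
   visible directly where the (builtin-like) operation is emitted,
   even at -O0.  */
#define GET_LANE_MACRO(__vec, __lane) ((__vec)[(__lane)])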
Tested on aarch64-none-elf.
Ok for trunk?
Thanks,
Kyrill
2014-09-08 Kyrylo Tkachov <[email protected]>
* config/aarch64/arm_neon.h (vmuld_lane_f64): Use macro for getting
the lane.
(vmuld_laneq_f64): Likewise.
(vmuls_lane_f32): Likewise.
(vmuls_laneq_f32): Likewise.
2014-09-08 Kyrylo Tkachov <[email protected]>
* gcc.target/aarch64/simd/vmul_lane_const_lane_1.c: New test.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 0a86172..cf2b0b6 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -18970,13 +18970,13 @@ vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane)
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane)
{
- return __a * vget_lane_f64 (__b, __lane);
+ return __a * __aarch64_vget_lane_f64 (__b, __lane);
}
__extension__ static __inline float64_t __attribute__ ((__always_inline__))
vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
{
- return __a * vgetq_lane_f64 (__b, __lane);
+ return __a * __aarch64_vgetq_lane_f64 (__b, __lane);
}
/* vmuls_lane */
@@ -18984,13 +18984,13 @@ vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane)
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane)
{
- return __a * vget_lane_f32 (__b, __lane);
+ return __a * __aarch64_vget_lane_f32 (__b, __lane);
}
__extension__ static __inline float32_t __attribute__ ((__always_inline__))
vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane)
{
- return __a * vgetq_lane_f32 (__b, __lane);
+ return __a * __aarch64_vgetq_lane_f32 (__b, __lane);
}
/* vmul_laneq */
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmul_lane_const_lane_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmul_lane_const_lane_1.c
new file mode 100644
index 0000000..2455181
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/vmul_lane_const_lane_1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+#include "arm_neon.h"
+
+float64_t
+wrap_vmuld_lane_f64 (float64_t a, float64x1_t b)
+{
+ return vmuld_lane_f64 (a, b, 0);
+}
+
+float64_t
+wrap_vmuld_laneq_f64 (float64_t a, float64x2_t b)
+{
+ return vmuld_laneq_f64 (a, b, 0);
+}
+
+float32_t
+wrap_vmuls_lane_f32 (float32_t a, float32x2_t b)
+{
+ return vmuls_lane_f32 (a, b, 0);
+}
+
+float32_t
+wrap_vmuls_laneq_f32 (float32_t a, float32x4_t b)
+{
+ return vmuls_laneq_f32 (a, b, 0);
+}