Hello, This patch adds the ACLE intrinsics for the instructions introduced in ARMv8.1. It adds the vqrmdlah and vqrdmlsh forms of the instrinsics to the arm_neon.h header, together with the ARM builtins used to implement them. The intrinsics are available when -march=armv8.1-a is enabled together with appropriate fpu options.
Tested the series for arm-none-eabi with cross-compiled check-gcc on an ARMv8.1 emulator. Also tested arm-none-linux-gnueabihf with native bootstrap and make check. Ok for trunk? Matthew gcc/ 2015-11-26 Matthew Wahab <matthew.wa...@arm.com> * config/arm/arm_neon.h (vqrdmlah_s16, vqrdmlah_s32): New. (vqrdmlahq_s16, vqrdmlahq_s32): New. (vqrdmlsh_s16, vqrdmlsh_s32): New. (vqrdmlahq_s16, vqrdmlshq_s32): New. * config/arm/arm_neon_builtins.def: Add "vqrdmlah" and "vqrdmlsh".
>From 93e9db5bf06172f18f4e89e9533c66d8a0c4f2ca Mon Sep 17 00:00:00 2001 From: Matthew Wahab <matthew.wa...@arm.com> Date: Tue, 1 Sep 2015 16:21:44 +0100 Subject: [PATCH 6/7] [ARM] Add neon intrinsics vqrdmlah, vqrdmlsh. Change-Id: Ic40ff4d477f36ec01714c68e3b83b66208c7958b --- gcc/config/arm/arm_neon.h | 50 ++++++++++++++++++++++++++++++++++++ gcc/config/arm/arm_neon_builtins.def | 2 ++ 2 files changed, 52 insertions(+) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 0a33d21..b617f80 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -1158,6 +1158,56 @@ vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) return (int32x4_t)__builtin_neon_vqrdmulhv4si (__a, __b); } +#ifdef __ARM_FEATURE_QRDMX +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vqrdmlahv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vqrdmlahv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vqrdmlahv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqrdmlahv4si (__a, __b, __c); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) +{ + return (int16x4_t)__builtin_neon_vqrdmlshv4hi (__a, __b, __c); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) +{ + return (int32x2_t)__builtin_neon_vqrdmlshv2si (__a, __b, __c); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) +{ + return (int16x8_t)__builtin_neon_vqrdmlshv8hi (__a, __b, __c); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) +{ + return (int32x4_t)__builtin_neon_vqrdmlshv4si (__a, __b, __c); +} +#endif + __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vmull_s8 (int8x8_t __a, int8x8_t __b) { diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 0b719df..8d5c0ca 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -45,6 +45,8 @@ VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) VAR4 (BINOP, vqrdmulh, v4hi, v2si, v8hi, v4si) VAR2 (TERNOP, vqdmlal, v4hi, v2si) VAR2 (TERNOP, vqdmlsl, v4hi, v2si) +VAR4 (TERNOP, vqrdmlah, v4hi, v2si, v8hi, v4si) +VAR4 (TERNOP, vqrdmlsh, v4hi, v2si, v8hi, v4si) VAR3 (BINOP, vmullp, v8qi, v4hi, v2si) VAR3 (BINOP, vmulls, v8qi, v4hi, v2si) VAR3 (BINOP, vmullu, v8qi, v4hi, v2si) -- 2.1.4