The ARMv8.1 architecture extension adds two Advanced SIMD instructions, sqrdmlah and sqrdmlsh. This patch adds the NEON intrinsics vqrdmlah_lane and vqrdmlsh_lane for these instructions. The new intrinsics are of the form vqrdml{as}h[q]_lane[q]_<type>, together with the scalar forms vqrdml{as}h{h,s}_<type> and their _lane/_laneq variants.
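For illustration only (not part of the patch), a minimal use of the new lane intrinsics could look like the fragment below. The function name is made up; the caller is assumed to be built for ARMv8.1 (e.g. with -march=armv8.1-a or an equivalent target attribute) so that the always_inline wrappers in arm_neon.h can be inlined:

  #include <arm_neon.h>

  /* Hypothetical example: multiply each element of b by lane 2 of v,
     double, round, keep the high half and accumulate into a with
     saturation (vqrdmlah_lane); then do the same with lane 3 but
     subtracting instead of accumulating (vqrdmlsh_lane).  */
  int16x4_t
  example_mla_by_lane (int16x4_t a, int16x4_t b, int16x4_t v)
  {
    a = vqrdmlah_lane_s16 (a, b, v, 2);
    return vqrdmlsh_lane_s16 (a, b, v, 3);
  }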
Tested the series for aarch64-none-linux-gnu with native bootstrap and
make check on an ARMv8 architecture.  Also tested aarch64-none-elf with
cross-compiled check-gcc on an ARMv8.1 emulator.

Ok for trunk?
Matthew

gcc/
2015-10-23  Matthew Wahab  <matthew.wa...@arm.com>

	* config/aarch64/arm_neon.h (vqrdmlah_laneq_s16,
	vqrdmlah_laneq_s32): New.
	(vqrdmlahq_laneq_s16, vqrdmlahq_laneq_s32): New.
	(vqrdmlsh_laneq_s16, vqrdmlsh_laneq_s32): New.
	(vqrdmlshq_laneq_s16, vqrdmlshq_laneq_s32): New.
	(vqrdmlah_lane_s16, vqrdmlah_lane_s32): New.
	(vqrdmlahq_lane_s16, vqrdmlahq_lane_s32): New.
	(vqrdmlahh_s16, vqrdmlahh_lane_s16, vqrdmlahh_laneq_s16): New.
	(vqrdmlahs_s32, vqrdmlahs_lane_s32, vqrdmlahs_laneq_s32): New.
	(vqrdmlsh_lane_s16, vqrdmlsh_lane_s32): New.
	(vqrdmlshq_lane_s16, vqrdmlshq_lane_s32): New.
	(vqrdmlshh_s16, vqrdmlshh_lane_s16, vqrdmlshh_laneq_s16): New.
	(vqrdmlshs_s32, vqrdmlshs_lane_s32, vqrdmlshs_laneq_s32): New.

gcc/testsuite/
2015-10-23  Matthew Wahab  <matthew.wa...@arm.com>

	* gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc: New file,
	support code for vqrdml{as}h_lane tests.
	* gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c: New.
	* gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c: New.
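As a cross-check on the expected values in the new tests, the following
stand-alone scalar model reflects my reading of the 16-bit operation. It is
not part of the patch; sat16 and qrdmlxh_s16 are made-up names, and an
arithmetic right shift of negative values is assumed (as GCC provides):

  #include <stdint.h>
  #include <stdio.h>

  /* Clamp a 64-bit value to the signed 16-bit range.  */
  static int16_t
  sat16 (int64_t x)
  {
    if (x > INT16_MAX)
      return INT16_MAX;
    if (x < INT16_MIN)
      return INT16_MIN;
    return (int16_t) x;
  }

  /* Per-element model of vqrdmlah (sub == 0) and vqrdmlsh (sub == 1):
     double the product, add it to (or subtract it from) the accumulator
     scaled into the high half, round, then narrow with saturation.  */
  static int16_t
  qrdmlxh_s16 (int16_t a, int16_t b, int16_t c, int sub)
  {
    int64_t prod = 2 * (int64_t) b * (int64_t) c;
    int64_t acc = ((int64_t) a << 16) + (sub ? -prod : prod) + (1 << 15);
    return sat16 (acc >> 16);
  }

  int
  main (void)
  {
    /* Inputs taken from the "multiplication saturates" step of the tests.  */
    printf ("vqrdmlah: %#x\n",
	    (unsigned) (uint16_t) qrdmlxh_s16 (-32768, -32768, -32768, 0));
    printf ("vqrdmlsh: %#x\n",
	    (unsigned) (uint16_t) qrdmlxh_s16 (-32768, -32768, -32768, 1));
    return 0;
  }

With the all-0x8000 inputs this prints 0 for vqrdmlah and 0x8000 for
vqrdmlsh, matching the expected_mul values in vqrdmlah_lane.c and
vqrdmlsh_lane.c below.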
>From a2399818dba85ff2801a28bad77ef51697990da7 Mon Sep 17 00:00:00 2001 From: Matthew Wahab <matthew.wa...@arm.com> Date: Thu, 27 Aug 2015 14:17:26 +0100 Subject: [PATCH 7/7] Add neon intrinsics: vqrdmlah_lane, vqrdmlsh_lane. Change-Id: I6d7a372e0a5b83ef0846ab62abbe9b24ada69fc4 --- gcc/config/aarch64/arm_neon.h | 182 +++++++++++++++++++++ .../aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc | 154 +++++++++++++++++ .../aarch64/advsimd-intrinsics/vqrdmlah_lane.c | 57 +++++++ .../aarch64/advsimd-intrinsics/vqrdmlsh_lane.c | 61 +++++++ 4 files changed, 454 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c create mode 100644 gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 9e73809..9b68e4a 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -10675,6 +10675,59 @@ vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c) return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c); } +#pragma GCC push_options +#pragma GCC target ("arch=armv8.1-a") + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqv4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_laneq_s16 (int16x4_t __a, int16x4_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_laneq_s32 (int32x2_t __a, int32x2_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_laneq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_laneq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqv4si (__a, __b, __c, __d); +} + +#pragma GCC pop_options + /* Table intrinsics. */ __extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) @@ -20014,6 +20067,135 @@ vqrdmulhs_laneq_s32 (int32_t __a, int32x4_t __b, const int __c) return __builtin_aarch64_sqrdmulh_laneqsi (__a, __b, __c); } +#pragma GCC push_options +#pragma GCC target ("arch=armv8.1-a") + +/* vqrdmlah. 
*/ + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlah_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanev4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlah_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanev2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlahq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlahq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlahh_s16 (int16_t __a, int16_t __b, int16_t __c) +{ + return (int16_t) __builtin_aarch64_sqrdmlahhi (__a, __b, __c); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlahh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanehi (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlahh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqhi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlahs_s32 (int32_t __a, int32_t __b, int32_t __c) +{ + return (int32_t) __builtin_aarch64_sqrdmlahsi (__a, __b, __c); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlahs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_lanesi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlahs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlah_laneqsi (__a, __b, __c, __d); +} + +/* vqrdmlsh. 
*/ + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vqrdmlsh_lane_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanev4hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vqrdmlsh_lane_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanev2si (__a, __b, __c, __d); +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vqrdmlshq_lane_s16 (int16x8_t __a, int16x8_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanev8hi (__a, __b, __c, __d); +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vqrdmlshq_lane_s32 (int32x4_t __a, int32x4_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanev4si (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlshh_s16 (int16_t __a, int16_t __b, int16_t __c) +{ + return (int16_t) __builtin_aarch64_sqrdmlshhi (__a, __b, __c); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlshh_lane_s16 (int16_t __a, int16_t __b, int16x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanehi (__a, __b, __c, __d); +} + +__extension__ static __inline int16_t __attribute__ ((__always_inline__)) +vqrdmlshh_laneq_s16 (int16_t __a, int16_t __b, int16x8_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqhi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlshs_s32 (int32_t __a, int32_t __b, int32_t __c) +{ + return (int32_t) __builtin_aarch64_sqrdmlshsi (__a, __b, __c); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlshs_lane_s32 (int32_t __a, int32_t __b, int32x2_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_lanesi (__a, __b, __c, __d); +} + +__extension__ static __inline int32_t __attribute__ ((__always_inline__)) +vqrdmlshs_laneq_s32 (int32_t __a, int32_t __b, int32x4_t __c, const int __d) +{ + return __builtin_aarch64_sqrdmlsh_laneqsi (__a, __b, __c, __d); +} + +#pragma GCC pop_options + /* vqrshl */ __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc new file mode 100644 index 0000000..a855502 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlXh_lane.inc @@ -0,0 +1,154 @@ +#define FNNAME1(NAME) exec_ ## NAME ## _lane +#define FNNAME(NAME) FNNAME1 (NAME) + +void FNNAME (INSN) (void) +{ + /* vector_res = vqrdmlXh_lane (vector, vector2, vector3, lane), + then store the result. */ +#define TEST_VQRDMLXH_LANE2(INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) \ + Set_Neon_Cumulative_Sat (0, VECT_VAR (vector_res, T1, W, N)); \ + VECT_VAR (vector_res, T1, W, N) = \ + INSN##Q##_lane_##T2##W (VECT_VAR (vector, T1, W, N), \ + VECT_VAR (vector2, T1, W, N), \ + VECT_VAR (vector3, T1, W, N2), \ + L); \ + vst1##Q##_##T2##W (VECT_VAR (result, T1, W, N), \ + VECT_VAR (vector_res, T1, W, N)); \ + CHECK_CUMULATIVE_SAT (TEST_MSG, T1, W, N, EXPECTED_CUMULATIVE_SAT, CMT) + + /* Two auxliary macros are necessary to expand INSN. 
*/ +#define TEST_VQRDMLXH_LANE1(INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQRDMLXH_LANE2 (INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) + +#define TEST_VQRDMLXH_LANE(Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) \ + TEST_VQRDMLXH_LANE1 (INSN, Q, T1, T2, W, N, N2, L, \ + EXPECTED_CUMULATIVE_SAT, CMT) + + + DECL_VARIABLE (vector, int, 16, 4); + DECL_VARIABLE (vector, int, 32, 2); + DECL_VARIABLE (vector, int, 16, 8); + DECL_VARIABLE (vector, int, 32, 4); + + DECL_VARIABLE (vector_res, int, 16, 4); + DECL_VARIABLE (vector_res, int, 32, 2); + DECL_VARIABLE (vector_res, int, 16, 8); + DECL_VARIABLE (vector_res, int, 32, 4); + + DECL_VARIABLE (vector2, int, 16, 4); + DECL_VARIABLE (vector2, int, 32, 2); + DECL_VARIABLE (vector2, int, 16, 8); + DECL_VARIABLE (vector2, int, 32, 4); + + DECL_VARIABLE (vector3, int, 16, 4); + DECL_VARIABLE (vector3, int, 32, 2); + DECL_VARIABLE (vector3, int, 16, 8); + DECL_VARIABLE (vector3, int, 32, 4); + + clean_results (); + + VLOAD (vector, buffer, , int, s, 16, 4); + VLOAD (vector, buffer, , int, s, 32, 2); + + VLOAD (vector, buffer, q, int, s, 16, 8); + VLOAD (vector, buffer, q, int, s, 32, 4); + + /* Initialize vector2. */ + VDUP (vector2, , int, s, 16, 4, 0x5555); + VDUP (vector2, , int, s, 32, 2, 0xBB); + VDUP (vector2, q, int, s, 16, 8, 0xBB); + VDUP (vector2, q, int, s, 32, 4, 0x22); + + /* Initialize vector3. */ + VDUP (vector3, , int, s, 16, 4, 0x5555); + VDUP (vector3, , int, s, 32, 2, 0xBB); + VDUP (vector3, q, int, s, 16, 8, 0x33); + VDUP (vector3, q, int, s, 32, 4, 0x22); + + /* Choose lane arbitrarily. */ +#define CMT "" + TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat, CMT); + TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat, CMT); + TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat, CMT); + TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat, CMT); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected, CMT); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected, CMT); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected, CMT); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected, CMT); + + /* Now use input values such that the multiplication causes + saturation. 
*/ +#define TEST_MSG_MUL " (check mul cumulative saturation)" + VDUP (vector, , int, s, 16, 4, 0x8000); + VDUP (vector, , int, s, 32, 2, 0x80000000); + VDUP (vector, q, int, s, 16, 8, 0x8000); + VDUP (vector, q, int, s, 32, 4, 0x80000000); + + VDUP (vector2, , int, s, 16, 4, 0x8000); + VDUP (vector2, , int, s, 32, 2, 0x80000000); + VDUP (vector2, q, int, s, 16, 8, 0x8000); + VDUP (vector2, q, int, s, 32, 4, 0x80000000); + + VDUP (vector3, , int, s, 16, 4, 0x8000); + VDUP (vector3, , int, s, 32, 2, 0x80000000); + VDUP (vector3, q, int, s, 16, 8, 0x8000); + VDUP (vector3, q, int, s, 32, 4, 0x80000000); + + TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat_mul, + TEST_MSG_MUL); + TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat_mul, + TEST_MSG_MUL); + TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat_mul, + TEST_MSG_MUL); + TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat_mul, + TEST_MSG_MUL); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_mul, TEST_MSG_MUL); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_mul, TEST_MSG_MUL); + + VDUP (vector, , int, s, 16, 4, 0x8000); + VDUP (vector, , int, s, 32, 2, 0x80000000); + VDUP (vector, q, int, s, 16, 8, 0x8000); + VDUP (vector, q, int, s, 32, 4, 0x80000000); + + VDUP (vector2, , int, s, 16, 4, 0x8001); + VDUP (vector2, , int, s, 32, 2, 0x80000001); + VDUP (vector2, q, int, s, 16, 8, 0x8001); + VDUP (vector2, q, int, s, 32, 4, 0x80000001); + + VDUP (vector3, , int, s, 16, 4, 0x8001); + VDUP (vector3, , int, s, 32, 2, 0x80000001); + VDUP (vector3, q, int, s, 16, 8, 0x8001); + VDUP (vector3, q, int, s, 32, 4, 0x80000001); + + /* Use input values where rounding produces a result equal to the + saturation value, but does not set the saturation flag. */ +#define TEST_MSG_ROUND " (check rounding)" + TEST_VQRDMLXH_LANE (, int, s, 16, 4, 4, 2, expected_cumulative_sat_round, + TEST_MSG_ROUND); + TEST_VQRDMLXH_LANE (, int, s, 32, 2, 2, 1, expected_cumulative_sat_round, + TEST_MSG_ROUND); + TEST_VQRDMLXH_LANE (q, int, s, 16, 8, 4, 3, expected_cumulative_sat_round, + TEST_MSG_ROUND); + TEST_VQRDMLXH_LANE (q, int, s, 32, 4, 2, 0, expected_cumulative_sat_round, + TEST_MSG_ROUND); + + CHECK (TEST_MSG, int, 16, 4, PRIx16, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 32, 2, PRIx32, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 16, 8, PRIx16, expected_round, TEST_MSG_ROUND); + CHECK (TEST_MSG, int, 32, 4, PRIx32, expected_round, TEST_MSG_ROUND); +} + +int +main (void) +{ + FNNAME (INSN) (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c new file mode 100644 index 0000000..ed43e01 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlah_lane.c @@ -0,0 +1,57 @@ +/* { dg-require-effective-target arm_v8_1a_neon_hw } */ +/* { dg-add-options arm_v8_1a_neon } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0; + +/* Expected results. 
*/ +VECT_VAR_DECL (expected, int, 16, 4) [] = { 0x38d3, 0x38d4, 0x38d5, 0x38d6 }; +VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL (expected, int, 16, 8) [] = { 0x006d, 0x006e, 0x006f, 0x0070, + 0x0071, 0x0072, 0x0073, 0x0074 }; +VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. */ +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 0; + +/* Expected results when multiplication saturates. */ +VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0 }; +VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x0, 0x0, 0x0, 0x0 }; + +/* Expected values of cumulative_saturation flag when rounding + should not cause saturation. */ +int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 0; + +/* Expected results when rounding should not cause saturation. */ +VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0xfffe, 0xfffe, + 0xfffe, 0xfffe }; +VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0xfffffffe, 0xfffffffe }; +VECT_VAR_DECL (expected_round,int, 16, 8) [] = { 0xfffe, 0xfffe, + 0xfffe, 0xfffe, + 0xfffe, 0xfffe, + 0xfffe, 0xfffe }; +VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0xfffffffe, 0xfffffffe, + 0xfffffffe, 0xfffffffe }; + +#define INSN vqrdmlah +#define TEST_MSG "VQRDMLAH_LANE" + +#include "vqrdmlXh_lane.inc" diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c new file mode 100644 index 0000000..6010b42 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrdmlsh_lane.c @@ -0,0 +1,61 @@ +/* { dg-require-effective-target arm_v8_1a_neon_hw } */ +/* { dg-add-options arm_v8_1a_neon } */ + +#include <arm_neon.h> +#include "arm-neon-ref.h" +#include "compute-ref-data.h" + +/* Expected values of cumulative_saturation flag. */ +int VECT_VAR (expected_cumulative_sat, int, 16, 4) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 2) = 0; +int VECT_VAR (expected_cumulative_sat, int, 16, 8) = 0; +int VECT_VAR (expected_cumulative_sat, int, 32, 4) = 0; + +/* Expected results. */ +VECT_VAR_DECL (expected, int, 16, 4) [] = { 0xc70d, 0xc70e, 0xc70f, 0xc710 }; +VECT_VAR_DECL (expected, int, 32, 2) [] = { 0xfffffff0, 0xfffffff1 }; +VECT_VAR_DECL (expected, int, 16, 8) [] = { 0xff73, 0xff74, 0xff75, 0xff76, + 0xff77, 0xff78, 0xff79, 0xff7a }; +VECT_VAR_DECL (expected, int, 32, 4) [] = { 0xfffffff0, 0xfffffff1, + 0xfffffff2, 0xfffffff3 }; + +/* Expected values of cumulative_saturation flag when multiplication + saturates. */ +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 4) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 2) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 16, 8) = 1; +int VECT_VAR (expected_cumulative_sat_mul, int, 32, 4) = 1; + +/* Expected results when multiplication saturates. 
*/ +VECT_VAR_DECL (expected_mul, int, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_mul, int, 32, 2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL (expected_mul, int, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_mul, int, 32, 4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +/* Expected values of cumulative_saturation flag when rounding + should not cause saturation. */ +int VECT_VAR (expected_cumulative_sat_round, int, 16, 4) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 2) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 16, 8) = 1; +int VECT_VAR (expected_cumulative_sat_round, int, 32, 4) = 1; + +/* Expected results when rounding should not cause saturation. */ +VECT_VAR_DECL (expected_round, int, 16, 4) [] = { 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_round, int, 32, 2) [] = { 0x80000000, 0x80000000 }; +VECT_VAR_DECL (expected_round, int, 16, 8) [] = { 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000, + 0x8000, 0x8000 }; +VECT_VAR_DECL (expected_round, int, 32, 4) [] = { 0x80000000, 0x80000000, + 0x80000000, 0x80000000 }; + +#define INSN vqrdmlsh +#define TEST_MSG "VQRDMLSH_LANE" + +#include "vqrdmlXh_lane.inc" -- 2.1.4