This implements the new vector optabs vec_<su>addh_narrow<mode>
adding support for in-vectorizer use for early break.
Bootstrapped Regtested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf, x86_64-pc-linux-gnu
-m32, -m64 and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (vec_<su>addh_narrow<mode>): New.
* config/aarch64/iterators.md (UNSPEC_SADDHN, UNSPEC_UADDHN): New.
(su, ADDHN): Use them.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/vect-addhn_1.c: New test.
---
diff --git a/gcc/config/aarch64/aarch64-simd.md
b/gcc/config/aarch64/aarch64-simd.md
index
8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..a1737792093ffea5f72c10c1cb4df6322280dbf4
100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -949,6 +949,22 @@ (define_expand "vec_widen_<su>abd_lo_<mode>"
}
)
+(define_expand "vec_<su>addh_narrow<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand")
+ (match_operand:VQN 2 "register_operand"))]
+ ADDHN))]
+ "TARGET_SIMD"
+ {
+ rtx shft
+ = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
+ emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1],
+ operands[2], shft));
+ DONE;
+ }
+)
+
(define_insn "aarch64_<su>abal<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index
b15e57843fed78ffe7edd927cfd7acbf395414a4..c65509fe949ad3056d194f05f901e76382d62ad8
100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -806,6 +806,8 @@ (define_c_enum "unspec"
UNSPEC_UHADD ; Used in aarch64-simd.md.
UNSPEC_SRHADD ; Used in aarch64-simd.md.
UNSPEC_URHADD ; Used in aarch64-simd.md.
+ UNSPEC_SADDHN ; Used in aarch64-simd.md.
+ UNSPEC_UADDHN ; Used in aarch64-simd.md.
UNSPEC_SHSUB ; Used in aarch64-simd.md.
UNSPEC_UHSUB ; Used in aarch64-simd.md.
UNSPEC_SQDMULH ; Used in aarch64-simd.md.
@@ -3251,6 +3253,8 @@ (define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD])
(define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD])
+(define_int_iterator ADDHN [UNSPEC_SADDHN UNSPEC_UADDHN])
+
(define_int_iterator BSL_DUP [1 2])
(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
@@ -4250,7 +4254,8 @@ (define_int_attr su [(UNSPEC_SADDV "s")
(UNSPEC_COND_SCVTF "s")
(UNSPEC_COND_UCVTF "u")
(UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u")
- (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")])
+ (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")
+ (UNSPEC_SADDHN "s") (UNSPEC_UADDHN "u")])
(define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
(UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
new file mode 100644
index
0000000000000000000000000000000000000000..b6f66f979ff71d40d1e09292c1c12df3c262ce9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
@@ -0,0 +1,88 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "tree-vect.h"
+
+#define N 1000
+#define CHECK_ERROR(cond, fmt, ...) \
+ do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } }
while (0)
+
+// Generates all test components for a given type combo
+#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT)
\
+ A_TYPE a_##A_TYPE##_##C_TYPE[N];
\
+ A_TYPE b_##A_TYPE##_##C_TYPE[N];
\
+ C_TYPE c_##A_TYPE##_##C_TYPE[N];
\
+ C_TYPE ref_##A_TYPE##_##C_TYPE[N];
\
+
\
+ void init_##A_TYPE##_##C_TYPE() {
\
+ _Pragma ("GCC novector")
\
+ for (int i = 0; i < N; i++) {
\
+ a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3);
\
+ b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7);
\
+ }
\
+ }
\
+
\
+ void foo_##A_TYPE##_##C_TYPE() {
\
+ for (int i = 0; i < N; i++)
\
+ c_##A_TYPE##_##C_TYPE[i] =
\
+ ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] +
\
+ (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT;
\
+ }
\
+
\
+ void ref_##A_TYPE##_##C_TYPE##_compute() {
\
+ _Pragma ("GCC novector")
\
+ for (int i = 0; i < N; i++)
\
+ ref_##A_TYPE##_##C_TYPE[i] =
\
+ ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] +
\
+ (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT;
\
+ }
\
+
\
+ void validate_##A_TYPE##_##C_TYPE(const char* variant_name) {
\
+ _Pragma ("GCC novector")
\
+ for (int i = 0; i < N; i++) {
\
+ if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) {
\
+ printf("FAIL [%s]: Index %d: got %lld, expected %lld\n",
\
+ variant_name, i,
\
+ (long long)c_##A_TYPE##_##C_TYPE[i],
\
+ (long long)ref_##A_TYPE##_##C_TYPE[i]);
\
+ __builtin_abort ();
\
+ }
\
+ }
\
+ }
+
+// Runs the test for one combo with name output
+#define RUN_COMBO(A_TYPE, C_TYPE) \
+ do { \
+ init_##A_TYPE##_##C_TYPE(); \
+ foo_##A_TYPE##_##C_TYPE(); \
+ ref_##A_TYPE##_##C_TYPE##_compute(); \
+ validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE); \
+ } while (0)
+
+// Instantiate all valid combinations
+TEST_COMBO(int16_t, int8_t, int32_t, 8)
+TEST_COMBO(uint16_t, uint8_t, uint32_t, 8)
+TEST_COMBO(int32_t, int16_t, int64_t, 16)
+TEST_COMBO(uint32_t, uint16_t, uint64_t, 16)
+#if defined(__aarch64__)
+TEST_COMBO(int64_t, int32_t, __int128_t, 32)
+TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32)
+#endif
+
+int main() {
+ check_vect ();
+
+ RUN_COMBO(int16_t, int8_t);
+ RUN_COMBO(uint16_t, uint8_t);
+ RUN_COMBO(int32_t, int16_t);
+ RUN_COMBO(uint32_t, uint16_t);
+#if defined(__aarch64__)
+ RUN_COMBO(int64_t, int32_t);
+ RUN_COMBO(uint64_t, uint32_t);
+#endif
+
+ return 0;
+}
+
--
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..a1737792093ffea5f72c10c1cb4df6322280dbf4 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -949,6 +949,22 @@ (define_expand "vec_widen_<su>abd_lo_<mode>"
}
)
+(define_expand "vec_<su>addh_narrow<mode>"
+ [(set (match_operand:<VNARROWQ> 0 "register_operand")
+ (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand")
+ (match_operand:VQN 2 "register_operand"))]
+ ADDHN))]
+ "TARGET_SIMD"
+ {
+ rtx shft
+ = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
+ emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1],
+ operands[2], shft));
+ DONE;
+ }
+)
+
(define_insn "aarch64_<su>abal<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(plus:<VWIDE>
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index b15e57843fed78ffe7edd927cfd7acbf395414a4..c65509fe949ad3056d194f05f901e76382d62ad8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -806,6 +806,8 @@ (define_c_enum "unspec"
UNSPEC_UHADD ; Used in aarch64-simd.md.
UNSPEC_SRHADD ; Used in aarch64-simd.md.
UNSPEC_URHADD ; Used in aarch64-simd.md.
+ UNSPEC_SADDHN ; Used in aarch64-simd.md.
+ UNSPEC_UADDHN ; Used in aarch64-simd.md.
UNSPEC_SHSUB ; Used in aarch64-simd.md.
UNSPEC_UHSUB ; Used in aarch64-simd.md.
UNSPEC_SQDMULH ; Used in aarch64-simd.md.
@@ -3251,6 +3253,8 @@ (define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD])
(define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD])
+(define_int_iterator ADDHN [UNSPEC_SADDHN UNSPEC_UADDHN])
+
(define_int_iterator BSL_DUP [1 2])
(define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
@@ -4250,7 +4254,8 @@ (define_int_attr su [(UNSPEC_SADDV "s")
(UNSPEC_COND_SCVTF "s")
(UNSPEC_COND_UCVTF "u")
(UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u")
- (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")])
+ (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")
+ (UNSPEC_SADDHN "s") (UNSPEC_UADDHN "u")])
(define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
(UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..b6f66f979ff71d40d1e09292c1c12df3c262ce9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
@@ -0,0 +1,88 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "tree-vect.h"
+
+#define N 1000
+#define CHECK_ERROR(cond, fmt, ...) \
+ do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } while (0)
+
+// Generates all test components for a given type combo
+#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT) \
+ A_TYPE a_##A_TYPE##_##C_TYPE[N]; \
+ A_TYPE b_##A_TYPE##_##C_TYPE[N]; \
+ C_TYPE c_##A_TYPE##_##C_TYPE[N]; \
+ C_TYPE ref_##A_TYPE##_##C_TYPE[N]; \
+ \
+ void init_##A_TYPE##_##C_TYPE() { \
+ _Pragma ("GCC novector") \
+ for (int i = 0; i < N; i++) { \
+ a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3); \
+ b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7); \
+ } \
+ } \
+ \
+ void foo_##A_TYPE##_##C_TYPE() { \
+ for (int i = 0; i < N; i++) \
+ c_##A_TYPE##_##C_TYPE[i] = \
+ ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \
+ (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \
+ } \
+ \
+ void ref_##A_TYPE##_##C_TYPE##_compute() { \
+ _Pragma ("GCC novector") \
+ for (int i = 0; i < N; i++) \
+ ref_##A_TYPE##_##C_TYPE[i] = \
+ ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \
+ (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \
+ } \
+ \
+ void validate_##A_TYPE##_##C_TYPE(const char* variant_name) { \
+ _Pragma ("GCC novector") \
+ for (int i = 0; i < N; i++) { \
+ if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) { \
+ printf("FAIL [%s]: Index %d: got %lld, expected %lld\n", \
+ variant_name, i, \
+ (long long)c_##A_TYPE##_##C_TYPE[i], \
+ (long long)ref_##A_TYPE##_##C_TYPE[i]); \
+ __builtin_abort (); \
+ } \
+ } \
+ }
+
+// Runs the test for one combo with name output
+#define RUN_COMBO(A_TYPE, C_TYPE) \
+ do { \
+ init_##A_TYPE##_##C_TYPE(); \
+ foo_##A_TYPE##_##C_TYPE(); \
+ ref_##A_TYPE##_##C_TYPE##_compute(); \
+ validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE); \
+ } while (0)
+
+// Instantiate all valid combinations
+TEST_COMBO(int16_t, int8_t, int32_t, 8)
+TEST_COMBO(uint16_t, uint8_t, uint32_t, 8)
+TEST_COMBO(int32_t, int16_t, int64_t, 16)
+TEST_COMBO(uint32_t, uint16_t, uint64_t, 16)
+#if defined(__aarch64__)
+TEST_COMBO(int64_t, int32_t, __int128_t, 32)
+TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32)
+#endif
+
+int main() {
+ check_vect ();
+
+ RUN_COMBO(int16_t, int8_t);
+ RUN_COMBO(uint16_t, uint8_t);
+ RUN_COMBO(int32_t, int16_t);
+ RUN_COMBO(uint32_t, uint16_t);
+#if defined(__aarch64__)
+ RUN_COMBO(int64_t, int32_t);
+ RUN_COMBO(uint64_t, uint32_t);
+#endif
+
+ return 0;
+}
+