This implements the new vector optabs vec_<su>addh_narrow_hi_<mode>, vec_<su>addh_narrow_lo_<mode> and vec_<su>addh_narrow<mode>, adding support for in-vectorizer recognition of addhn.
The existing codegen tests will now recognize the instructions through the optabs rather than combine. Bootstrapped Regtested on aarch64-none-linux-gnu, arm-none-linux-gnueabihf, x86_64-pc-linux-gnu -m32, -m64 and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-simd.md (vec_<su>addh_narrow_hi_<mode>, vec_<su>addh_narrow_lo_<mode>, vec_<su>addh_narrow<mode>): New. * config/aarch64/iterators.md (UNSPEC_SADDHN, UNSPEC_UADDHN): New. (su, ADDHN): Use them. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-addhn_1.c: New test. --- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..905f7cfc23d6245f545094f12fc220e49dbf333e 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -949,6 +949,61 @@ (define_expand "vec_widen_<su>abd_lo_<mode>" } ) +(define_expand "vec_<su>addh_narrow_hi_<mode>" + [(set (match_operand:<VNARROWQ2> 0 "register_operand") + (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand") + (match_operand:VQN 2 "register_operand")) + (match_operand:<VNARROWQ> 3 "register_operand")] + ADDHN))] + "TARGET_SIMD" + { + rtx shft + = aarch64_simd_gen_const_vector_dup (<MODE>mode, + GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_addhn2<mode>_insn_be (operands[0], + operands[3], operands[1], operands[2], shft)); + else + emit_insn (gen_aarch64_addhn2<mode>_insn_le (operands[0], + operands[3], operands[1], operands[2], shft)); + DONE; + } +) + +(define_expand "vec_<su>addh_narrow_lo_<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand") + (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand") + (match_operand:VQN 2 "register_operand"))] + ADDHN))] + "TARGET_SIMD" + { + rtx shft + = aarch64_simd_gen_const_vector_dup (<MODE>mode, + GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2); + emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1], + operands[2], 
shft)); + DONE; + } +) + +(define_expand "vec_<su>addh_narrow<mode>" + [(set (match_operand:<VNARROWQ2> 0 "register_operand") + (unspec:VQN [(match_operand:VQN 1 "register_operand") + (match_operand:VQN 2 "register_operand") + (match_operand:VQN 3 "register_operand") + (match_operand:VQN 4 "register_operand")] + ADDHN))] + "TARGET_SIMD" + { + rtx low = gen_reg_rtx (<VNARROWQ>mode); + emit_insn (gen_vec_<su>addh_narrow_lo_<mode> (low, operands[1], + operands[2])); + emit_insn (gen_vec_<su>addh_narrow_hi_<mode> (operands[0], operands[3], + operands[4], low)); + DONE; + } +) + (define_insn "aarch64_<su>abal<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (plus:<VWIDE> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index c3771d9402baf1a09ad51e6149e65dcadf0adc20..f559c1508749766652e523640a6d7df9a5162dee 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -806,6 +806,8 @@ (define_c_enum "unspec" UNSPEC_UHADD ; Used in aarch64-simd.md. UNSPEC_SRHADD ; Used in aarch64-simd.md. UNSPEC_URHADD ; Used in aarch64-simd.md. + UNSPEC_SADDHN ; Used in aarch64-simd.md. + UNSPEC_UADDHN ; Used in aarch64-simd.md. UNSPEC_SHSUB ; Used in aarch64-simd.md. UNSPEC_UHSUB ; Used in aarch64-simd.md. UNSPEC_SQDMULH ; Used in aarch64-simd.md. 
@@ -3249,6 +3251,8 @@ (define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD]) (define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD]) +(define_int_iterator ADDHN [UNSPEC_SADDHN UNSPEC_UADDHN]) + (define_int_iterator BSL_DUP [1 2]) (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) @@ -4248,7 +4252,8 @@ (define_int_attr su [(UNSPEC_SADDV "s") (UNSPEC_COND_SCVTF "s") (UNSPEC_COND_UCVTF "u") (UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u") - (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")]) + (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u") + (UNSPEC_SADDHN "s") (UNSPEC_UADDHN "u")]) (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") diff --git a/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c b/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c new file mode 100644 index 0000000000000000000000000000000000000000..60c1c6364cd46d6fd1e8368a90a307a5b8f08308 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c @@ -0,0 +1,91 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_neon_ok { target arm*-*-* } } */ +/* { dg-add-options arm_neon } */ + +#include <stdint.h> +#include <stdio.h> + +#include "tree-vect.h" + +#define N 1000 +#define CHECK_ERROR(cond, fmt, ...) 
\ + do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } while (0) + +// Generates all test components for a given type combo +#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT) \ + A_TYPE a_##A_TYPE##_##C_TYPE[N]; \ + A_TYPE b_##A_TYPE##_##C_TYPE[N]; \ + C_TYPE c_##A_TYPE##_##C_TYPE[N]; \ + C_TYPE ref_##A_TYPE##_##C_TYPE[N]; \ + \ + void init_##A_TYPE##_##C_TYPE() { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) { \ + a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3); \ + b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7); \ + } \ + } \ + \ + void foo_##A_TYPE##_##C_TYPE() { \ + for (int i = 0; i < N; i++) \ + c_##A_TYPE##_##C_TYPE[i] = \ + ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \ + (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \ + } \ + \ + void ref_##A_TYPE##_##C_TYPE##_compute() { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) \ + ref_##A_TYPE##_##C_TYPE[i] = \ + ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \ + (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \ + } \ + \ + void validate_##A_TYPE##_##C_TYPE(const char* variant_name) { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) { \ + if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) { \ + printf("FAIL [%s]: Index %d: got %lld, expected %lld\n", \ + variant_name, i, \ + (long long)c_##A_TYPE##_##C_TYPE[i], \ + (long long)ref_##A_TYPE##_##C_TYPE[i]); \ + __builtin_abort (); \ + } \ + } \ + } + +// Runs the test for one combo with name output +#define RUN_COMBO(A_TYPE, C_TYPE) \ + do { \ + init_##A_TYPE##_##C_TYPE(); \ + foo_##A_TYPE##_##C_TYPE(); \ + ref_##A_TYPE##_##C_TYPE##_compute(); \ + validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE); \ + } while (0) + +// Instantiate all valid combinations +TEST_COMBO(int16_t, int8_t, int32_t, 8) +TEST_COMBO(uint16_t, uint8_t, uint32_t, 8) +TEST_COMBO(int32_t, int16_t, int64_t, 16) +TEST_COMBO(uint32_t, uint16_t, uint64_t, 16) +#if defined(__aarch64__) +TEST_COMBO(int64_t, int32_t, __int128_t, 32) 
+TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32) +#endif + +int main() { + check_vect (); + + RUN_COMBO(int16_t, int8_t); + RUN_COMBO(uint16_t, uint8_t); + RUN_COMBO(int32_t, int16_t); + RUN_COMBO(uint32_t, uint16_t); +#if defined(__aarch64__) + RUN_COMBO(int64_t, int32_t); + RUN_COMBO(uint64_t, uint32_t); +#endif + + return 0; +} + +/* { dg-final { scan-tree-dump-times "addhn pattern recognized" 16 "vect" { target { aarch64-*-* } } } } */ --
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..905f7cfc23d6245f545094f12fc220e49dbf333e 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -949,6 +949,61 @@ (define_expand "vec_widen_<su>abd_lo_<mode>" } ) +(define_expand "vec_<su>addh_narrow_hi_<mode>" + [(set (match_operand:<VNARROWQ2> 0 "register_operand") + (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand") + (match_operand:VQN 2 "register_operand")) + (match_operand:<VNARROWQ> 3 "register_operand")] + ADDHN))] + "TARGET_SIMD" + { + rtx shft + = aarch64_simd_gen_const_vector_dup (<MODE>mode, + GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_aarch64_addhn2<mode>_insn_be (operands[0], + operands[3], operands[1], operands[2], shft)); + else + emit_insn (gen_aarch64_addhn2<mode>_insn_le (operands[0], + operands[3], operands[1], operands[2], shft)); + DONE; + } +) + +(define_expand "vec_<su>addh_narrow_lo_<mode>" + [(set (match_operand:<VNARROWQ> 0 "register_operand") + (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand") + (match_operand:VQN 2 "register_operand"))] + ADDHN))] + "TARGET_SIMD" + { + rtx shft + = aarch64_simd_gen_const_vector_dup (<MODE>mode, + GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2); + emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1], + operands[2], shft)); + DONE; + } +) + +(define_expand "vec_<su>addh_narrow<mode>" + [(set (match_operand:<VNARROWQ2> 0 "register_operand") + (unspec:VQN [(match_operand:VQN 1 "register_operand") + (match_operand:VQN 2 "register_operand") + (match_operand:VQN 3 "register_operand") + (match_operand:VQN 4 "register_operand")] + ADDHN))] + "TARGET_SIMD" + { + rtx low = gen_reg_rtx (<VNARROWQ>mode); + emit_insn (gen_vec_<su>addh_narrow_lo_<mode> (low, operands[1], + operands[2])); + emit_insn (gen_vec_<su>addh_narrow_hi_<mode> (operands[0], operands[3], + operands[4], low)); 
+ DONE; + } +) + (define_insn "aarch64_<su>abal<mode>" [(set (match_operand:<VWIDE> 0 "register_operand" "=w") (plus:<VWIDE> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index c3771d9402baf1a09ad51e6149e65dcadf0adc20..f559c1508749766652e523640a6d7df9a5162dee 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -806,6 +806,8 @@ (define_c_enum "unspec" UNSPEC_UHADD ; Used in aarch64-simd.md. UNSPEC_SRHADD ; Used in aarch64-simd.md. UNSPEC_URHADD ; Used in aarch64-simd.md. + UNSPEC_SADDHN ; Used in aarch64-simd.md. + UNSPEC_UADDHN ; Used in aarch64-simd.md. UNSPEC_SHSUB ; Used in aarch64-simd.md. UNSPEC_UHSUB ; Used in aarch64-simd.md. UNSPEC_SQDMULH ; Used in aarch64-simd.md. @@ -3249,6 +3251,8 @@ (define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD]) (define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD]) +(define_int_iterator ADDHN [UNSPEC_SADDHN UNSPEC_UADDHN]) + (define_int_iterator BSL_DUP [1 2]) (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT]) @@ -4248,7 +4252,8 @@ (define_int_attr su [(UNSPEC_SADDV "s") (UNSPEC_COND_SCVTF "s") (UNSPEC_COND_UCVTF "u") (UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u") - (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")]) + (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u") + (UNSPEC_SADDHN "s") (UNSPEC_UADDHN "u")]) (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u") (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur") diff --git a/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c b/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c new file mode 100644 index 0000000000000000000000000000000000000000..60c1c6364cd46d6fd1e8368a90a307a5b8f08308 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-addhn_1.c @@ -0,0 +1,91 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-require-effective-target arm_neon_ok { target arm*-*-* } } */ +/* { dg-add-options arm_neon } */ + +#include <stdint.h> +#include <stdio.h> + +#include "tree-vect.h" + +#define N 1000 +#define CHECK_ERROR(cond, fmt, ...) 
\ + do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } while (0) + +// Generates all test components for a given type combo +#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT) \ + A_TYPE a_##A_TYPE##_##C_TYPE[N]; \ + A_TYPE b_##A_TYPE##_##C_TYPE[N]; \ + C_TYPE c_##A_TYPE##_##C_TYPE[N]; \ + C_TYPE ref_##A_TYPE##_##C_TYPE[N]; \ + \ + void init_##A_TYPE##_##C_TYPE() { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) { \ + a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3); \ + b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7); \ + } \ + } \ + \ + void foo_##A_TYPE##_##C_TYPE() { \ + for (int i = 0; i < N; i++) \ + c_##A_TYPE##_##C_TYPE[i] = \ + ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \ + (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \ + } \ + \ + void ref_##A_TYPE##_##C_TYPE##_compute() { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) \ + ref_##A_TYPE##_##C_TYPE[i] = \ + ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \ + (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \ + } \ + \ + void validate_##A_TYPE##_##C_TYPE(const char* variant_name) { \ + _Pragma ("GCC novector") \ + for (int i = 0; i < N; i++) { \ + if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) { \ + printf("FAIL [%s]: Index %d: got %lld, expected %lld\n", \ + variant_name, i, \ + (long long)c_##A_TYPE##_##C_TYPE[i], \ + (long long)ref_##A_TYPE##_##C_TYPE[i]); \ + __builtin_abort (); \ + } \ + } \ + } + +// Runs the test for one combo with name output +#define RUN_COMBO(A_TYPE, C_TYPE) \ + do { \ + init_##A_TYPE##_##C_TYPE(); \ + foo_##A_TYPE##_##C_TYPE(); \ + ref_##A_TYPE##_##C_TYPE##_compute(); \ + validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE); \ + } while (0) + +// Instantiate all valid combinations +TEST_COMBO(int16_t, int8_t, int32_t, 8) +TEST_COMBO(uint16_t, uint8_t, uint32_t, 8) +TEST_COMBO(int32_t, int16_t, int64_t, 16) +TEST_COMBO(uint32_t, uint16_t, uint64_t, 16) +#if defined(__aarch64__) +TEST_COMBO(int64_t, int32_t, __int128_t, 32) 
+TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32) +#endif + +int main() { + check_vect (); + + RUN_COMBO(int16_t, int8_t); + RUN_COMBO(uint16_t, uint8_t); + RUN_COMBO(int32_t, int16_t); + RUN_COMBO(uint32_t, uint16_t); +#if defined(__aarch64__) + RUN_COMBO(int64_t, int32_t); + RUN_COMBO(uint64_t, uint32_t); +#endif + + return 0; +} + +/* { dg-final { scan-tree-dump-times "addhn pattern recognized" 16 "vect" { target { aarch64-*-* } } } } */