Hi Tamar,
> On 2 Sep 2025, at 11:46, Tamar Christina <[email protected]> wrote:
>
> This implements the new vector optabs vec_<su>addh_narrow<mode>
> adding support for in-vectorizer use for early break.
The Advanced SIMD ADDHN instruction doesn’t widen its operands before the
addition as far as I know.
That is why there are no “SADDHN” or “UADDHN” instructions, which would
correspond to sign-extending or zero-extending the operands.
So I think there’s something off here.
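
To make that concrete, here is a scalar sketch of what a single ADDHN lane
computes for 16-bit elements narrowing to 8 bits (my understanding of the
semantics, not code from the patch):

  /* The add is done at the element width, modulo 2^16, and only then is
     the high half taken.  Because no widening of the operands happens
     first, the result is bit-identical whether the inputs are viewed as
     int16_t or uint16_t.  */
  uint8_t addhn_lane (uint16_t a, uint16_t b)
  {
    return (uint8_t) ((uint16_t) (a + b) >> 8);
  }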
>
> Bootstrapped and regtested on aarch64-none-linux-gnu,
> arm-none-linux-gnueabihf, and x86_64-pc-linux-gnu
> (-m32 and -m64) with no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> * config/aarch64/aarch64-simd.md (vec_<su>addh_narrow<mode>): New.
> * config/aarch64/iterators.md (UNSPEC_SADDHN, UNSPEC_UADDHN): New.
> (su, ADDHN): Use them.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/vect-addhn_1.c: New test.
>
> ---
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 8b75c3d7f6d5ddc5c44f841da961423caaebe8b8..a1737792093ffea5f72c10c1cb4df6322280dbf4 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -949,6 +949,22 @@ (define_expand "vec_widen_<su>abd_lo_<mode>"
> }
> )
>
> +(define_expand "vec_<su>addh_narrow<mode>"
> +  [(set (match_operand:<VNARROWQ> 0 "register_operand")
> +        (unspec:VQN [(plus:VQN (match_operand:VQN 1 "register_operand")
> +                               (match_operand:VQN 2 "register_operand"))]
> +          ADDHN))]
> +  "TARGET_SIMD"
> +  {
> +    rtx shft
> +      = aarch64_simd_gen_const_vector_dup (<MODE>mode,
> +                                           GET_MODE_UNIT_BITSIZE (<MODE>mode) / 2);
> +    emit_insn (gen_aarch64_addhn<mode>_insn (operands[0], operands[1],
> +                                             operands[2], shft));
> +    DONE;
So this would produce the exact same RTL for the “vec_saddh_narrow” and
“vec_uaddh_narrow” cases (illustrated just below).
> +  }
> +)
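
To illustrate that with the ACLE intrinsics (a small example of mine, not
from the patch): both of these compile to the same ADDHN instruction; the
signedness in the intrinsic name only reflects the element types, not a
different operation:

  #include <arm_neon.h>

  /* Both emit "addhn v0.8b, v0.8h, v1.8h"; there is no separate signed
     or unsigned form of the instruction.  */
  int8x8_t  addhn_s (int16x8_t a, int16x8_t b)   { return vaddhn_s16 (a, b); }
  uint8x8_t addhn_u (uint16x8_t a, uint16x8_t b) { return vaddhn_u16 (a, b); }
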
> +
> (define_insn "aarch64_<su>abal<mode>"
> [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
> (plus:<VWIDE>
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index b15e57843fed78ffe7edd927cfd7acbf395414a4..c65509fe949ad3056d194f05f901e76382d62ad8 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -806,6 +806,8 @@ (define_c_enum "unspec"
> UNSPEC_UHADD ; Used in aarch64-simd.md.
> UNSPEC_SRHADD ; Used in aarch64-simd.md.
> UNSPEC_URHADD ; Used in aarch64-simd.md.
> + UNSPEC_SADDHN ; Used in aarch64-simd.md.
> + UNSPEC_UADDHN ; Used in aarch64-simd.md.
As per above, there are no such instructions in Advanced SIMD, so it would be
confusing to have UNSPEC_ values to represent them.
Thanks,
Kyrill
> UNSPEC_SHSUB ; Used in aarch64-simd.md.
> UNSPEC_UHSUB ; Used in aarch64-simd.md.
> UNSPEC_SQDMULH ; Used in aarch64-simd.md.
> @@ -3251,6 +3253,8 @@ (define_int_iterator HADD [UNSPEC_SHADD UNSPEC_UHADD])
>
> (define_int_iterator RHADD [UNSPEC_SRHADD UNSPEC_URHADD])
>
> +(define_int_iterator ADDHN [UNSPEC_SADDHN UNSPEC_UADDHN])
> +
> (define_int_iterator BSL_DUP [1 2])
>
> (define_int_iterator DOTPROD [UNSPEC_SDOT UNSPEC_UDOT])
> @@ -4250,7 +4254,8 @@ (define_int_attr su [(UNSPEC_SADDV "s")
> (UNSPEC_COND_SCVTF "s")
> (UNSPEC_COND_UCVTF "u")
> (UNSPEC_SMULHS "s") (UNSPEC_UMULHS "u")
> - (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")])
> + (UNSPEC_SMULHRS "s") (UNSPEC_UMULHRS "u")
> + (UNSPEC_SADDHN "s") (UNSPEC_UADDHN "u")])
>
> (define_int_attr sur [(UNSPEC_SHADD "s") (UNSPEC_UHADD "u")
> (UNSPEC_SRHADD "sr") (UNSPEC_URHADD "ur")
> diff --git a/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..b6f66f979ff71d40d1e09292c1c12df3c262ce9c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
> @@ -0,0 +1,88 @@
> +/* { dg-require-effective-target vect_int } */
> +
> +#include <stdint.h>
> +#include <stdio.h>
> +
> +#include "tree-vect.h"
> +
> +#define N 1000
> +#define CHECK_ERROR(cond, fmt, ...) \
> +  do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } while (0)
> +
> +// Generates all test components for a given type combo
> +#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT) \
> +  A_TYPE a_##A_TYPE##_##C_TYPE[N]; \
> +  A_TYPE b_##A_TYPE##_##C_TYPE[N]; \
> +  C_TYPE c_##A_TYPE##_##C_TYPE[N]; \
> +  C_TYPE ref_##A_TYPE##_##C_TYPE[N]; \
> + \
> +  void init_##A_TYPE##_##C_TYPE() { \
> +    _Pragma ("GCC novector") \
> +    for (int i = 0; i < N; i++) { \
> +      a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3); \
> +      b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7); \
> +    } \
> +  } \
> + \
> +  void foo_##A_TYPE##_##C_TYPE() { \
> +    for (int i = 0; i < N; i++) \
> +      c_##A_TYPE##_##C_TYPE[i] = \
> +        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \
> +         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \
> +  } \
> + \
> +  void ref_##A_TYPE##_##C_TYPE##_compute() { \
> +    _Pragma ("GCC novector") \
> +    for (int i = 0; i < N; i++) \
> +      ref_##A_TYPE##_##C_TYPE[i] = \
> +        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] + \
> +         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT; \
> +  } \
> + \
> +  void validate_##A_TYPE##_##C_TYPE(const char* variant_name) { \
> +    _Pragma ("GCC novector") \
> +    for (int i = 0; i < N; i++) { \
> +      if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) { \
> +        printf("FAIL [%s]: Index %d: got %lld, expected %lld\n", \
> +               variant_name, i, \
> +               (long long)c_##A_TYPE##_##C_TYPE[i], \
> +               (long long)ref_##A_TYPE##_##C_TYPE[i]); \
> +        __builtin_abort (); \
> +      } \
> +    } \
> +  }
> +
> +// Runs the test for one combo with name output
> +#define RUN_COMBO(A_TYPE, C_TYPE) \
> + do { \
> + init_##A_TYPE##_##C_TYPE(); \
> + foo_##A_TYPE##_##C_TYPE(); \
> + ref_##A_TYPE##_##C_TYPE##_compute(); \
> + validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE); \
> + } while (0)
> +
> +// Instantiate all valid combinations
> +TEST_COMBO(int16_t, int8_t, int32_t, 8)
> +TEST_COMBO(uint16_t, uint8_t, uint32_t, 8)
> +TEST_COMBO(int32_t, int16_t, int64_t, 16)
> +TEST_COMBO(uint32_t, uint16_t, uint64_t, 16)
> +#if defined(__aarch64__)
> +TEST_COMBO(int64_t, int32_t, __int128_t, 32)
> +TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32)
> +#endif
> +
> +int main() {
> + check_vect ();
> +
> + RUN_COMBO(int16_t, int8_t);
> + RUN_COMBO(uint16_t, uint8_t);
> + RUN_COMBO(int32_t, int16_t);
> + RUN_COMBO(uint32_t, uint16_t);
> +#if defined(__aarch64__)
> + RUN_COMBO(int64_t, int32_t);
> + RUN_COMBO(uint64_t, uint32_t);
> +#endif
> +
> + return 0;
> +}
> +
>
>
> --
> <rb19744.patch>