[PATCH][AArch64] Use intrinsics for upper saturating shift right
The use of vqshrn_high_n_s32 was triggering an unneeded register move,
because sqshrn2 is destructive but was declared as inline assembly in
arm_neon.h.  This patch implements sqshrn2 and uqshrn2 as actual intrinsics
which do not trigger the unnecessary move, along with new tests to cover
them.

Bootstrapped and regression tested on aarch64-none-linux-gnu.

gcc/ChangeLog

2020-11-03  David Candler

	* config/aarch64/aarch64-builtins.c (TYPES_SHIFT2IMM): Add define.
	(TYPES_SHIFT2IMM_UUSS): Add define.
	* config/aarch64/aarch64-simd.md (aarch64_qshrn2_n): Add new insn
	for upper saturating shift right.
	* config/aarch64/aarch64-simd-builtins.def: Add intrinsics.
	* config/aarch64/arm_neon.h (vqrshrn_high_n_s16): Expand using
	intrinsic rather than inline asm.
	(vqrshrn_high_n_s32): Likewise.
	(vqrshrn_high_n_s64): Likewise.
	(vqrshrn_high_n_u16): Likewise.
	(vqrshrn_high_n_u32): Likewise.
	(vqrshrn_high_n_u64): Likewise.
	(vqrshrun_high_n_s16): Likewise.
	(vqrshrun_high_n_s32): Likewise.
	(vqrshrun_high_n_s64): Likewise.
	(vqshrn_high_n_s16): Likewise.
	(vqshrn_high_n_s32): Likewise.
	(vqshrn_high_n_s64): Likewise.
	(vqshrn_high_n_u16): Likewise.
	(vqshrn_high_n_u32): Likewise.
	(vqshrn_high_n_u64): Likewise.
	(vqshrun_high_n_s16): Likewise.
	(vqshrun_high_n_s32): Likewise.
	(vqshrun_high_n_s64): Likewise.

gcc/testsuite/ChangeLog

2020-11-03  David Candler

	* gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c: New testcase.
	* gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c: Likewise.
	* gcc.target/aarch64/narrow_high-intrinsics.c: Update expected
	assembler for sqshrun2, sqrshrun2, sqshrn2, uqshrn2, sqrshrn2 and
	uqrshrn2.

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 4f33dd936c7..f93f4e29c89 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -254,6 +254,10 @@ aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
 #define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
 static enum aarch64_type_qualifiers
+aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
+#define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)
+static enum aarch64_type_qualifiers
 aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_none, qualifier_immediate };
 #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
@@ -265,14 +269,16 @@ static enum aarch64_type_qualifiers
 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
+#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_immediate };
+#define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)
 
 static enum aarch64_type_qualifiers
 aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate};
 #define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
-static enum aarch64_type_qualifiers
-aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
 #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
 #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
 #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d1b21102b2f..0b82b9c072b 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -285,6 +285,13 @@
   BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, ALL)
   BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, ALL)
   BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, ALL)
+  /* Implemented by aarch64_qshrn2_n.  */
+  BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, ALL)
+  BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, ALL)
+  BUILTIN_VQN (SHIFT2IMM, sqshrn2_n, 0, ALL)
+  BUILTIN_VQN (USHIFT2IMM, uqshrn2_n, 0, ALL)
+  BUILTIN_VQN (SHIFT2IMM, sqrshrn2_n, 0, ALL)
+  BUILTIN_VQN (USHIFT2IMM, uqrshrn2_n, 0, ALL)
   /* Implemented by aarch64_si_n.  */
   BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0, ALL)
   BUILTIN_VSDQ_I_DI (USHI
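For reference, a minimal use of the affected intrinsic looks like the
following (an illustrative example, not part of the patch; the function
name is made up).  Because sqshrn2 is destructive, the old inline-asm
definition of vqshrn_high_n_s32 could force an extra register move to get
the low half into the asm's output register, whereas the builtin-based
definition lets the register allocator tie the operands directly:

#include <arm_neon.h>

/* Narrow two int32x4_t vectors into one int16x8_t with a saturating
   shift right by 5.  The first call produces the low half, the second
   writes the high half into the same register (sqshrn2 is destructive).  */
int16x8_t
narrow_pair (int32x4_t lo, int32x4_t hi)
{
  int16x4_t low_half = vqshrn_n_s32 (lo, 5);
  return vqshrn_high_n_s32 (low_half, hi, 5);
}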
Re: [PATCH][AArch64] Use intrinsics for upper saturating shift right
Hi Richard,

Thanks for the feedback.

Richard Sandiford writes:
> > diff --git a/gcc/config/aarch64/aarch64-builtins.c
> > b/gcc/config/aarch64/aarch64-builtins.c
> > index 4f33dd936c7..f93f4e29c89 100644
> > --- a/gcc/config/aarch64/aarch64-builtins.c
> > +++ b/gcc/config/aarch64/aarch64-builtins.c
> > @@ -254,6 +254,10 @@
> > aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> >  #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
> >  #define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
> >  static enum aarch64_type_qualifiers
> > +aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> > +  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
> > +#define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)
> > +static enum aarch64_type_qualifiers
> >  aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> >    = { qualifier_unsigned, qualifier_none, qualifier_immediate };
> >  #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
> > @@ -265,14 +269,16 @@ static enum aarch64_type_qualifiers
> >  aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> >    = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
> >  #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
> > +#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
> > +static enum aarch64_type_qualifiers
> > +aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> > +  = { qualifier_unsigned, qualifier_unsigned, qualifier_none,
> > qualifier_immediate };
> > +#define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)
> >
> >  static enum aarch64_type_qualifiers
> >  aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> >    = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate};
> >  #define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
> > -static enum aarch64_type_qualifiers
> > -aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> > -  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
> >  #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
> >  #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
> >  #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
> Very minor, but I think it would be better to keep
> aarch64_types_ternop_s_imm_qualifiers where it is and define
> TYPES_SHIFT2IMM here rather than above.  For better or worse,
> the current style seems to be to keep the defines next to the
> associated arrays, rather than group them based on the TYPES_* name.
>
> > diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def
> > b/gcc/config/aarch64/aarch64-simd-builtins.def
> > index d1b21102b2f..0b82b9c072b 100644
> > --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> > +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> > @@ -285,6 +285,13 @@
> >    BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, ALL)
> >    BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, ALL)
> >    BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, ALL)
> > +  /* Implemented by aarch64_qshrn2_n.  */
> > +  BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, ALL)
> > +  BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, ALL)
> > +  BUILTIN_VQN (SHIFT2IMM, sqshrn2_n, 0, ALL)
> > +  BUILTIN_VQN (USHIFT2IMM, uqshrn2_n, 0, ALL)
> > +  BUILTIN_VQN (SHIFT2IMM, sqrshrn2_n, 0, ALL)
> > +  BUILTIN_VQN (USHIFT2IMM, uqrshrn2_n, 0, ALL)
> Using ALL is a holdover from the time (until a few weeks ago) when we
> didn't record function attributes.  New intrinsics should therefore
> have something more specific than ALL.
>
> We discussed offline whether the Q flag side effect of the intrinsics
> should be observable or not, and the conclusion was that it shouldn't.
> I think we can therefore treat these functions as pure functions,
> meaning that they should have flags NONE rather than ALL.
>
> For that reason, I think we should also remove the Set_Neon_Cumulative_Sat
> and CHECK_CUMULATIVE_SAT parts of the test (sorry).
>
> Other than that, the patch looks good to go.
>
> Thanks,
> Richard

I've updated the patch with TYPES_SHIFT2IMM moved, the builtins changed to
NONE, and the Q flag portion of the tests removed.

Thanks,
David

diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 4f33dd936c7..a9fc0de9de9 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -265,6 +265,11 @@ static enum aarch64_type_qualifiers
 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
+#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_uns
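To make the arm_neon.h side of the change concrete (the attached diff is
cut off above before it reaches arm_neon.h), the intrinsic-based
definitions follow the usual pattern sketched below.  The builtin name and
its mode suffix here are my assumption, derived from the sqshrn2_n entry
registered in aarch64-simd-builtins.def, not copied from the patch:

/* Sketch only: the exact builtin name is assumed from the sqshrn2_n
   builtin registered for the V4SI mode.  */
__extension__ extern __inline int16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
vqshrn_high_n_s32 (int16x4_t __a, int32x4_t __b, const int __c)
{
  return __builtin_aarch64_sqshrn2_nv4si (__a, __b, __c);
}

Expanding through the builtin rather than inline asm means the compiler
sees the operation's real operand constraints, so the destructive sqshrn2
can overwrite the register already holding __a without a separate move.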
[PATCH][AArch64] Skip arm targets in vq*shr*n_high_n intrinsic tests
Hi,

These tests should be skipped for arm targets as the intrinsics are only
supported on aarch64.

Tested on aarch64 and aarch32.

gcc/testsuite/ChangeLog

2020-11-09  David Candler

	* gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c: Added
	skip directive.
	* gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c: Likewise.
	* gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c: Likewise.

diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c
index d9add2908d1..6ebe0743cc4 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c
@@ -1,3 +1,6 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c
index 1a3788cd14a..49d319d0181 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c
@@ -1,3 +1,6 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c
index 72aecc15ba2..8d06f113dc8 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c
@@ -1,3 +1,6 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c
index 4885c029d1a..e8235fe9693 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c
@@ -1,3 +1,6 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
 #include <arm_neon.h>
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
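As a quick standalone sanity check that the intrinsic now maps onto a
single instruction, a compile-only test along the following lines can be
used with an aarch64 compiler.  This is illustrative only; it is not one
of the files added by either patch, and the scan pattern is an assumption
about the expected code rather than text taken from the testsuite:

/* Illustrative compile test, not part of the patch.  */
/* { dg-do compile } */
/* { dg-options "-O2" } */

#include <arm_neon.h>

int16x8_t
test_vqshrn_high_n_s32 (int16x4_t a, int32x4_t b)
{
  /* Expect a single sqshrn2 writing the upper half, with no extra mov.  */
  return vqshrn_high_n_s32 (a, b, 7);
}

/* { dg-final { scan-assembler-times "\\tsqshrn2\\tv" 1 } } */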