[PATCH][AArch64] Use intrinsics for upper saturating shift right

2020-11-03 Thread David Candler via Gcc-patches
The use of vqshrn_high_n_s32 was triggering an unneeded register move, because
sqshrn2 is destructive but was declared as inline assembly in arm_neon.h. This
patch implements sqshrn2 and uqshrn2 as actual intrinsics which do not trigger
the unnecessary move, along with new tests to cover them.

Bootstrapped and regression tested on aarch64-none-linux-gnu

gcc/ChangeLog

2020-11-03  David Candler  

* config/aarch64/aarch64-builtins.c
(TYPES_SHIFT2IMM): Add define.
(TYPES_SHIFT2IMM_UUSS): Add define.
* config/aarch64/aarch64-simd.md
(aarch64_qshrn2_n): Add new insn for upper saturating 
shift right.
* config/aarch64/aarch64-simd-builtins.def: Add intrinsics.
* config/aarch64/arm_neon.h:
(vqrshrn_high_n_s16): Expand using intrinsic rather than inline asm.
(vqrshrn_high_n_s32): Likewise.
(vqrshrn_high_n_s64): Likewise.
(vqrshrn_high_n_u16): Likewise.
(vqrshrn_high_n_u32): Likewise.
(vqrshrn_high_n_u64): Likewise.
(vqrshrun_high_n_s16): Likewise.
(vqrshrun_high_n_s32): Likewise.
(vqrshrun_high_n_s64): Likewise.
(vqshrn_high_n_s16): Likewise.
(vqshrn_high_n_s32): Likewise.
(vqshrn_high_n_s64): Likewise.
(vqshrn_high_n_u16): Likewise.
(vqshrn_high_n_u32): Likewise.
(vqshrn_high_n_u64): Likewise.
(vqshrun_high_n_s16): Likewise.
(vqshrun_high_n_s32): Likewise.
(vqshrun_high_n_s64): Likewise.

gcc/testsuite/ChangeLog

2020-11-03  David Candler  

* gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c: New testcase.
* gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c: Likewise.
* gcc.target/aarch64/narrow_high-intrinsics.c: Update expected assembler
for sqshrun2, sqrshrun2, sqshrn2, uqshrn2, sqrshrn2 and uqrshrn2.diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 4f33dd936c7..f93f4e29c89 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -254,6 +254,10 @@ aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
 #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
 #define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
 static enum aarch64_type_qualifiers
+aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
+#define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)
+static enum aarch64_type_qualifiers
 aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_none, qualifier_immediate };
 #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
@@ -265,14 +269,16 @@ static enum aarch64_type_qualifiers
 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
+#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_immediate };
+#define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)
 
 static enum aarch64_type_qualifiers
 aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate};
 #define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
-static enum aarch64_type_qualifiers
-aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
-  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
 #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
 #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
 #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d1b21102b2f..0b82b9c072b 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -285,6 +285,13 @@
   BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, ALL)
   BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, ALL)
   BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, ALL)
+  /* Implemented by aarch64_qshrn2_n.  */
+  BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, ALL)
+  BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, ALL)
+  BUILTIN_VQN (SHIFT2IMM, sqshrn2_n, 0, ALL)
+  BUILTIN_VQN (USHIFT2IMM, uqshrn2_n, 0, ALL)
+  BUILTIN_VQN (SHIFT2IMM, sqrshrn2_n, 0, ALL)
+  BUILTIN_VQN (USHIFT2IMM, uqrshrn2_n, 0, ALL)
   /* Implemented by aarch64_si_n.  */
   BUILTIN_VSDQ_I_DI (SHIFTINSERT, ssri_n, 0, ALL)
   BUILTIN_VSDQ_I_DI (USHI

Re: [PATCH][AArch64] Use intrinsics for upper saturating shift right

2020-11-05 Thread David Candler via Gcc-patches
Hi Richard,

Thanks for the feedback.

Richard Sandiford  writes:
> > diff --git a/gcc/config/aarch64/aarch64-builtins.c 
> > b/gcc/config/aarch64/aarch64-builtins.c
> > index 4f33dd936c7..f93f4e29c89 100644
> > --- a/gcc/config/aarch64/aarch64-builtins.c
> > +++ b/gcc/config/aarch64/aarch64-builtins.c
> > @@ -254,6 +254,10 @@ 
> > aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> >  #define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
> >  #define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
> >  static enum aarch64_type_qualifiers
> > +aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> > +  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
> > +#define TYPES_SHIFT2IMM (aarch64_types_ternop_s_imm_qualifiers)
> > +static enum aarch64_type_qualifiers
> >  aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> >= { qualifier_unsigned, qualifier_none, qualifier_immediate };
> >  #define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
> > @@ -265,14 +269,16 @@ static enum aarch64_type_qualifiers
> >  aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> >= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
> >  #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
> > +#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
> > +static enum aarch64_type_qualifiers
> > +aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> > +  = { qualifier_unsigned, qualifier_unsigned, qualifier_none, 
> > qualifier_immediate };
> > +#define TYPES_SHIFT2IMM_UUSS (aarch64_types_shift2_to_unsigned_qualifiers)
> >  
> >  static enum aarch64_type_qualifiers
> >  aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> >= { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate};
> >  #define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
> > -static enum aarch64_type_qualifiers
> > -aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
> > -  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate};
> >  #define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
> >  #define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
> >  #define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)
> 
> Very minor, but I think it would be better to keep
> aarch64_types_ternop_s_imm_qualifiers where it is and define
> TYPES_SHIFT2IMM here rather than above.  For better or worse,
> the current style seems to be to keep the defines next to the
> associated arrays, rather than group them based on the TYPES_* name.
> 
> > diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def 
> > b/gcc/config/aarch64/aarch64-simd-builtins.def
> > index d1b21102b2f..0b82b9c072b 100644
> > --- a/gcc/config/aarch64/aarch64-simd-builtins.def
> > +++ b/gcc/config/aarch64/aarch64-simd-builtins.def
> > @@ -285,6 +285,13 @@
> >BUILTIN_VSQN_HSDI (USHIFTIMM, uqshrn_n, 0, ALL)
> >BUILTIN_VSQN_HSDI (SHIFTIMM, sqrshrn_n, 0, ALL)
> >BUILTIN_VSQN_HSDI (USHIFTIMM, uqrshrn_n, 0, ALL)
> > +  /* Implemented by aarch64_qshrn2_n.  */
> > +  BUILTIN_VQN (SHIFT2IMM_UUSS, sqshrun2_n, 0, ALL)
> > +  BUILTIN_VQN (SHIFT2IMM_UUSS, sqrshrun2_n, 0, ALL)
> > +  BUILTIN_VQN (SHIFT2IMM, sqshrn2_n, 0, ALL)
> > +  BUILTIN_VQN (USHIFT2IMM, uqshrn2_n, 0, ALL)
> > +  BUILTIN_VQN (SHIFT2IMM, sqrshrn2_n, 0, ALL)
> > +  BUILTIN_VQN (USHIFT2IMM, uqrshrn2_n, 0, ALL)
> 
> Using ALL is a holdover from the time (until a few weeks ago) when we
> didn't record function attributes.  New intrinsics should therefore
> have something more specific than ALL.
> 
> We discussed offline whether the Q flag side effect of the intrinsics
> should be observable or not, and the conclusion was that it shouldn't.
> I think we can therefore treat these functions as pure functions,
> meaning that they should have flags NONE rather than ALL.
> 
> For that reason, I think we should also remove the Set_Neon_Cumulative_Sat
> and CHECK_CUMULATIVE_SAT parts of the test (sorry).
> 
> Other than that, the patch looks good to go.
> 
> Thanks,
> Richard

I've updated the patch with TYPES_SHIFT2IMM moved, the builtins changed
to NONE, and the Q flag portion of the tests removed.

Thanks,
David


ChangeLog
Description: ChangeLog
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 4f33dd936c7..a9fc0de9de9 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -265,6 +265,11 @@ static enum aarch64_type_qualifiers
 aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
   = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
 #define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)
+#define TYPES_USHIFT2IMM (aarch64_types_ternopu_imm_qualifiers)
+static enum aarch64_type_qualifiers
+aarch64_types_shift2_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
+  = { qualifier_unsigned, qualifier_uns

[PATCH][AArch64] Skip arm targets in vq*shr*n_high_n intrinsic tests

2020-11-09 Thread David Candler via Gcc-patches
Hi,

These tests should be skipped for arm targets as the instrinsics
are only supported on aarch64.

Tested on aarch64 and aarch32

gcc/testsuite/ChangeLog

2020-11-09  David Candler  

* gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c: Added skip 
directive.
* gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c: Likewise.
* gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c: Likewise.diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c
index d9add2908d1..6ebe0743cc4 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrn_high_n.c
@@ -1,3 +1,6 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
 #include 
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c
index 1a3788cd14a..49d319d0181 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqrshrun_high_n.c
@@ -1,3 +1,6 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
 #include 
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c
index 72aecc15ba2..8d06f113dc8 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrn_high_n.c
@@ -1,3 +1,6 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
 #include 
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c
index 4885c029d1a..e8235fe9693 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/vqshrun_high_n.c
@@ -1,3 +1,6 @@
+/* { dg-do run } */
+/* { dg-skip-if "" { arm*-*-* } } */
+
 #include 
 #include "arm-neon-ref.h"
 #include "compute-ref-data.h"