On Tue, Nov 9, 2021 at 6:51 AM Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > apinski--- via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > > From: Andrew Pinski <apin...@marvell.com> > > > > The function aarch64_evpc_ins would reuse the target even though > > it might be the same register as the two inputs. > > Instead of checking to see if we can reuse the target, creating > > a new register always is better. > > > > OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions. > > > > PR target/101529 > > > > gcc/ChangeLog: > > > > * config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target > > as an input instead create a new reg. > > > > gcc/testsuite/ChangeLog: > > > > * c-c++-common/torture/builtin-convertvector-2.c: New test. > > * c-c++-common/torture/builtin-shufflevector-2.c: New test. > > --- > > gcc/config/aarch64/aarch64.c | 8 ++++-- > > .../torture/builtin-convertvector-2.c | 26 +++++++++++++++++++ > > .../torture/builtin-shufflevector-2.c | 26 +++++++++++++++++++ > > 3 files changed, 58 insertions(+), 2 deletions(-) > > create mode 100644 > > gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c > > create mode 100644 > > gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c > > > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > > index 2c00583e12c..e4fc546fae7 100644 > > --- a/gcc/config/aarch64/aarch64.c > > +++ b/gcc/config/aarch64/aarch64.c > > @@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d) > > } > > gcc_assert (extractindex < nelt); > > > > - emit_move_insn (d->target, insv); > > + /* Use a new reg instead of target as one of the > > + operands might be target. */ > > + rtx original = gen_reg_rtx (GET_MODE (d->target)); > > + > > + emit_move_insn (original, insv); > > Why can't we just remove the move and pass insv directly to > create_input_operand? That'll encourage the RA to allocate insv and > d->target to the same register where possible (but will reload where > tying is not possible). > > OK with that change if it works.
Yes that worked, committed as r12-5078-g52fa771758 . Thanks, Andrew > > Thanks, > Richard > > > insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode); > > expand_operand ops[5]; > > create_output_operand (&ops[0], d->target, mode); > > - create_input_operand (&ops[1], d->target, mode); > > + create_input_operand (&ops[1], original, mode); > > create_integer_operand (&ops[2], 1 << idx); > > create_input_operand (&ops[3], extractv, mode); > > create_integer_operand (&ops[4], extractindex); > > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c > > b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c > > new file mode 100644 > > index 00000000000..d88f6a72b5c > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c > > @@ -0,0 +1,26 @@ > > +/* { dg-do run } */ > > +/* PR target/101529 */ > > + > > +typedef unsigned char __attribute__((__vector_size__ (1))) W; > > +typedef unsigned char __attribute__((__vector_size__ (8))) V; > > +typedef unsigned short __attribute__((__vector_size__ (16))) U; > > + > > +unsigned short us; > > + > > +/* aarch64 used to miscompile foo to just return 0. */ > > +W > > +foo (unsigned char uc) > > +{ > > + V v = __builtin_convertvector ((U){ } >= us, V); > > + return __builtin_shufflevector ((W){ }, v, 4) & uc; > > +} > > + > > +int > > +main (void) > > +{ > > + W x = foo (5); > > + if (x[0] != 5) > > + __builtin_abort(); > > + return 0; > > +} > > + > > diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c > > b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c > > new file mode 100644 > > index 00000000000..7c4999ed4e9 > > --- /dev/null > > +++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c > > @@ -0,0 +1,26 @@ > > +/* { dg-do run} */ > > +/* PR target/101529 */ > > +typedef unsigned char C; > > +typedef unsigned char __attribute__((__vector_size__ (8))) V; > > +typedef unsigned char __attribute__((__vector_size__ (32))) U; > > + > > +C c; > > + > > +/* aarch64 used to miscompile foo to just return a vector of 0s */ > > +V > > +foo (V v) > > +{ > > + v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }), > > + 0, 1, 8, 32, 8, 20, 36, 36); > > + return v; > > +} > > + > > +int > > +main (void) > > +{ > > + V v = foo ((V) { }); > > + for (unsigned i = 0; i < sizeof (v); i++) > > + if (v[i] != (i >= 2 ? 0xff : 0)) > > + __builtin_abort (); > > + return 0; > > +}