Joel Hutton via Gcc-patches <gcc-patches@gcc.gnu.org> writes: > Hi all, > > This patch adds support in the aarch64 backend for the vec_widen_shift > vect-pattern and makes a minor mid-end fix to support it. > > All 3 patches together bootstrapped and regression tested on aarch64. > > Ok for stage 1? > > gcc/ChangeLog: > > 2020-11-12 Joel Hutton <joel.hut...@arm.com> > > * config/aarch64/aarch64-simd.md: vec_widen_lshift_hi/lo<mode> > patterns > * tree-vect-stmts.c > (vectorizable_conversion): Fix for widen_lshift case > > gcc/testsuite/ChangeLog: > > 2020-11-12 Joel Hutton <joel.hut...@arm.com> > > * gcc.target/aarch64/vect-widen-lshift.c: New test. > > From 97af35b2d2a505dcefd8474cbd4bc3441b83ab02 Mon Sep 17 00:00:00 2001 > From: Joel Hutton <joel.hut...@arm.com> > Date: Thu, 12 Nov 2020 11:48:25 +0000 > Subject: [PATCH 3/3] [AArch64][vect] vec_widen_lshift pattern > > Add aarch64 vec_widen_lshift_lo/hi patterns and fix bug it triggers in > mid-end. > --- > gcc/config/aarch64/aarch64-simd.md | 66 +++++++++++++++++++ > .../gcc.target/aarch64/vect-widen-lshift.c | 60 +++++++++++++++++ > gcc/tree-vect-stmts.c | 9 ++- > 3 files changed, 133 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index > b4f56a2295926f027bd53e7456eec729af0cd6df..2bb39c530a1a861cb9bd3df0c2943f62bd6153d7 > 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -4711,8 +4711,74 @@ > [(set_attr "type" "neon_sat_shift_reg<q>")] > ) > > +(define_expand "vec_widen_<sur>shiftl_lo_<mode>" > + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > + (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") > + (match_operand:SI 2 > + "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] > + VSHLL))] > + "TARGET_SIMD" > + { > + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); > + emit_insn 
(gen_aarch64_<sur>shll<mode>_internal (operands[0], > operands[1], > + p, operands[2])); > + DONE; > + } > +) > + > +(define_expand "vec_widen_<sur>shiftl_hi_<mode>" > + [(set (match_operand:<VWIDE> 0 "register_operand") > + (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w") > + (match_operand:SI 2 > + "immediate_operand" "i")] > + VSHLL))] > + "TARGET_SIMD" > + { > + rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); > + emit_insn (gen_aarch64_<sur>shll2<mode>_internal (operands[0], > operands[1], > + p, operands[2])); > + DONE; > + } > +) > + > ;; vshll_n > > +(define_insn "aarch64_<sur>shll<mode>_internal" > + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > + (unspec:<VWIDE> [(vec_select:<VHALF> > + (match_operand:VQW 1 "register_operand" "w") > + (match_operand:VQW 2 "vect_par_cnst_lo_half" "")) > + (match_operand:SI 3 > + "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] > + VSHLL))] > + "TARGET_SIMD" > + { > + if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) > + return "shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; > + else > + return "<sur>shll\\t%0.<Vwtype>, %1.<Vhalftype>, %3"; > + } > + [(set_attr "type" "neon_shift_imm_long")] > +) > + > +(define_insn "aarch64_<sur>shll2<mode>_internal" > + [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > + (unspec:<VWIDE> [(vec_select:<VHALF> > + (match_operand:VQW 1 "register_operand" "w") > + (match_operand:VQW 2 "vect_par_cnst_hi_half" "")) > + (match_operand:SI 3 > + "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")] > + VSHLL))] > + "TARGET_SIMD" > + { > + if (INTVAL (operands[3]) == GET_MODE_UNIT_BITSIZE (<MODE>mode)) > + return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; > + else > + return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %3"; > + } > + [(set_attr "type" "neon_shift_imm_long")] > +) > + > (define_insn "aarch64_<sur>shll_n<mode>" > [(set (match_operand:<VWIDE> 0 "register_operand" "=w") > (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w") > 
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c > b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..23ed93d1dcbc3ca559efa6708b4ed5855fb6a050 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/vect-widen-lshift.c > @@ -0,0 +1,60 @@ > +/* { dg-do run } */ > +/* { dg-options "-O3 -save-temps" } */ > +#include <stdint.h> > +#include <string.h> > +
SVE targets will need a: #pragma GCC target "+nosve" here, since we'll generate different code for SVE. > +#define ARR_SIZE 1024 > + > +/* Should produce an shll,shll2 pair*/ > +void sshll_opt (int32_t *foo, int16_t *a, int16_t *b) > +{ > + for( int i = 0; i < ARR_SIZE - 3;i=i+4) > + { > + foo[i] = a[i] << 16; > + foo[i+1] = a[i+1] << 16; > + foo[i+2] = a[i+2] << 16; > + foo[i+3] = a[i+3] << 16; > + } > +} > + > +__attribute__((optimize (0))) > +void sshll_nonopt (int32_t *foo, int16_t *a, int16_t *b) > +{ > + for( int i = 0; i < ARR_SIZE - 3;i=i+4) > + { > + foo[i] = a[i] << 16; > + foo[i+1] = a[i+1] << 16; > + foo[i+2] = a[i+2] << 16; > + foo[i+3] = a[i+3] << 16; > + } > +} > + > + > +void __attribute__((optimize (0))) > +init(uint16_t *a, uint16_t *b) > +{ > + for( int i = 0; i < ARR_SIZE;i++) > + { > + a[i] = i; > + b[i] = 2*i; > + } > +} > + > +int __attribute__((optimize (0))) > +main() > +{ > + uint32_t foo_arr[ARR_SIZE]; > + uint32_t bar_arr[ARR_SIZE]; > + uint16_t a[ARR_SIZE]; > + uint16_t b[ARR_SIZE]; > + > + init(a, b); > + sshll_opt(foo_arr, a, b); > + sshll_nonopt(bar_arr, a, b); > + if (memcmp(foo_arr, bar_arr, ARR_SIZE) != 0) > + return 1; > + return 0; > +} > + > +/* { dg-final { scan-assembler-times "shll\t" 1} } */ > +/* { dg-final { scan-assembler-times "shll2\t" 1} } */ Very minor nit, sorry, but I think: /* { dg-final { scan-assembler-times {\tshll\t} 1 } } */ would be better. Using "…\t" works, but IIRC it shows up as a tab character in the testsuite result summary too. 
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c > index > f12fd158b13656ee24022ec7e445c53444be6554..1f40b59c0560eec675af1d9a0e3e818d47589de6 > 100644 > --- a/gcc/tree-vect-stmts.c > +++ b/gcc/tree-vect-stmts.c > @@ -4934,8 +4934,13 @@ vectorizable_conversion (vec_info *vinfo, > &vec_oprnds1); > if (code == WIDEN_LSHIFT_EXPR) > { > - vec_oprnds1.create (ncopies * ninputs); > - for (i = 0; i < ncopies * ninputs; ++i) > + int oprnds_size = ncopies * ninputs; > + /* In the case of SLP ncopies = 1, so the size of vec_oprnds1 here > + * should be obtained by the the size of vec_oprnds0. */ This is redundant given Richard's comment, but FWIW, GCC style is to indent without “*”s, so: /* In the case of SLP ncopies = 1, so the size of vec_oprnds1 here should be obtained from the size of vec_oprnds0. */ OK for the aarch64 bits with the testsuite changes above. Thanks, Richard > + if (slp_node) > + oprnds_size = vec_oprnds0.length (); > + vec_oprnds1.create (oprnds_size); > + for (i = 0; i < oprnds_size; ++i) > + vec_oprnds1.quick_push (op1); > } > /* Arguments are ready. Create the new vector stmts. */