Kyrylo Tkachov <kyrylo.tkac...@arm.com> writes: > Hi all, > > SVE2 supports an unpredicated vector integer MUL form that we can emit from > our SVE expanders > without using up a predicate register. This patch does so. > As the SVE MUL expansion currently is templated away through a code iterator > I did not split it > off just for this case but instead special-cased it in the define_expand. It > seemed somewhat less > invasive than the alternatives but I could split it off more explicitly if > others want to. > The div-by-bitmask_1.c testcase is adjusted to expect this new MUL form. > > Bootstrapped and tested on aarch64-none-linux-gnu. > > Ok for trunk? > Thanks, > Kyrill > > gcc/ChangeLog: > > PR target/109406 > * config/aarch64/aarch64-sve.md (<optab><mode>3): Handle TARGET_SVE2 MUL > case. > * config/aarch64/aarch64-sve2.md (*aarch64_mul_unpredicated_<mode>): New > pattern. > > gcc/testsuite/ChangeLog: > > PR target/109406 > * gcc.target/aarch64/sve2/div-by-bitmask_1.c: Adjust for unpredicated > SVE2 > MUL. > * gcc.target/aarch64/sve2/unpred_mul_1.c: New test.
LGTM. Thanks, Richard > diff --git a/gcc/config/aarch64/aarch64-sve.md > b/gcc/config/aarch64/aarch64-sve.md > index > b11b55f7ac718db199920b61bf3e4b4881c69660..4b4c02c90fec6ce1ff15a8b2a5df348224a307b7 > 100644 > --- a/gcc/config/aarch64/aarch64-sve.md > +++ b/gcc/config/aarch64/aarch64-sve.md > @@ -3657,6 +3657,15 @@ (define_expand "<optab><mode>3" > UNSPEC_PRED_X))] > "TARGET_SVE" > { > + /* SVE2 supports the MUL (vectors, unpredicated) form. Emit the simple > + pattern for it here rather than splitting off the MULT expander > + separately. */ > + if (TARGET_SVE2 && <CODE> == MULT) > + { > + emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode, > + operands[1], operands[2])); > + DONE; > + } > operands[3] = aarch64_ptrue_reg (<VPRED>mode); > } > ) > diff --git a/gcc/config/aarch64/aarch64-sve2.md > b/gcc/config/aarch64/aarch64-sve2.md > index > 2346f9f835d26f5b87afd47cdc9e44f9f47604ed..da8a424dd57fc5482cb20ba417d4141148ac61b6 > 100644 > --- a/gcc/config/aarch64/aarch64-sve2.md > +++ b/gcc/config/aarch64/aarch64-sve2.md > @@ -189,7 +189,7 @@ (define_insn > "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>" > ;; ------------------------------------------------------------------------- > ;; ---- [INT] Multiplication > ;; ------------------------------------------------------------------------- > -;; Includes the lane forms of: > +;; Includes the lane and unpredicated forms of: > ;; - MUL > ;; ------------------------------------------------------------------------- > > @@ -205,6 +205,21 @@ (define_insn "@aarch64_mul_lane_<mode>" > "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]" > ) > > +;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but > +;; we include them here to allow matching simpler, unpredicated RTL. 
> +(define_insn "*aarch64_mul_unpredicated_<mode>" > + [(set (match_operand:SVE_I 0 "register_operand" "=w,w,?&w") > + (mult:SVE_I > + (match_operand:SVE_I 1 "register_operand" "w,0,w") > + (match_operand:SVE_I 2 "aarch64_sve_vsm_operand" "w,vsm,vsm")))] > + "TARGET_SVE2" > + "@ > + mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype> > + mul\t%0.<Vetype>, %0.<Vetype>, #%2 > + movprfx\t%0, %1\;mul\t%0.<Vetype>, %0.<Vetype>, #%2" > + [(set_attr "movprfx" "*,*,yes")] > +) > + > ;; ------------------------------------------------------------------------- > ;; ---- [INT] Scaled high-part multiplication > ;; ------------------------------------------------------------------------- > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c > b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c > index > e6f5098c30f4e2eb8ed1af153c0bb0d204cda6d9..1e546a93906962ba2469ddb3bf2ee9c0166dbae1 > 100644 > --- a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_1.c > @@ -7,7 +7,7 @@ > /* > ** draw_bitmap1: > ** ... > -** mul z[0-9]+.h, p[0-9]+/m, z[0-9]+.h, z[0-9]+.h > +** mul z[0-9]+.h, z[0-9]+.h, z[0-9]+.h > ** addhnb z[0-9]+.b, z[0-9]+.h, z[0-9]+.h > ** addhnb z[0-9]+.b, z[0-9]+.h, z[0-9]+.h > ** ... > @@ -27,7 +27,7 @@ void draw_bitmap2(uint8_t* restrict pixel, uint8_t level, > int n) > /* > ** draw_bitmap3: > ** ... > -** mul z[0-9]+.s, p[0-9]+/m, z[0-9]+.s, z[0-9]+.s > +** mul z[0-9]+.s, z[0-9]+.s, z[0-9]+.s > ** addhnb z[0-9]+.h, z[0-9]+.s, z[0-9]+.s > ** addhnb z[0-9]+.h, z[0-9]+.s, z[0-9]+.s > ** ... > @@ -41,7 +41,7 @@ void draw_bitmap3(uint16_t* restrict pixel, uint16_t level, > int n) > /* > ** draw_bitmap4: > ** ... > -** mul z[0-9]+.d, p[0-9]+/m, z[0-9]+.d, z[0-9]+.d > +** mul z[0-9]+.d, z[0-9]+.d, z[0-9]+.d > ** addhnb z[0-9]+.s, z[0-9]+.d, z[0-9]+.d > ** addhnb z[0-9]+.s, z[0-9]+.d, z[0-9]+.d > ** ... 
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c > b/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c > new file mode 100644 > index > 0000000000000000000000000000000000000000..aaf0ce49c99447439146a1e17ed0533231e141c2 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/unpred_mul_1.c > @@ -0,0 +1,29 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2 -ftree-vectorize" } */ > + > +#include <stdint.h> > + > +#define N 1024 > + > +#define TYPE(N) int##N##_t > + > +#define TEMPLATE(SIZE) \ > +void __attribute__ ((noinline, noclone)) \ > +f_##SIZE##_##OP \ > + (TYPE(SIZE) *restrict a, TYPE(SIZE) *restrict b, \ > + TYPE(SIZE) *restrict c) \ > +{ \ > + for (int i = 0; i < N; i++) \ > + a[i] = b[i] * c[i]; \ > +} > + > +TEMPLATE (8); > +TEMPLATE (16); > +TEMPLATE (32); > +TEMPLATE (64); > + > +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.d, z[0-9]+\.d, > z[0-9]+\.d} 1 } } */ > +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.s, z[0-9]+\.s, > z[0-9]+\.s} 1 } } */ > +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.h, z[0-9]+\.h, > z[0-9]+\.h} 1 } } */ > +/* { dg-final { scan-assembler-times {\tmul\tz[0-9]+\.b, z[0-9]+\.b, > z[0-9]+\.b} 1 } } */ > +