Andrew Pinski <quic_apin...@quicinc.com> writes: > For popcount for bytes, we don't need the reduction addition > after the vector cnt instruction as we are only counting one > byte's popcount. > This changes the popcount expand to cover all ALLI rather than GPI. > > Changes since v1: > * v2 - Use ALLI iterator and combine all into one pattern. > Add new testcases popcnt[6-8].c. > * v3 - Simplify TARGET_CSSC path. > Use convert_to_mode instead of gen_zero_extend* directly. > Some other small cleanups. > > Bootstrapped and tested on aarch64-linux-gnu with no regressions. > > PR target/113042 > > gcc/ChangeLog: > > * config/aarch64/aarch64.md (popcount<mode>2): Update pattern > to support ALLI modes. > > gcc/testsuite/ChangeLog: > > * gcc.target/aarch64/popcnt5.c: New test. > * gcc.target/aarch64/popcnt6.c: New test. > * gcc.target/aarch64/popcnt7.c: New test. > * gcc.target/aarch64/popcnt8.c: New test.
LGTM, thanks. OK if there are no comments in 24 hours. Richard > Signed-off-by: Andrew Pinski <quic_apin...@quicinc.com> > --- > gcc/config/aarch64/aarch64.md | 37 ++++++++++++++-------- > gcc/testsuite/gcc.target/aarch64/popcnt5.c | 19 +++++++++++ > gcc/testsuite/gcc.target/aarch64/popcnt6.c | 19 +++++++++++ > gcc/testsuite/gcc.target/aarch64/popcnt7.c | 18 +++++++++++ > gcc/testsuite/gcc.target/aarch64/popcnt8.c | 18 +++++++++++ > 5 files changed, 98 insertions(+), 13 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt5.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt6.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt7.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/popcnt8.c > > diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md > index 665a333903c..12dcc16529a 100644 > --- a/gcc/config/aarch64/aarch64.md > +++ b/gcc/config/aarch64/aarch64.md > @@ -5341,9 +5341,9 @@ (define_insn "*aarch64_popcount<mode>2_cssc_insn" > ;; MOV w0, v2.b[0] > > (define_expand "popcount<mode>2" > - [(set (match_operand:GPI 0 "register_operand") > - (popcount:GPI (match_operand:GPI 1 "register_operand")))] > - "TARGET_CSSC || TARGET_SIMD" > + [(set (match_operand:ALLI 0 "register_operand") > + (popcount:ALLI (match_operand:ALLI 1 "register_operand")))] > + "TARGET_CSSC ? GET_MODE_BITSIZE (<MODE>mode) >= 32 : TARGET_SIMD" > { > if (!TARGET_CSSC) > { > @@ -5351,18 +5351,29 @@ (define_expand "popcount<mode>2" > rtx v1 = gen_reg_rtx (V8QImode); > rtx in = operands[1]; > rtx out = operands[0]; > - if(<MODE>mode == SImode) > - { > - rtx tmp; > - tmp = gen_reg_rtx (DImode); > - /* If we have SImode, zero extend to DImode, pop count does > - not change if we have extra zeros. */ > - emit_insn (gen_zero_extendsidi2 (tmp, in)); > - in = tmp; > - } > + /* SImode and HImode should be zero extended to DImode. > + popcount does not change if we have extra zeros. 
*/ > + if (<MODE>mode == SImode || <MODE>mode == HImode) > + in = convert_to_mode (DImode, in, true); > + > emit_move_insn (v, gen_lowpart (V8QImode, in)); > emit_insn (gen_popcountv8qi2 (v1, v)); > - emit_insn (gen_aarch64_zero_extend<mode>_reduc_plus_v8qi (out, v1)); > + /* QImode, just extract from the v8qi vector. */ > + if (<MODE>mode == QImode) > + emit_move_insn (out, gen_lowpart (QImode, v1)); > + /* HI and SI, reduction is zero extended to SImode. */ > + else if (<MODE>mode == SImode || <MODE>mode == HImode) > + { > + rtx out1 = gen_reg_rtx (SImode); > + emit_insn (gen_aarch64_zero_extendsi_reduc_plus_v8qi (out1, v1)); > + emit_move_insn (out, gen_lowpart (<MODE>mode, out1)); > + } > + /* DImode, reduction is zero extended to DImode. */ > + else > + { > + gcc_assert (<MODE>mode == DImode); > + emit_insn (gen_aarch64_zero_extenddi_reduc_plus_v8qi (out, v1)); > + } > DONE; > } > }) > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt5.c > b/gcc/testsuite/gcc.target/aarch64/popcnt5.c > new file mode 100644 > index 00000000000..406369d9b29 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt5.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > +/* PR target/113042 */ > + > +#pragma GCC target "+nocssc" > + > +/* > +** h8: > +** ldr b[0-9]+, \[x0\] > +** cnt v[0-9]+.8b, v[0-9]+.8b > +** smov w0, v[0-9]+.b\[0\] > +** ret > +*/ > +/* We should not need the addv here since we only need a byte popcount. 
*/ > + > +unsigned h8 (const unsigned char *a) { > + return __builtin_popcountg (a[0]); > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt6.c > b/gcc/testsuite/gcc.target/aarch64/popcnt6.c > new file mode 100644 > index 00000000000..e882cb24126 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt6.c > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > +/* PR target/113042 */ > + > +#pragma GCC target "+nocssc" > + > +/* > +** h8: > +** ldr h[0-9]+, \[x0\] > +** cnt v[0-9]+.8b, v[0-9]+.8b > +** addv b[0-9]+, v[0-9]+.8b > +** fmov w0, s[0-9]+ > +** ret > +*/ > + > +unsigned h8 (const unsigned short *a) { > + return __builtin_popcountg (a[0]); > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt7.c > b/gcc/testsuite/gcc.target/aarch64/popcnt7.c > new file mode 100644 > index 00000000000..8dfff211ae4 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt7.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > +/* PR target/113042 */ > + > +#pragma GCC target "+cssc" > + > +/* > +** h8: > +** ldrb w[0-9]+, \[x0\] > +** cnt w[0-9]+, w[0-9]+ > +** ret > +*/ > +/* We should not produce any extra zero extend for this code */ > + > +unsigned h8 (const unsigned char *a) { > + return __builtin_popcountg (a[0]); > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/popcnt8.c > b/gcc/testsuite/gcc.target/aarch64/popcnt8.c > new file mode 100644 > index 00000000000..66a88b6a929 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/popcnt8.c > @@ -0,0 +1,18 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { check-function-bodies "**" "" } } */ > +/* PR target/113042 */ > + > +#pragma GCC target "+cssc" > + > +/* > +** h8: > +** ldrh w[0-9]+, \[x0\] > +** cnt w[0-9]+, w[0-9]+ > +** ret > +*/ > +/* We should not produce any extra zero extend for this code */ 
> + > +unsigned h8 (const unsigned short *a) { > + return __builtin_popcountg (a[0]); > +}