On Wed, Mar 31, 2021 at 12:24 PM Richard Sandiford via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > This PR is caused by POLY_INT_CSTs being (necessarily) valid > in tree-level VECTOR_CSTs but CONST_POLY_INTs not being valid > in RTL CONST_VECTORs. I can't tell/remember how deliberate > that was, but I'm guessing not very. In particular, > valid_for_const_vector_p was added to guard against symbolic > constants rather than CONST_POLY_INTs. > > I did briefly consider whether we should maintain the current > status anyway. However, that would then require a way of > constructing variable-length vectors from individiual elements > if, say, we have: > > { [2, 2], [3, 2], [4, 2], … } > > So I'm chalking this up to an oversight. I think the intention > (and certainly the natural thing) is to have the same rules for > both trees and RTL. > > The SVE CONST_VECTOR code should already be set up to handle > CONST_POLY_INTs. However, we need to add support for Advanced SIMD > CONST_VECTORs that happen to contain SVE-based values. The patch does > that by expanding such CONST_VECTORs in the same way as variable vectors. > > Tested on aarch64-linux-gnu, armeb-eabi and x86_64-linux-gnu. > OK for the target-independent bits?
OK. Richard. > Richard > > > gcc/ > PR rtl-optimization/97141 > PR rtl-optimization/98726 > * emit-rtl.c (valid_for_const_vector_p): Return true for > CONST_POLY_INT_P. > * rtx-vector-builder.h (rtx_vector_builder::step): Return a > poly_wide_int instead of a wide_int. > (rtx_vector_builder::apply_set): Take a poly_wide_int instead > of a wide_int. > * rtx-vector-builder.c (rtx_vector_builder::apply_set): Likewise. > * config/aarch64/aarch64.c (aarch64_legitimate_constant_p): Return > false for CONST_VECTORs that cannot be forced to memory. > * config/aarch64/aarch64-simd.md (mov<mode>): If a CONST_VECTOR > is too complex to force to memory, build it up from individual > elements instead. > > gcc/testsuite/ > PR rtl-optimization/97141 > PR rtl-optimization/98726 > * gcc.c-torture/compile/pr97141.c: New test. > * gcc.c-torture/compile/pr98726.c: Likewise. > * gcc.target/aarch64/sve/pr97141.c: Likewise. > * gcc.target/aarch64/sve/pr98726.c: Likewise. > --- > gcc/config/aarch64/aarch64-simd.md | 11 +++++++++ > gcc/config/aarch64/aarch64.c | 24 +++++++++++-------- > gcc/emit-rtl.c | 1 + > gcc/rtx-vector-builder.c | 6 ++--- > gcc/rtx-vector-builder.h | 10 ++++---- > gcc/testsuite/gcc.c-torture/compile/pr97141.c | 8 +++++++ > gcc/testsuite/gcc.c-torture/compile/pr98726.c | 7 ++++++ > .../gcc.target/aarch64/sve/pr97141.c | 10 ++++++++ > .../gcc.target/aarch64/sve/pr98726.c | 9 +++++++ > 9 files changed, 68 insertions(+), 18 deletions(-) > create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr97141.c > create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr98726.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr97141.c > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr98726.c > > diff --git a/gcc/config/aarch64/aarch64-simd.md > b/gcc/config/aarch64/aarch64-simd.md > index d86e8e72f18..4edee99051c 100644 > --- a/gcc/config/aarch64/aarch64-simd.md > +++ b/gcc/config/aarch64/aarch64-simd.md > @@ -35,6 +35,17 @@ (define_expand "mov<mode>" > && aarch64_mem_pair_operand (operands[0], DImode)) > || known_eq (GET_MODE_SIZE (<MODE>mode), 8)))) > operands[1] = force_reg (<MODE>mode, operands[1]); > + > + /* If a constant is too complex to force to memory (e.g. because it > + contains CONST_POLY_INTs), build it up from individual elements instead. > + We should only need to do this before RA; aarch64_legitimate_constant_p > + should ensure that we don't try to rematerialize the constant later. */ > + if (GET_CODE (operands[1]) == CONST_VECTOR > + && targetm.cannot_force_const_mem (<MODE>mode, operands[1])) > + { > + aarch64_expand_vector_init (operands[0], operands[1]); > + DONE; > + } > " > ) > > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index 5eda9e80bd0..77573e96820 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -17925,10 +17925,22 @@ aarch64_legitimate_constant_p (machine_mode mode, > rtx x) > { > /* Support CSE and rematerialization of common constants. */ > if (CONST_INT_P (x) > - || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT) > - || GET_CODE (x) == CONST_VECTOR) > + || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT)) > return true; > > + /* Only accept variable-length vector constants if they can be > + handled directly. > + > + ??? It would be possible (but complex) to handle rematerialization > + of other constants via secondary reloads. */ > + if (!GET_MODE_SIZE (mode).is_constant ()) > + return aarch64_simd_valid_immediate (x, NULL); > + > + /* Otherwise, accept any CONST_VECTOR that, if all else fails, can at > + least be forced to memory and loaded from there. */ > + if (GET_CODE (x) == CONST_VECTOR) > + return !targetm.cannot_force_const_mem (mode, x); > + > /* Do not allow vector struct mode constants for Advanced SIMD. > We could support 0 and -1 easily, but they need support in > aarch64-simd.md. */ > @@ -17936,14 +17948,6 @@ aarch64_legitimate_constant_p (machine_mode mode, > rtx x) > if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT)) > return false; > > - /* Only accept variable-length vector constants if they can be > - handled directly. > - > - ??? It would be possible to handle rematerialization of other > - constants via secondary reloads. */ > - if (vec_flags & VEC_ANY_SVE) > - return aarch64_simd_valid_immediate (x, NULL); > - > if (GET_CODE (x) == HIGH) > x = XEXP (x, 0); > > diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c > index b23284e5e56..1113c58c49e 100644 > --- a/gcc/emit-rtl.c > +++ b/gcc/emit-rtl.c > @@ -5949,6 +5949,7 @@ bool > valid_for_const_vector_p (machine_mode, rtx x) > { > return (CONST_SCALAR_INT_P (x) > + || CONST_POLY_INT_P (x) > || CONST_DOUBLE_AS_FLOAT_P (x) > || CONST_FIXED_P (x)); > } > diff --git a/gcc/rtx-vector-builder.c b/gcc/rtx-vector-builder.c > index 58b3e10a14a..cf9b3dd1af9 100644 > --- a/gcc/rtx-vector-builder.c > +++ b/gcc/rtx-vector-builder.c > @@ -46,11 +46,11 @@ rtx_vector_builder::build (rtvec v) > > rtx > rtx_vector_builder::apply_step (rtx base, unsigned int factor, > - const wide_int &step) const > + const poly_wide_int &step) const > { > scalar_int_mode int_mode = as_a <scalar_int_mode> (GET_MODE_INNER > (m_mode)); > - return immed_wide_int_const (wi::add (rtx_mode_t (base, int_mode), > - factor * step), > + return immed_wide_int_const (wi::to_poly_wide (base, int_mode) > + + factor * step, > int_mode); > } > > diff --git a/gcc/rtx-vector-builder.h b/gcc/rtx-vector-builder.h > index ea32208b30a..30a91e80b40 100644 > --- a/gcc/rtx-vector-builder.h > +++ b/gcc/rtx-vector-builder.h > @@ -44,8 +44,8 @@ private: > bool equal_p (rtx, rtx) const; > bool allow_steps_p () const; > bool integral_p (rtx) const; > - wide_int step (rtx, rtx) const; > - rtx apply_step (rtx, unsigned int, const wide_int &) const; > + poly_wide_int step (rtx, rtx) const; > + rtx apply_step (rtx, unsigned int, const poly_wide_int &) const; > bool can_elide_p (rtx) const { return true; } > void note_representative (rtx *, rtx) {} > > @@ -115,11 +115,11 @@ rtx_vector_builder::integral_p (rtx elt) const > /* Return the value of element ELT2 minus the value of element ELT1. > Both elements are known to be CONST_SCALAR_INT_Ps. */ > > -inline wide_int > +inline poly_wide_int > rtx_vector_builder::step (rtx elt1, rtx elt2) const > { > - return wi::sub (rtx_mode_t (elt2, GET_MODE_INNER (m_mode)), > - rtx_mode_t (elt1, GET_MODE_INNER (m_mode))); > + return (wi::to_poly_wide (elt2, GET_MODE_INNER (m_mode)) > + - wi::to_poly_wide (elt1, GET_MODE_INNER (m_mode))); > } > > #endif > diff --git a/gcc/testsuite/gcc.c-torture/compile/pr97141.c > b/gcc/testsuite/gcc.c-torture/compile/pr97141.c > new file mode 100644 > index 00000000000..1a9ff830a22 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/compile/pr97141.c > @@ -0,0 +1,8 @@ > +int a; > +short b, c; > +short d(short e, short f) { return e + f; } > +void g(void) { > + a = -9; > + for (; a != 51; a = d(a, 5)) > + b |= c; > +} > diff --git a/gcc/testsuite/gcc.c-torture/compile/pr98726.c > b/gcc/testsuite/gcc.c-torture/compile/pr98726.c > new file mode 100644 > index 00000000000..ce24b18ce55 > --- /dev/null > +++ b/gcc/testsuite/gcc.c-torture/compile/pr98726.c > @@ -0,0 +1,7 @@ > +int a, c; > +char b; > +int d() { > + a = 0; > + for (; a <= 21; a = (short)a + 1) > + b &= c; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97141.c > b/gcc/testsuite/gcc.target/aarch64/sve/pr97141.c > new file mode 100644 > index 00000000000..942e4a48d91 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr97141.c > @@ -0,0 +1,10 @@ > +/* { dg-options "-O3" } */ > + > +int a; > +short b, c; > +short d(short e, short f) { return e + f; } > +void g(void) { > + a = -9; > + for (; a != 51; a = d(a, 5)) > + b |= c; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr98726.c > b/gcc/testsuite/gcc.target/aarch64/sve/pr98726.c > new file mode 100644 > index 00000000000..2395cab57e5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr98726.c > @@ -0,0 +1,9 @@ > +/* { dg-options "-O3" } */ > + > +int a, c; > +char b; > +int d() { > + a = 0; > + for (; a <= 21; a = (short)a + 1) > + b &= c; > +}