PR99929 is one of those “how did we get away with this for so long” bugs: the equality routines weren't checking whether two variable-length CONST_VECTORs had the same encoding. This meant that:
{ 1, 0, 0, 0, 0, 0, ... } would appear to be equal to: { 1, 0, 1, 0, 1, 0, ... } since both are represented using the elements { 1, 0 }. Tested on aarch64-linux-gnu, arm-linux-gnueabihf, armeb-eabi and x86_64-linux-gnu. OK to install? I'd like to backport as far as GCC 8, even though the testcase itself requires GCC 10 or later. Richard gcc/ PR rtl-optimization/99929 * rtl.h (same_vector_encodings_p): New function. * cse.c (exp_equiv_p): Check that CONST_VECTORs have the same encoding. * cselib.c (rtx_equal_for_cselib_1): Likewise. * jump.c (rtx_renumbered_equal_p): Likewise. * lra-constraints.c (operands_match_p): Likewise. * reload.c (operands_match_p): Likewise. * rtl.c (rtx_equal_p_cb, rtx_equal_p): Likewise. gcc/testsuite/ * gcc.target/aarch64/sve/pr99929_1.c: New file. * gcc.target/aarch64/sve/pr99929_2.c: Likewise. --- gcc/cse.c | 5 +++++ gcc/cselib.c | 5 +++++ gcc/jump.c | 5 +++++ gcc/lra-constraints.c | 5 +++++ gcc/reload.c | 5 +++++ gcc/rtl.c | 10 ++++++++++ gcc/rtl.h | 17 +++++++++++++++++ .../gcc.target/aarch64/sve/pr99929_1.c | 16 ++++++++++++++++ .../gcc.target/aarch64/sve/pr99929_2.c | 5 +++++ 9 files changed, 73 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr99929_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr99929_2.c diff --git a/gcc/cse.c b/gcc/cse.c index 37c6959abea..df191d5aa3f 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -2637,6 +2637,11 @@ exp_equiv_p (const_rtx x, const_rtx y, int validate, bool for_gcse) CASE_CONST_UNIQUE: return x == y; + case CONST_VECTOR: + if (!same_vector_encodings_p (x, y)) + return false; + break; + case LABEL_REF: return label_ref_label (x) == label_ref_label (y); diff --git a/gcc/cselib.c b/gcc/cselib.c index 2d34a914c6b..779874eeb2d 100644 --- a/gcc/cselib.c +++ b/gcc/cselib.c @@ -1048,6 +1048,11 @@ rtx_equal_for_cselib_1 (rtx x, rtx y, machine_mode memmode, int depth) case DEBUG_EXPR: return 0; + case CONST_VECTOR: + if (!same_vector_encodings_p (x, y)) + return false; + break; + case DEBUG_IMPLICIT_PTR: return DEBUG_IMPLICIT_PTR_DECL (x) == DEBUG_IMPLICIT_PTR_DECL (y); diff --git a/gcc/jump.c b/gcc/jump.c index 561dbb70d15..67b5c3374a6 100644 --- a/gcc/jump.c +++ b/gcc/jump.c @@ -1777,6 +1777,11 @@ rtx_renumbered_equal_p (const_rtx x, const_rtx y) CASE_CONST_UNIQUE: return 0; + case CONST_VECTOR: + if (!same_vector_encodings_p (x, y)) + return false; + break; + case LABEL_REF: /* We can't assume nonlocal labels have their following insns yet. */ if (LABEL_REF_NONLOCAL_P (x) || LABEL_REF_NONLOCAL_P (y)) diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c index 62bcfc31772..1560f652da6 100644 --- a/gcc/lra-constraints.c +++ b/gcc/lra-constraints.c @@ -834,6 +834,11 @@ operands_match_p (rtx x, rtx y, int y_hard_regno) CASE_CONST_UNIQUE: return false; + case CONST_VECTOR: + if (!same_vector_encodings_p (x, y)) + return false; + break; + case LABEL_REF: return label_ref_label (x) == label_ref_label (y); case SYMBOL_REF: diff --git a/gcc/reload.c b/gcc/reload.c index 461fd0272eb..e18e27c2405 100644 --- a/gcc/reload.c +++ b/gcc/reload.c @@ -2310,6 +2310,11 @@ operands_match_p (rtx x, rtx y) CASE_CONST_UNIQUE: return 0; + case CONST_VECTOR: + if (!same_vector_encodings_p (x, y)) + return false; + break; + case LABEL_REF: return label_ref_label (x) == label_ref_label (y); case SYMBOL_REF: diff --git a/gcc/rtl.c b/gcc/rtl.c index 1aa794c82ca..e4ae1683069 100644 --- a/gcc/rtl.c +++ b/gcc/rtl.c @@ -466,6 +466,11 @@ rtx_equal_p_cb (const_rtx x, const_rtx y, rtx_equal_p_callback_function cb) CASE_CONST_UNIQUE: return 0; + case CONST_VECTOR: + if (!same_vector_encodings_p (x, y)) + return false; + break; + case DEBUG_IMPLICIT_PTR: return DEBUG_IMPLICIT_PTR_DECL (x) == DEBUG_IMPLICIT_PTR_DECL (y); @@ -608,6 +613,11 @@ rtx_equal_p (const_rtx x, const_rtx y) CASE_CONST_UNIQUE: return 0; + case CONST_VECTOR: + if (!same_vector_encodings_p (x, y)) + return false; + break; + case DEBUG_IMPLICIT_PTR: return DEBUG_IMPLICIT_PTR_DECL (x) == DEBUG_IMPLICIT_PTR_DECL (y); diff --git a/gcc/rtl.h b/gcc/rtl.h index a392721c8d0..398d745aff5 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -3087,6 +3087,23 @@ vec_series_p (const_rtx x, rtx *base_out, rtx *step_out) return const_vec_series_p (x, base_out, step_out); } +/* Return true if CONST_VECTORs X and Y, which are known to have the same mode, + also have the same encoding. This means that they are equal whenever their + operands are equal. */ + +inline bool +same_vector_encodings_p (const_rtx x, const_rtx y) +{ + /* Don't be fussy about the encoding of constant-length vectors, + since XVECEXP (X, 0) and XVECEXP (Y, 0) list all the elements anyway. */ + if (poly_uint64 (CONST_VECTOR_NUNITS (x)).is_constant ()) + return true; + + return (CONST_VECTOR_NPATTERNS (x) == CONST_VECTOR_NPATTERNS (y) + && (CONST_VECTOR_NELTS_PER_PATTERN (x) + == CONST_VECTOR_NELTS_PER_PATTERN (y))); +} + /* Return the unpromoted (outer) mode of SUBREG_PROMOTED_VAR_P subreg X. */ inline scalar_int_mode diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr99929_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pr99929_1.c new file mode 100644 index 00000000000..1fe18136e28 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr99929_1.c @@ -0,0 +1,16 @@ +/* { dg-do run { target aarch64_sve_hw } } */ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include <arm_sve.h> + +static void e(short *g, short p2) { *g ^= p2; } +static short m[23]; +int main() { + for (unsigned i = 0; i < 23; ++i) + m[i] = 4; + if (svaddv(svptrue_pat_b32(SV_VL1), svdup_u32(1)) != 1) + __builtin_abort(); + for (unsigned i = 0; i < 3; ++i) + e(m, m[i]); + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr99929_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pr99929_2.c new file mode 100644 index 00000000000..50d432db9b8 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr99929_2.c @@ -0,0 +1,5 @@ +/* { dg-options "-O2 -ftree-vectorize" } */ + +#include "pr99929_1.c" + +/* { dg-final { scan-assembler {\tptrue\tp[0-7].[bhsd], vl1\n} } } */