Hi, This patch backports the fix of r16-3083 to gcc-13.
Compared to the trunk version, this is slightly different because those RTL patterns in gcc-13 do not yet use the compact syntax for multiple alternatives. But this patch is functionally identical to the trunk fix. Bootstrapped and regtested on gcc-13. Ok for gcc-13 now? Thanks, Pengfei -- >8 -- This patch fixes incorrect constraints in RTL patterns for AArch64 SVE gather/scatter with type widening/narrowing and vector-plus-immediate addressing. The bug leads to below "immediate offset out of range" errors during assembly, eventually causing compilation failures. /tmp/ccsVqBp1.s: Assembler messages: /tmp/ccsVqBp1.s:54: Error: immediate offset out of range 0 to 31 at operand 3 -- `ld1b z1.d,p0/z,[z1.d,#64]' Current RTL patterns for such instructions incorrectly use vgw or vgd constraints for the immediate operand, base on the vector element type in Z registers (zN.s or zN.d). However, for gather/scatter with type conversions, the immediate range for vector-plus-immediate addressing is determined by the element type in memory, which differs from that in vector registers. Using the wrong constraint can produce out-of-range offset values that cannot be encoded in the instruction. This patch corrects the constraints used in these patterns. A test case that reproduces the issue is also included. Bootstrapped and regression-tested on aarch64-linux-gnu. gcc/ChangeLog: PR target/121449 * config/aarch64/aarch64-sve.md (mask_gather_load<mode><v_int_container>): Use vg<Vesize> constraints for alternatives with immediate offset. (mask_scatter_store<mode><v_int_container>): Likewise. gcc/testsuite/ChangeLog: PR target/121449 * g++.target/aarch64/sve/pr121449.C: New test. --- gcc/config/aarch64/aarch64-sve.md | 8 ++-- .../g++.target/aarch64/sve/pr121449.C | 44 +++++++++++++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/g++.target/aarch64/sve/pr121449.C diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 9cb09187c62..89f4e6d3b75 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1421,7 +1421,7 @@ [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w") (unspec:SVE_4 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") - (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk") + (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vg<Vesize>, rk, rk, rk, rk") (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w") (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i") @@ -1443,7 +1443,7 @@ [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w") (unspec:SVE_2 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") - (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk") + (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vg<Vesize>, rk, rk") (match_operand:VNx2DI 2 "register_operand" "w, w, w, w") (match_operand:DI 3 "const_int_operand") (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i") @@ -2254,7 +2254,7 @@ [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl") - (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk") + (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vg<Vesize>, rk, rk, rk, rk") (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w") (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1") (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i") @@ -2276,7 +2276,7 @@ [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl") - (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk") + (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vg<Vesize>, rk, rk") (match_operand:VNx2DI 1 "register_operand" "w, w, w, w") (match_operand:DI 2 "const_int_operand") (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i") diff --git a/gcc/testsuite/g++.target/aarch64/sve/pr121449.C b/gcc/testsuite/g++.target/aarch64/sve/pr121449.C new file mode 100644 index 00000000000..b2e13765dfa --- /dev/null +++ b/gcc/testsuite/g++.target/aarch64/sve/pr121449.C @@ -0,0 +1,44 @@ +/* PR target/121449 */ +/* { dg-do assemble { target aarch64_asm_sve_ok } } */ +/* { dg-options "-O3 -save-temps" } */ + +struct example; + +struct array { + unsigned length(); + example *operator[](unsigned i) { + example **data = reinterpret_cast<example **>(this); + return data[i]; + } +}; + +struct example { + int a[16]; + bool is_even; + int version; + int count() { return is_even ? 2 : 1; } + void fun1(int, long); + void fun2(unsigned, unsigned); + void process(array &, array &); +}; + +bool found; + +void example::process(array &a, array &b) { + for (unsigned i = 1; a.length(); i++) { + long total = 0; + for (unsigned k = 0; k <= i; k++) { + total += a[k]->count(); + } + for (unsigned j = 0; j < i; j++) { + int major = b[j]->version; + if (found) + major += i; + fun1(i + 1, total); + fun2(j, major); + } + } +} + +/* { dg-final { scan-assembler-not {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, \[(z[0-9]+)\.d, #64\]} } } */ + -- 2.43.0