> On 8 Aug 2025, at 14:23, Tamar Christina <tamar.christ...@arm.com> wrote:
> 
>> -----Original Message-----
>> From: Pengfei Li <pengfei....@arm.com>
>> Sent: Friday, August 8, 2025 11:00 AM
>> To: Kyrylo Tkachov <ktkac...@nvidia.com>
>> Cc: gcc-patches@gcc.gnu.org; Richard Sandiford <richard.sandif...@arm.com>;
>> Tamar Christina <tamar.christ...@arm.com>
>> Subject: [PATCH v2] AArch64: Fix invalid immediate offsets in SVE 
>> gather/scatter
>> [PR121449]
>> 
>> Below v2 patch just updates the commit message in v1.
>> 
>> Please let me know if it's good enough now.
>> 
>> Thanks,
>> Pengfei
>> 
>> -- >8 --
>> This patch fixes incorrect constraints in RTL patterns for AArch64 SVE
>> gather/scatter with type widening/narrowing and vector-plus-immediate
>> addressing. The bug leads to below "immediate offset out of range"
>> errors during assembly, eventually causing compilation failures.
>> 
>> /tmp/ccsVqBp1.s: Assembler messages:
>> /tmp/ccsVqBp1.s:54: Error: immediate offset out of range 0 to 31 at operand 
>> 3 --
>> `ld1b z1.d,p0/z,[z1.d,#64]'
>> 
>> Current RTL patterns for such instructions incorrectly use vgw or vgd
>> constraints for the immediate operand, base on the vector element type
>> in Z registers (zN.s or zN.d). However, for gather/scatter with type
>> conversions, the immediate range for vector-plus-immediate addressing is
>> determined by the element type in memory, which differs from that in
>> vector registers. Using the wrong constraint can produce out-of-range
>> offset values that cannot be encoded in the instruction.
>> 
>> This patch corrects the constraints used in these patterns. A test case
>> that reproduces the issue is also included.
>> 
>> Bootstrapped and regression-tested on aarch64-linux-gnu.
>> 
>> gcc/ChangeLog:
>> PR target/121449
>> * config/aarch64/aarch64-sve.md
>> (mask_gather_load<mode><v_int_container>): Fix constraints.
>> (mask_gather_load<mode><v_int_container>): Likewise.
>> (mask_scatter_store<mode><v_int_container>): Likewise.
>> (mask_scatter_store<mode><v_int_container>): Likewise.
> 
> You just need to mention each pattern names once.
> 
> So 
> 
> gcc/ChangeLog:
> 
> PR target/121449
> * config/aarch64/aarch64-sve.md
> (mask_gather_load<mode><v_int_container>): Fix constraints.
> (mask_scatter_store<mode><v_int_container>): Likewise.
> 
> Ok with those changes (you don't need to send a new patch for review).

I’d also ask for a slightly more descriptive sentence like “Use vg<Vesize> 
constraint for alternative so-and-so”.
Ok to push whatever reword you come up with.
Thanks,
Kyrill


> 
> Thanks,
> Tamar
> 
>> 
>> gcc/testsuite/ChangeLog:
>> PR target/121449
>> * g++.target/aarch64/sve/pr121449.C: New test.
>> ---
>> gcc/config/aarch64/aarch64-sve.md             | 64 +++++++++----------
>> .../g++.target/aarch64/sve/pr121449.C         | 44 +++++++++++++
>> 2 files changed, 76 insertions(+), 32 deletions(-)
>> create mode 100644 gcc/testsuite/g++.target/aarch64/sve/pr121449.C
>> 
>> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-
>> sve.md
>> index 88d323af32d..51e2d7d7e87 100644
>> --- a/gcc/config/aarch64/aarch64-sve.md
>> +++ b/gcc/config/aarch64/aarch64-sve.md
>> @@ -1542,18 +1542,18 @@
>>   UNSPEC_LD1_GATHER))]
>>   "TARGET_SVE && TARGET_NON_STREAMING"
>>   {@ [cons: =0, 1, 2, 3, 4, 5  ]
>> -     [&w, Z,   w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
>> -     [?w, Z,   0, Ui1, Ui1, Upl] ^
>> -     [&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
>> -     [?w, vgw, 0, Ui1, Ui1, Upl] ^
>> -     [&w, rk,  w, Z,   Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
>> -     [?w, rk,  0, Z,   Ui1, Upl] ^
>> -     [&w, rk,  w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
>> -     [?w, rk,  0, Ui1, Ui1, Upl] ^
>> -     [&w, rk,  w, Z,   i,   Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw 
>> %p4]
>> -     [?w, rk,  0, Z,   i,   Upl] ^
>> -     [&w, rk,  w, Ui1, i,   Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw 
>> %p4]
>> -     [?w, rk,  0, Ui1, i,   Upl] ^
>> +     [&w, Z,          w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
>> +     [?w, Z,          0, Ui1, Ui1, Upl] ^
>> +     [&w, vg<Vesize>, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
>> +     [?w, vg<Vesize>, 0, Ui1, Ui1, Upl] ^
>> +     [&w, rk,         w, Z,   Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, 
>> sxtw]
>> +     [?w, rk,         0, Z,   Ui1, Upl] ^
>> +     [&w, rk,         w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, 
>> uxtw]
>> +     [?w, rk,         0, Ui1, Ui1, Upl] ^
>> +     [&w, rk,         w, Z,   i,   Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, 
>> sxtw %p4]
>> +     [?w, rk,         0, Z,   i,   Upl] ^
>> +     [&w, rk,         w, Ui1, i,   Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, 
>> uxtw %p4]
>> +     [?w, rk,         0, Ui1, i,   Upl] ^
>>   }
>> )
>> 
>> @@ -1572,14 +1572,14 @@
>>   UNSPEC_LD1_GATHER))]
>>   "TARGET_SVE && TARGET_NON_STREAMING"
>>   {@ [cons: =0, 1, 2, 3, 4, 5]
>> -     [&w, Z,   w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
>> -     [?w, Z,   0, i, Ui1, Upl] ^
>> -     [&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
>> -     [?w, vgd, 0, i, Ui1, Upl] ^
>> -     [&w, rk,  w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
>> -     [?w, rk,  0, i, Ui1, Upl] ^
>> -     [&w, rk,  w, i, i,   Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
>> -     [?w, rk,  0, i, i,   Upl] ^
>> +     [&w, Z,          w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
>> +     [?w, Z,          0, i, Ui1, Upl] ^
>> +     [&w, vg<Vesize>, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
>> +     [?w, vg<Vesize>, 0, i, Ui1, Upl] ^
>> +     [&w, rk,         w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
>> +     [?w, rk,         0, i, Ui1, Upl] ^
>> +     [&w, rk,         w, i, i,   Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, 
>> lsl %p4]
>> +     [?w, rk,         0, i, i,   Upl] ^
>>   }
>> )
>> 
>> @@ -2488,13 +2488,13 @@
>>    (match_operand:SVE_4 4 "register_operand")]
>>   UNSPEC_ST1_SCATTER))]
>>   "TARGET_SVE && TARGET_NON_STREAMING"
>> -  {@ [ cons: 0 , 1 , 2   , 3   , 4 , 5    ]
>> -     [ Z       , w , Ui1 , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%1.s]
>> -     [ vgw     , w , Ui1 , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%1.s, 
>> #%0]
>> -     [ rk      , w , Z   , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, 
>> %1.s, sxtw]
>> -     [ rk      , w , Ui1 , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, 
>> %1.s, uxtw]
>> -     [ rk      , w , Z   , i   , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, 
>> %1.s, sxtw %p3]
>> -     [ rk      , w , Ui1 , i   , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, 
>> %1.s, uxtw %p3]
>> +  {@ [ cons: 0    , 1 , 2   , 3   , 4 , 5    ]
>> +     [ Z          , w , Ui1 , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%1.s]
>> +     [ vg<Vesize> , w , Ui1 , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, 
>> [%1.s, #%0]
>> +     [ rk         , w , Z   , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, 
>> %1.s, sxtw]
>> +     [ rk         , w , Ui1 , Ui1 , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, 
>> %1.s, uxtw]
>> +     [ rk         , w , Z   , i   , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, 
>> %1.s, sxtw %p3]
>> +     [ rk         , w , Ui1 , i   , w , Upl  ] st1<Vesize>\t%4.s, %5, [%0, 
>> %1.s, uxtw %p3]
>>   }
>> )
>> 
>> @@ -2511,11 +2511,11 @@
>>    (match_operand:SVE_2 4 "register_operand")]
>>   UNSPEC_ST1_SCATTER))]
>>   "TARGET_SVE && TARGET_NON_STREAMING"
>> -  {@ [ cons: 0 , 1 , 3   , 4 , 5    ]
>> -     [ Z       , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%1.d]
>> -     [ vgd     , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
>> -     [ rk      , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
>> -     [ rk      , w , i   , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl 
>> %p3]
>> +  {@ [ cons: 0    , 1 , 3   , 4 , 5    ]
>> +     [ Z          , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%1.d]
>> +     [ vg<Vesize> , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
>> +     [ rk         , w , Ui1 , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
>> +     [ rk         , w , i   , w , Upl  ] st1<Vesize>\t%4.d, %5, [%0, %1.d, 
>> lsl %p3]
>>   }
>> )
>> 
>> diff --git a/gcc/testsuite/g++.target/aarch64/sve/pr121449.C
>> b/gcc/testsuite/g++.target/aarch64/sve/pr121449.C
>> new file mode 100644
>> index 00000000000..b2e13765dfa
>> --- /dev/null
>> +++ b/gcc/testsuite/g++.target/aarch64/sve/pr121449.C
>> @@ -0,0 +1,44 @@
>> +/* PR target/121449 */
>> +/* { dg-do assemble { target aarch64_asm_sve_ok } } */
>> +/* { dg-options "-O3 -save-temps" } */
>> +
>> +struct example;
>> +
>> +struct array {
>> +  unsigned length();
>> +  example *operator[](unsigned i) {
>> +    example **data = reinterpret_cast<example **>(this);
>> +    return data[i];
>> +  }
>> +};
>> +
>> +struct example {
>> +  int a[16];
>> +  bool is_even;
>> +  int version;
>> +  int count() { return is_even ? 2 : 1; }
>> +  void fun1(int, long);
>> +  void fun2(unsigned, unsigned);
>> +  void process(array &, array &);
>> +};
>> +
>> +bool found;
>> +
>> +void example::process(array &a, array &b) {
>> +  for (unsigned i = 1; a.length(); i++) {
>> +    long total = 0;
>> +    for (unsigned k = 0; k <= i; k++) {
>> +      total += a[k]->count();
>> +    }
>> +    for (unsigned j = 0; j < i; j++) {
>> +      int major = b[j]->version;
>> +      if (found)
>> +        major += i;
>> +      fun1(i + 1, total);
>> +      fun2(j, major);
>> +    }
>> +  }
>> +}
>> +
>> +/* { dg-final { scan-assembler-not {\tld1b\t(z[0-9]+)\.d, p[0-7]/z, 
>> \[(z[0-9]+)\.d,
>> #64\]} } } */
>> +
>> --
>> 2.43.0


Reply via email to