https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99102
Bug ID: 99102
Summary: SVE: Wrong code with -O2 -ftree-vectorize
-march=armv8.2-a+sve -msve-vector-bits=256
Product: gcc
Version: 11.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: acoplan at gcc dot gnu.org
Target Milestone: ---
AArch64 GCC miscompiles the following testcase:
/* Array that the loop nest in b() conditionally stores into. */
long a[44];
/* d has no initializer (file scope, so it starts at zero); e is -7.
   main() passes them to b() as j and k respectively. */
short d, e = -7;
/*
 * Reduced loop nest: stores j into a[i] for i = 0, 4, 8, 12, but only
 * when l is zero.  When l is non-zero no store to a[] may happen.
 *
 *   f - step of the outermost loop and upper bound of the h loop
 *   j - value written to a[i]
 *   k - subtracted from the middle-loop counter on every iteration
 *   l - guard: the store is performed only if l == 0
 */
void b(char f, short j, short k, unsigned l) {
for (int g = 0; g < 9; g += f)
/* NOTE: this local 'b' shadows the function name; it changes by -k per step. */
for (int b = 0; b < 90; b -= k)
for (int h = 0; h < f; h++)
for (short i = 0; i < 15; i += 4)
/* The only use of l in the function: it guards the store below. */
if (!l)
a[i] = j;
}
/*
 * Driver for the testcase: sets a[0] and a[1] to 7, then calls b() with
 * l == 5.  Because l is non-zero, b() must not store anything, so a[0]
 * is expected to still be 7 and the abort must not be reached.
 */
int main() {
for (long c = 0; c < 2; ++c)
a[c] = 7;
/* f = 9, j = d, k = e, l = 5 */
b(9, d, e, 5);
/* a[0] == 0 here means b() wrote j (= d) to a[0] although l != 0. */
if (!a[0])
__builtin_abort();
}
at -O2 -ftree-vectorize -march=armv8.2-a+sve -msve-vector-bits=256. Looking at
the generated code for b:
// Compiler-generated AArch64/SVE code for b(f, j, k, l), quoted as evidence.
// Under AAPCS64 the arguments arrive as w0 = f, w1 = j, w2 = k, w3 = l.
// Register roles below are read off the listing:
//   w0 = f & 255, w2 = sign-extended k, x3 = address of .LANCHOR0,
//   w5 = g, w4 = middle-loop counter, w1 = h (reused after j is broadcast),
//   z0.d = j sign-extended in every 64-bit lane, z1.d = byte offsets, p0 = predicate.
// Nothing in this listing reads w3/x3 as 'l'; the store at .L3 is unconditional.
b:
.LFB0:
.cfi_startproc
// x3 (incoming 'l') is overwritten here with the page address of .LANCHOR0.
adrp x3, .LANCHOR0
// Keep only the low 8 bits of f.
and w0, w0, 255
// Sign-extend the 16-bit k.
sxth w2, w2
// x3 = full address of .LANCHOR0 (the address of 'a' according to the report).
add x3, x3, :lo12:.LANCHOR0
// g = 0
mov w5, 0
// Broadcast the low 16 bits of j to every halfword lane of z0.
mov z0.h, w1
// p0: first 32 byte elements active, i.e. a full 256-bit vector.
ptrue p0.b, vl32
// x1 = 32, the stride used to build the offset vector below.
mov x1, 32
// Sign-extend the low halfword of each 64-bit lane: every .d lane = (long)j.
sxth z0.d, p0/m, z0.d
// z1.d = 0, 32, 64, ... (start 0, step 32): byte offsets, presumably those of
// a[0], a[4], a[8], a[12] given 8-byte long and four lanes per 256-bit vector.
index z1.d, #0, x1
// Outer loop head (g loop): reset the middle-loop counter.
.L2:
mov w4, 0
.p2align 3,,7
// Middle loop head: h = 0; skip the inner loop entirely when f == 0.
.L7:
mov w1, 0
cbz w0, .L5
.p2align 3,,7
// Inner loop (h loop): one scatter store per iteration replaces the i loop.
.L3:
add w1, w1, 1
// Scatter-store z0's 64-bit lanes to x3 + z1 byte offsets.  No test of 'l'
// precedes this store.
st1d z0.d, p0, [x3, z1.d]
// Repeat until h == f.
cmp w0, w1
bne .L3
// Middle loop latch: counter -= k; continue while counter <= 89 (i.e. < 90).
.L5:
sub w4, w4, w2
cmp w4, 89
ble .L7
// Outer loop latch: g += f; continue while g <= 8 (i.e. < 9).
add w5, w5, w0
cmp w5, 8
ble .L2
ret
We appear to ignore the value of the argument "l" completely: x3, which holds
it on entry, is immediately clobbered with the address of a, and the st1d at
.L3 is executed unconditionally.