https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95464
Bug ID: 95464
Summary: [10/11 Regression] Miscompilation of mesa on
x86_64-linux since r10-6426
Product: gcc
Version: 10.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: middle-end
Assignee: unassigned at gcc dot gnu.org
Reporter: jakub at gcc dot gnu.org
Target Milestone: ---
Since r10-6426-g5f0653a8b75a5ad5a5405a27dd92d3a5759eed4c we miscompile the
following testcase on x86_64-linux at -O2:
/* Six unsigned bit-fields whose widths add up to 32 bits
   (1 + 1 + 1 + 1 + 14 + 14).  */
struct S { unsigned a:1, b:1, c:1, d:1, e:14, f:14; };
/* Checker: aborts unless x is exactly
   { a = 0, b = 1, c = 0, d = 1, e = 7239, f = 6474 }.
   Marked noipa so the check is not subject to interprocedural
   optimization with its caller.
   NOTE(review): declared to return int but has no return statement; the
   caller in this testcase discards the result.  Left as is so the
   reproducer stays verbatim.  */
__attribute__((noipa)) int
foo (struct S x)
{
if (x.a != 0 || x.b != 1 || x.c != 0 || x.d != 1
|| x.e != 7239 || x.f != 6474)
__builtin_abort ();
}
/* Checker for two values at once.  Aborts unless
   x == { a = 0, b = 1, c = 0, d = 1, e = 7239, f = 6474 } and
   y == { a = 0, b = 1, c = 1, d = 1, e = 16320, f = 7315 }.
   Marked noipa so the checks are not subject to interprocedural
   optimization with the caller.  */
__attribute__((noipa)) void
bar (struct S x, struct S y)
{
/* First argument: must still hold the unmodified input value.  */
if (x.a != 0 || x.b != 1 || x.c != 0 || x.d != 1
|| x.e != 7239 || x.f != 6474)
__builtin_abort ();
/* Second argument: a, b and d as in the first, c/e/f replaced.  */
if (y.a != 0 || y.b != 1 || y.c != 1 || y.d != 1
|| y.e != 16320 || y.f != 7315)
__builtin_abort ();
}
/* Final checker: aborts unless x is exactly
   { a = 1, b = 1, c = 1, d = 1, e = 16320, f = 7315 }.
   The e and f fields lie above the low byte of the struct, so this check
   fails if only the low 8 bits of the value reach the callee intact.
   Marked noipa so the check is not subject to interprocedural
   optimization with the caller.  */
__attribute__((noipa)) void
baz (struct S x)
{
if (x.a != 1 || x.b != 1 || x.c != 1 || x.d != 1
|| x.e != 16320 || x.f != 7315)
__builtin_abort ();
}
/* The function that gets miscompiled.  Builds two struct S values from
   x and y, validates them through foo/bar/baz, and forces the local `a`
   to live across an asm statement that clobbers most general-purpose
   registers.  z is the number of times foo (x) is called up front.
   Statement order and the asm clobber list are part of the reproducer;
   do not reorder or simplify.  */
__attribute__((noipa)) void
qux (struct S x, struct S y, unsigned z)
{
struct S a = x, b;
/* Call the checker z times.  The bug report notes that replacing this
   loop with a single call changes the RTL generated for the bit-field
   stores further down.  */
for (unsigned i = 0; i < z; ++i)
foo (x);
/* Not taken for the input main passes (x.a is 0 there).  */
if (x.a && x.e == 16)
a.e = 32;
/* b keeps a, b and d from a and takes c, e and f from y.  */
b = a;
b.c = y.c;
b.e = y.e;
b.f = y.f;
bar (a, b);
a = b;
/* Empty asm whose only effect is its clobber list.  Per the bug report it
   stands in for the register pressure of the original, much larger
   function, so that `a` gets spilled.
   NOTE(review): "bp" is clobbered only when __OPTIMIZE__ is defined,
   presumably because it serves as the frame pointer in unoptimized
   builds -- confirm.  */
__asm volatile ("" : : : "ax", "bx", "cx", "dx", "si", "di",
#ifdef __OPTIMIZE__
"bp",
#endif
"r8", "r9", "r10", "r11", "r12", "r13", "r14",
"r15");
/* Set the a and c bits after the asm.  In the report's RTL dump these two
   stores appear as one QImode OR with 5 under strict_low_part; the
   remaining bits of `a` must be preserved.  */
a.a = 1;
a.c = 1;
baz (a);
}
/* Driver: calls qux once with fixed inputs and a loop count of 1.
   Any mismatch detected by foo/bar/baz aborts; otherwise returns 0.  */
int
main ()
{
/* x matches what foo and the first check in bar expect.  */
struct S x = { 0, 1, 0, 1, 7239, 6474 };
/* Only c, e and f of y are copied by qux; a, b and d are unused there.  */
struct S y = { 1, 0, 1, 0, 16320, 7315 };
qux (x, y, 1);
return 0;
}
(In the original source there was obviously no inline asm; instead, the function
was large enough that the variable got spilled.)
I believe this is an RA (register allocator) bug, though. In *.ira we have:
(insn 67 66 68 3 (parallel [
(set (strict_low_part (subreg:QI (reg/v:SI 94 [ a ]) 0))
(ior:QI (subreg:QI (reg/v:SI 94 [ a ]) 0)
(const_int 5 [0x5])))
(clobber (reg:CC 17 flags))
]) "gallivm2.c":51:3 492 {*iorqi_1_slp}
(expr_list:REG_UNUSED (reg:CC 17 flags)
(nil)))
(insn 68 67 69 3 (set (reg:SI 5 di)
(reg/v:SI 94 [ a ])) "gallivm2.c":51:3 67 {*movsi_internal}
(expr_list:REG_DEAD (reg/v:SI 94 [ a ])
(nil)))
which looks good to me: the strict_low_part in there says that the low 8 bits
of pseudo 94 are ORed with 5 and the upper 24 bits stay as is (so, in the end,
it is the same as SImode |= 5; I'm not really sure why we ended up with that,
e.g. if the loop calling foo is changed into a single call to foo, it doesn't
happen).
But LRA changes this into:
(insn 120 66 67 3 (set (reg:QI 0 ax [137])
(mem/c:QI (plus:DI (reg/f:DI 7 sp)
(const_int 8 [0x8])) [4 %sfp+-8 S1 A32])) "gallivm2.c":51:3 69
{*movqi_internal}
(nil))
(insn 67 120 121 3 (parallel [
(set (strict_low_part (reg:QI 0 ax [137]))
(ior:QI (reg:QI 0 ax [137])
(const_int 5 [0x5])))
(clobber (reg:CC 17 flags))
]) "gallivm2.c":51:3 492 {*iorqi_1_slp}
(nil))
(insn 121 67 122 3 (set (reg:QI 5 di [orig:94 a ] [94])
(reg:QI 0 ax [137])) "gallivm2.c":51:3 69 {*movqi_internal}
(nil))
which is not equivalent, because it makes the upper 24 bits undefined instead
of being loaded from the spill slot.