https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98190
Bug ID: 98190 Summary: GCC 11.0 miscompiles code using _Bool when inlining: bfxil instruction misused? Product: gcc Version: 11.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c Assignee: unassigned at gcc dot gnu.org Reporter: vstinner at redhat dot com Target Milestone: --- Created attachment 49704 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=49704&action=edit bug_bool.c reproducer GCC 11 miscompiles attached bug_bool.c on AArch64 with -O1: * Using -O0, memory_richcompare() returns 1 as expected * Using -O1 or higher, memory_richcompare() returns 0 => BUG The "bfxil w4, w6, #0, #8" instruction is used to extend a single-byte register (w6) into a larger register (w4). It works for w4 because w4 is equal to 0 beforehand. But it doesn't with "bfxil w5, w6, #0, #8" when w5 is not equal to 0, since bfxil only replaces the low 8 bits of the destination register and leaves its upper bits unchanged. Sorry, I don't know AArch64, so I cannot help much. $ gcc bug_bool.c -o bug_bool -O0 && ./bug_bool equal = 1 $ gcc bug_bool.c -o bug_bool -O1 && ./bug_bool equal = 0 * gcc (GCC) 11.0.0 20201204 (Red Hat 11.0.0-0) (Fedora: gcc-11.0.0-0.7.fc34.aarch64) * Fedora Rawhide (Linux 5.10.0-0.rc6.90.fc34.aarch64) If _Bool is replaced with "char", the code works as expected. GCC optimizes _Bool differently. === GCC -O1 === Dump of assembler code for function cmp_base2: // w4 = 0 // x5 = 0xbb0aebc39d5f8094 (w5 = 0x9d5f8094) 0x00000000004005e8 <+0>: cmp w2, #0x0 0x00000000004005ec <+4>: b.le 0x400628 <cmp_base2+64> 0x00000000004005f0 <+8>: mov x3, #0x0 // #0 0x00000000004005f4 <+12>: ldrb w6, [x0, x3] // w6=1 0x00000000004005f8 <+16>: bfxil w4, w6, #0, #8 // w4=1 0x00000000004005fc <+20>: ldrb w6, [x1, x3] // w6=1 0x0000000000400600 <+24>: bfxil w5, w6, #0, #8 // w5=0x9d5f8001 0x0000000000400604 <+28>: cmp w4, w5 // 1 is not equal to 0x9d5f8001 !
0x0000000000400608 <+32>: b.ne 0x400620 <cmp_base2+56> // b.any 0x000000000040060c <+36>: add x3, x3, #0x1 0x0000000000400610 <+40>: cmp w2, w3 0x0000000000400614 <+44>: b.gt 0x4005f4 <cmp_base2+12> 0x0000000000400618 <+48>: mov w0, #0x1 // #1 0x000000000040061c <+52>: b 0x400624 <cmp_base2+60> 0x0000000000400620 <+56>: mov w0, #0x0 // #0 0x0000000000400624 <+60>: ret 0x0000000000400628 <+64>: mov w0, #0x1 // #1 0x000000000040062c <+68>: b 0x400624 <cmp_base2+60> Dump of assembler code for function memory_richcompare: 0x0000000000400630 <+0>: stp x29, x30, [sp, #-32]! 0x0000000000400634 <+4>: mov x29, sp 0x0000000000400638 <+8>: mov w0, #0x1 // #1 0x000000000040063c <+12>: str w0, [sp, #24] 0x0000000000400640 <+16>: ldr w2, [sp, #24] 0x0000000000400644 <+20>: adrp x0, 0x400000 0x0000000000400648 <+24>: add x0, x0, #0x770 0x000000000040064c <+28>: mov x1, x0 0x0000000000400650 <+32>: bl 0x4005e8 <cmp_base2> 0x0000000000400654 <+36>: ldp x29, x30, [sp], #32 0x0000000000400658 <+40>: ret === GCC -O0 === (gdb) disassemble cmp_base2 Dump of assembler code for function cmp_base2: 0x00000000004005e8 <+0>: sub sp, sp, #0x30 0x00000000004005ec <+4>: str x0, [sp, #24] 0x00000000004005f0 <+8>: str x1, [sp, #16] 0x00000000004005f4 <+12>: str w2, [sp, #12] 0x00000000004005f8 <+16>: str wzr, [sp, #44] 0x00000000004005fc <+20>: b 0x400668 <cmp_base2+128> 0x0000000000400600 <+24>: ldr x0, [sp, #24] 0x0000000000400604 <+28>: ldrb w0, [x0] 0x0000000000400608 <+32>: strb w0, [sp, #39] 0x000000000040060c <+36>: ldr x0, [sp, #16] 0x0000000000400610 <+40>: ldrb w0, [x0] 0x0000000000400614 <+44>: strb w0, [sp, #38] 0x0000000000400618 <+48>: ldrb w1, [sp, #39] 0x000000000040061c <+52>: ldrb w0, [sp, #38] 0x0000000000400620 <+56>: cmp w1, w0 0x0000000000400624 <+60>: cset w0, eq // eq = none 0x0000000000400628 <+64>: and w0, w0, #0xff 0x000000000040062c <+68>: str w0, [sp, #40] 0x0000000000400630 <+72>: ldr w0, [sp, #40] 0x0000000000400634 <+76>: cmp w0, #0x0 0x0000000000400638 <+80>: b.gt 
0x400644 <cmp_base2+92> 0x000000000040063c <+84>: ldr w0, [sp, #40] 0x0000000000400640 <+88>: b 0x40067c <cmp_base2+148> 0x0000000000400644 <+92>: ldr x0, [sp, #24] 0x0000000000400648 <+96>: add x0, x0, #0x1 0x000000000040064c <+100>: str x0, [sp, #24] 0x0000000000400650 <+104>: ldr x0, [sp, #16] 0x0000000000400654 <+108>: add x0, x0, #0x1 0x0000000000400658 <+112>: str x0, [sp, #16] 0x000000000040065c <+116>: ldr w0, [sp, #44] 0x0000000000400660 <+120>: add w0, w0, #0x1 0x0000000000400664 <+124>: str w0, [sp, #44] 0x0000000000400668 <+128>: ldr w1, [sp, #44] 0x000000000040066c <+132>: ldr w0, [sp, #12] 0x0000000000400670 <+136>: cmp w1, w0 0x0000000000400674 <+140>: b.lt 0x400600 <cmp_base2+24> // b.tstop 0x0000000000400678 <+144>: mov w0, #0x1 // #1 0x000000000040067c <+148>: add sp, sp, #0x30 0x0000000000400680 <+152>: ret Dump of assembler code for function memory_richcompare: 0x0000000000400684 <+0>: stp x29, x30, [sp, #-32]! 0x0000000000400688 <+4>: mov x29, sp 0x000000000040068c <+8>: adrp x0, 0x400000 0x0000000000400690 <+12>: add x0, x0, #0x7c0 0x0000000000400694 <+16>: str x0, [sp, #24] 0x0000000000400698 <+20>: mov w0, #0x1 // #1 0x000000000040069c <+24>: str w0, [sp, #16] 0x00000000004006a0 <+28>: ldr w0, [sp, #16] 0x00000000004006a4 <+32>: mov w2, w0 0x00000000004006a8 <+36>: ldr x1, [sp, #24] 0x00000000004006ac <+40>: ldr x0, [sp, #24] 0x00000000004006b0 <+44>: bl 0x4005e8 <cmp_base2> 0x00000000004006b4 <+48>: ldp x29, x30, [sp], #32 0x00000000004006b8 <+52>: ret Note: bug discovered in Python on Fedora Rawhide when running test_buffer: https://bugs.python.org/issue42587