[Bug c/98190] New: GCC 11.0 miscompiles code using _Bool when inlining: bfxil instruction misused?

vstinner at redhat dot com via Gcc-bugs Mon, 07 Dec 2020 16:22:57 -0800

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98190


            Bug ID: 98190
           Summary: GCC 11.0 miscompiles code using _Bool when inlining:
                    bfxil instruction misused?
           Product: gcc
           Version: 11.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: vstinner at redhat dot com
  Target Milestone: ---

Created attachment 49704
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=49704&action=edit
bug_bool.c reproducer

GCC 11 miscompiles the attached bug_bool.c on AArch64 with -O1:

* Using -O0, memory_richcompare() returns 1 as expected
* Using -O1 or higher, memory_richcompare() returns 0 => BUG

The "bfxil  w4, w6, #0, #8" instruction is used to extend a single-byte register
(w6) into a larger register (w4). It works for w4 because w4 is equal to 0. But it
doesn't work with "bfxil w5, w6, #0, #8" because w5 is not equal to 0: only the
low 8 bits of w5 are replaced, and its upper bits keep their old value. Sorry, I
don't know AArch64, so I cannot help much more.

$ gcc bug_bool.c -o bug_bool -O0 && ./bug_bool
equal = 1

$ gcc bug_bool.c -o bug_bool -O1 && ./bug_bool
equal = 0

* gcc (GCC) 11.0.0 20201204 (Red Hat 11.0.0-0) (Fedora:
gcc-11.0.0-0.7.fc34.aarch64)
* Fedora Rawhide (Linux 5.10.0-0.rc6.90.fc34.aarch64)

If _Bool is replaced with "char", the code works as expected. GCC optimizes
_Bool differently.


=== GCC -O1 ===

Dump of assembler code for function cmp_base2:
   // w4 = 0
   // x5 = 0xbb0aebc39d5f8094 (so w5 = 0x9d5f8094)
   0x00000000004005e8 <+0>:     cmp     w2, #0x0
   0x00000000004005ec <+4>:     b.le    0x400628 <cmp_base2+64>
   0x00000000004005f0 <+8>:     mov     x3, #0x0                        // #0

   0x00000000004005f4 <+12>:    ldrb    w6, [x0, x3]   // w6=1
   0x00000000004005f8 <+16>:    bfxil   w4, w6, #0, #8 // w4=1

   0x00000000004005fc <+20>:    ldrb    w6, [x1, x3]   // w6=1
   0x0000000000400600 <+24>:    bfxil   w5, w6, #0, #8 // w5=0x9d5f8001

   0x0000000000400604 <+28>:    cmp     w4, w5         // 1 is not equal to 0x9d5f8001 !

   0x0000000000400608 <+32>:    b.ne    0x400620 <cmp_base2+56>  // b.any
   0x000000000040060c <+36>:    add     x3, x3, #0x1
   0x0000000000400610 <+40>:    cmp     w2, w3
   0x0000000000400614 <+44>:    b.gt    0x4005f4 <cmp_base2+12>
   0x0000000000400618 <+48>:    mov     w0, #0x1                        // #1
   0x000000000040061c <+52>:    b       0x400624 <cmp_base2+60>
   0x0000000000400620 <+56>:    mov     w0, #0x0                        // #0
   0x0000000000400624 <+60>:    ret
   0x0000000000400628 <+64>:    mov     w0, #0x1                        // #1
   0x000000000040062c <+68>:    b       0x400624 <cmp_base2+60>

Dump of assembler code for function memory_richcompare:
   0x0000000000400630 <+0>:     stp     x29, x30, [sp, #-32]!
   0x0000000000400634 <+4>:     mov     x29, sp
   0x0000000000400638 <+8>:     mov     w0, #0x1                        // #1
   0x000000000040063c <+12>:    str     w0, [sp, #24]
   0x0000000000400640 <+16>:    ldr     w2, [sp, #24]
   0x0000000000400644 <+20>:    adrp    x0, 0x400000
   0x0000000000400648 <+24>:    add     x0, x0, #0x770
   0x000000000040064c <+28>:    mov     x1, x0
   0x0000000000400650 <+32>:    bl      0x4005e8 <cmp_base2>
   0x0000000000400654 <+36>:    ldp     x29, x30, [sp], #32
   0x0000000000400658 <+40>:    ret


=== GCC -O0 ===

(gdb) disassemble cmp_base2 
Dump of assembler code for function cmp_base2:
   0x00000000004005e8 <+0>:     sub     sp, sp, #0x30
   0x00000000004005ec <+4>:     str     x0, [sp, #24]
   0x00000000004005f0 <+8>:     str     x1, [sp, #16]
   0x00000000004005f4 <+12>:    str     w2, [sp, #12]
   0x00000000004005f8 <+16>:    str     wzr, [sp, #44]
   0x00000000004005fc <+20>:    b       0x400668 <cmp_base2+128>
   0x0000000000400600 <+24>:    ldr     x0, [sp, #24]
   0x0000000000400604 <+28>:    ldrb    w0, [x0]
   0x0000000000400608 <+32>:    strb    w0, [sp, #39]
   0x000000000040060c <+36>:    ldr     x0, [sp, #16]
   0x0000000000400610 <+40>:    ldrb    w0, [x0]
   0x0000000000400614 <+44>:    strb    w0, [sp, #38]
   0x0000000000400618 <+48>:    ldrb    w1, [sp, #39]
   0x000000000040061c <+52>:    ldrb    w0, [sp, #38]
   0x0000000000400620 <+56>:    cmp     w1, w0
   0x0000000000400624 <+60>:    cset    w0, eq  // eq = none
   0x0000000000400628 <+64>:    and     w0, w0, #0xff
   0x000000000040062c <+68>:    str     w0, [sp, #40]
   0x0000000000400630 <+72>:    ldr     w0, [sp, #40]
   0x0000000000400634 <+76>:    cmp     w0, #0x0
   0x0000000000400638 <+80>:    b.gt    0x400644 <cmp_base2+92>
   0x000000000040063c <+84>:    ldr     w0, [sp, #40]
   0x0000000000400640 <+88>:    b       0x40067c <cmp_base2+148>
   0x0000000000400644 <+92>:    ldr     x0, [sp, #24]
   0x0000000000400648 <+96>:    add     x0, x0, #0x1
   0x000000000040064c <+100>:   str     x0, [sp, #24]
   0x0000000000400650 <+104>:   ldr     x0, [sp, #16]
   0x0000000000400654 <+108>:   add     x0, x0, #0x1
   0x0000000000400658 <+112>:   str     x0, [sp, #16]
   0x000000000040065c <+116>:   ldr     w0, [sp, #44]
   0x0000000000400660 <+120>:   add     w0, w0, #0x1
   0x0000000000400664 <+124>:   str     w0, [sp, #44]
   0x0000000000400668 <+128>:   ldr     w1, [sp, #44]
   0x000000000040066c <+132>:   ldr     w0, [sp, #12]
   0x0000000000400670 <+136>:   cmp     w1, w0
   0x0000000000400674 <+140>:   b.lt    0x400600 <cmp_base2+24>  // b.tstop
   0x0000000000400678 <+144>:   mov     w0, #0x1                        // #1
   0x000000000040067c <+148>:   add     sp, sp, #0x30
   0x0000000000400680 <+152>:   ret

Dump of assembler code for function memory_richcompare:
   0x0000000000400684 <+0>:     stp     x29, x30, [sp, #-32]!
   0x0000000000400688 <+4>:     mov     x29, sp
   0x000000000040068c <+8>:     adrp    x0, 0x400000
   0x0000000000400690 <+12>:    add     x0, x0, #0x7c0
   0x0000000000400694 <+16>:    str     x0, [sp, #24]
   0x0000000000400698 <+20>:    mov     w0, #0x1                        // #1
   0x000000000040069c <+24>:    str     w0, [sp, #16]
   0x00000000004006a0 <+28>:    ldr     w0, [sp, #16]
   0x00000000004006a4 <+32>:    mov     w2, w0
   0x00000000004006a8 <+36>:    ldr     x1, [sp, #24]
   0x00000000004006ac <+40>:    ldr     x0, [sp, #24]
   0x00000000004006b0 <+44>:    bl      0x4005e8 <cmp_base2>
   0x00000000004006b4 <+48>:    ldp     x29, x30, [sp], #32
   0x00000000004006b8 <+52>:    ret


Note: bug discovered in Python on Fedora Rawhide when running test_buffer:
https://bugs.python.org/issue42587

[Bug c/98190] New: GCC 11.0 miscompiles code using _Bool when inlining: bfxil instruction misused?

Reply via email to