[Bug c/114659] New: gcc miscompiles a __builtin_memcpy on i386, leading to wrong results for SNaN

bruno at clisp dot org via Gcc-bugs Tue, 09 Apr 2024 08:32:48 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114659


            Bug ID: 114659
           Summary: gcc miscompiles a __builtin_memcpy on i386, leading to
                    wrong results for SNaN
           Product: gcc
           Version: 13.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: bruno at clisp dot org
  Target Milestone: ---

Created attachment 57912
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57912&action=edit
test case tf.c

In the two attached test cases, gcc miscompiles a __builtin_memcpy invocation.
In the first test case, the data type is a 'float' (4 bytes).
In the second test case, the data type is a 'double' (8 bytes).

A value of this data type exists in memory, given as *x and *y.
A modified copy of this value, convert_snan_to_qnan(value), also exists
on the stack, among the local variables.
gcc implements the __builtin_memcpy operation by accessing
convert_snan_to_qnan(value) instead of the original value.

How to reproduce:

$ gcc-version 13.2.0 -m32 -Wall tf.c
$ ./a.out ; echo $?
0
$ gcc-version 13.2.0 -m32 -Wall -O2 tf.c
$ ./a.out ; echo $?
1

$ gcc-version 13.2.0 -m32 -Wall td.c
$ ./a.out ; echo $?
0
$ gcc-version 13.2.0 -m32 -Wall -O2 td.c
$ ./a.out ; echo $?
1

Analysis:

$ gcc-version 13.2.0 -m32 -Wall -O2 -S tf.c

tf.c has this function:
============================================================
/* Compare *x and *y and return nonzero if *x is ordered before or equal
   to *y, zero otherwise.  Unlike a plain '<=', the comparison also takes
   the sign bit and NaN-ness of the operands into account, and two NaNs
   of the same sign are compared by their raw 32-bit representation.
   (Presumably modelled on the IEEE 754 totalOrder predicate, judging by
   the name -- the code shown here does not say so explicitly.)  */
int
my_totalorderf (float const *x, float const *y)
{
  /* Different sign bits: *x comes first exactly when *x is the negative
     one, so xs itself (nonzero if the sign bit of *x is set) is the
     result.  */
  int xs = __builtin_signbit (*x);
  int ys = __builtin_signbit (*y);
  if (!xs != !ys)
    return xs;

  /* From here on both operands have the same sign.  If exactly one of
     them is a NaN, the result is 1 when *x is a negative NaN or a
     non-negative non-NaN, and 0 otherwise.  */
  int xn = __builtin_isnan (*x);
  int yn = __builtin_isnan (*y);
  if (!xn != !yn)
    return !xn == !xs;
  /* Neither operand is a NaN: ordinary floating-point comparison.  */
  if (!xn)
    return *x <= *y;

  /* Both operands are NaNs with the same sign.  Compare their bit
     patterns as unsigned integers; extended_sign is all ones when the
     sign bit is set and zero otherwise, so the XOR inverts every bit of
     a negative operand and thereby reverses the order for negative
     NaNs.  */
  unsigned int extended_sign = -!!xs;
  union { unsigned int i; float f; } xu = {0}, yu = {0};
  /* Byte-wise copies: the comparison below needs the exact
     representation of *x and *y as stored in memory.  */
  __builtin_memcpy (&xu.f, x, sizeof (float));
  __builtin_memcpy (&yu.f, y, sizeof (float));
  return (xu.i ^ extended_sign) <= (yu.i ^ extended_sign);
}
============================================================
tf.s looks like this:
============================================================
my_totalorderf:
        pushl   %ebx
        subl    $8, %esp
;;  int xs = __builtin_signbit (*x);
        movl    16(%esp), %eax
        flds    (%eax)
        fsts    (%esp)                ;; [%esp+0] := convert_snan_to_qnan(*x)
        fxam
        fnstsw  %ax
        movl    %eax, %edx
        movl    20(%esp), %eax
        andl    $512, %edx
;;  int ys = __builtin_signbit (*y);
        flds    (%eax)
        sete    %cl
        fsts    4(%esp)               ;; [%esp+4] := convert_snan_to_qnan(*y)
        fxam
        fnstsw  %ax
        testb   $2, %ah
        sete    %al
;;  if (!xs != !ys)
        cmpb    %al, %cl
        jne     .L12
;;  int xn = __builtin_isnan (*x);
        fxch    %st(1)
        fucomi  %st(0), %st
        fxch    %st(1)
        setnp   %bl
;;  int yn = __builtin_isnan (*y);
        fucomip %st(0), %st
        setnp   %al
;;  if (!xn != !yn)
        cmpb    %al, %bl
        jne     .L11
        fstp    %st(0)
        flds    (%esp)
        fucomi  %st(0), %st
        jp      .L9
        flds    4(%esp)
        xorl    %edx, %edx
        fcomip  %st(1), %st
        fstp    %st(0)
        setnb   %dl
        jmp     .L6
        .p2align 4,,10
        .p2align 3
.L12:
        fstp    %st(0)
        fstp    %st(0)
.L6:
        addl    $8, %esp
        movl    %edx, %eax
        popl    %ebx
        ret
        .p2align 4,,10
        .p2align 3
.L11:
        fucomip %st(0), %st
        setp    %dl
        addl    $8, %esp
        xorl    %ecx, %edx
        popl    %ebx
        movzbl  %dl, %edx
        movl    %edx, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L9:
        fstp    %st(0)
        negl    %edx                  ;; computes -xs
        movl    (%esp), %eax          ;; fetches convert_snan_to_qnan(*x) instead of *x
        movl    4(%esp), %ebx         ;; fetches convert_snan_to_qnan(*y) instead of *y
        sbbl    %edx, %edx            ;; computes extended_sign = -!!xs;
        xorl    %edx, %eax            ;; computes (xu.i ^ extended_sign)
        xorl    %ebx, %edx            ;; computes (yu.i ^ extended_sign)
        cmpl    %eax, %edx            ;; compares (xu.i ^ extended_sign) and (yu.i ^ extended_sign)
        setnb   %dl
        movzbl  %dl, %edx
        jmp     .L6
============================================================
As you can see, (%esp) and 4(%esp) contain *not* the original
*x and *y respectively, but the result of an flds/fsts instruction pair,
that is, convert_snan_to_qnan(*x) and convert_snan_to_qnan(*y), respectively.

See https://lists.gnu.org/archive/html/bug-gnulib/2023-10/msg00060.html
for some background about these instructions on i386.

The analysis of td.c is similar; here the value is stored to
memory through an fldl/fstl pair.

[Bug c/114659] New: gcc miscompiles a __builtin_memcpy on i386, leading to wrong results for SNaN

Reply via email to