[Bug c/113921] New: Output register of an "asm volatile goto" is incorrectly clobbered/discarded

2024-02-14 Thread seanjc at google dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=113921

Bug ID: 113921
   Summary: Output register of an "asm volatile goto" is
incorrectly clobbered/discarded
   Product: gcc
   Version: 11.4.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: c
  Assignee: unassigned at gcc dot gnu.org
  Reporter: seanjc at google dot com
CC: jakub at redhat dot com, ndesaulniers at google dot com,
torva...@linux-foundation.org, ubizjak at gmail dot com
  Target Milestone: ---

Created attachment 57428
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=57428&action=edit
Intermediate output of the miscompiled file

gcc-11 appears to have a bug that results in gcc incorrectly clobbering the
output register of an "asm volatile goto".

The failing asm blob is a sequence of VMREADs in the Linux kernel, with the
outputs stored into a dynamically allocated structure whose lifetime extends
far beyond the scope of the code in question:

  vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
  vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
  vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
  vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);

where vmcs_read64() eventually becomes:

asm volatile goto("1: vmread %[field], %[output]\n\t"
  "jna %l[do_fail]\n\t"

  _ASM_EXTABLE(1b, %l[do_exception])

  : [output] "=r" (value)
  : [field] "r" (field)
  : "cc"
  : do_fail, do_exception);

return value;

  do_fail:
instrumentation_begin();
vmread_error(field);
instrumentation_end();
return 0;

  do_exception:
kvm_spurious_fault();
return 0;


The first three PDPTR VMREADs generate correctly, but the fourth effectively
gets ignored, and '0' is written to vmcs12->guest_pdptr3.

3597:   mov    $0x280a,%r13d
359d:   vmread %r13,%r13
35a1:   jbe    3724
35a7:   mov    %r13,0xd8(%rbx)
35ae:   jmp    396b
35b3:   mov    $0x280c,%r13d
35b9:   vmread %r13,%r13
35bd:   jbe    3705
35c3:   mov    %r13,0xe0(%rbx)
35ca:   jmp    393a
35cf:   mov    $0x280e,%r13d
35d5:   vmread %r13,%r13
35d9:   jbe    36e6
35df:   mov    %r13,0xe8(%rbx)
35e6:   jmp    3909
35eb:   mov    $0x2810,%eax
35f0:   vmread %rax,%rax        <= VMREAD to nowhere
35f3:   jbe    36ca
35f9:   xor    %r12d,%r12d      <= zeroing of output
35fc:   mov    %r12,0xf0(%rbx)  <= store to vmcs12->guest_pdptr3

Replacing "asm volatile goto" with the following macro

  #define asm_goto(x...) \
  do { asm volatile goto(x); asm (""); } while (0)

to force a second barrier generates functional code, although the attempt to
miscompile the sequence is still evident, as the output of the affected VMREAD
is unnecessarily bounced through an extra register:

35f8:   mov    $0x280a,%r13d
35fe:   vmread %r13,%r13
3602:   jbe    36b2
3608:   mov    %r13,0xd8(%rbx)
360f:   jmp    3925
3614:   mov    $0x280c,%r13d
361a:   vmread %r13,%r13
361e:   jbe    3693
3620:   mov    %r13,0xe0(%rbx)
3627:   jmp    38f4
362c:   mov    $0x280e,%r13d
3632:   vmread %r13,%r13
3636:   jbe    367a
3638:   mov    %r13,0xe8(%rbx)
363f:   jmp    38c3
3644:   mov    $0x2810,%eax
3649:   vmread %rax,%rax
364c:   jbe    3664
364e:   mov    %rax,%r12
3651:   mov    %r12,0xf0(%rbx)

The bug reproduces with two different 11.4.0 builds, on three different systems
(Intel i7-9850H, Intel i7-13700K, AMD EPYC 7B12), all running Debian-based
Linux.

$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/x86_64-linux-gnu/11/lto-wrapper
OFFLOAD_TARGET_NAMES=nvptx-none:amdgcn-amdhsa
OFFLOAD_TARGET_DEFAULT=1
Target: x86_64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu
11.4.0-1ubuntu1~22.04' --with-bugurl=file:///usr/share/doc/gcc-11/README.Bugs
--enable-languages=c,ada,c++,go,brig,d,fortran,objc,obj-c++,m2 --prefix=/usr
--with-gcc-major-version-only --program-suffix=-11
--program-prefix=x86_64-linux-gnu- --enable-shared --enable-linker-build-id
--libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
--libdir=/usr/lib --enable-nls --enable-bootstrap --enable-clocale=gnu
--enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object
--disable-vtable-verify --enable-plugin --enable-default-pie --with-system-zlib
--enable-libphobos-checking=release --with-target-system-zlib=auto
--enable-objc-gc=auto --enable-multiarch --disable-wer

[Bug rtl-optimization/116912] Cross-compiling aarch64 on x86_64 incorrectly ignores/discards function output parameter

2024-09-30 Thread seanjc at google dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116912

--- Comment #4 from Sean Christopherson  ---
Gah, sorry, I skimmed right past that sentence.  Thanks for the fast response,
much appreciated!

[Bug rtl-optimization/116912] New: Cross-compiling aarch64 on x86_64 incorrectly ignores/discards function output parameter

2024-09-30 Thread seanjc at google dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116912

Bug ID: 116912
   Summary: Cross-compiling aarch64 on x86_64 incorrectly
ignores/discards function output parameter
   Product: gcc
   Version: 13.2.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: rtl-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: seanjc at google dot com
  Target Milestone: ---

Created attachment 59242
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=59242&action=edit
Intermediate output of the miscompiled file

aarch64-linux-gnu-gcc 13.2.0 and 11.4.0 (haven't tried other gcc versions) on
x86-64 miscompile this code snippet:

  static  noinline void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
  {
u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
  }

pmcr = pmcr_orig;
printf("orig = %lx, next = %lx, want = %lu\n", pmcr_orig, pmcr,
pmcr_n);
set_pmcr_n(&pmcr, pmcr_n);
printf("orig = %lx, next = %lx, want = %lu\n", pmcr_orig, pmcr,
pmcr_n);

into code that ignores the @pmcr output parameter.  The generated code for
set_pmcr_n() is correct, the caller simply ignores the result that is stored
into @pmcr.  Note, the printf() calls aren't necessary to reproduce the issue,
they were added purely to serve as markers to make it easy to visually confirm
the issue.  There is other consumption of the set_pmcr_n() output that also
ignores the result (the issue originally manifested as a failed test).

E.g. aarch64-linux-gnu-gcc 13.2.0 generates:

 0000000000401c90 <set_pmcr_n>:
  401c90:   f9400002    ldr x2, [x0]
  401c94:   b3751022    bfi x2, x1, #11, #5
  401c98:   f9000002    str x2, [x0]
  401c9c:   d65f03c0    ret

 00402660 :
  ...
  402724:   aa1403e3    mov x3, x20
  402728:   aa1503e2    mov x2, x21
  40272c:   aa1603e0    mov x0, x22
  402730:   aa1503e1    mov x1, x21
  402734:   940060ff    bl  41ab30 <_IO_printf>
  402738:   aa1403e1    mov x1, x20
  40273c:   910183e0    add x0, sp, #0x60
  402740:   97fffd54    bl  401c90 <set_pmcr_n>
  402744:   aa1403e3    mov x3, x20
  402748:   aa1503e2    mov x2, x21
  40274c:   aa1503e1    mov x1, x21
  402750:   aa1603e0    mov x0, x22
  402754:   940060f7    bl  41ab30 <_IO_printf>

and running the compiled program with a value of pmcr_n=6 yields output of:

  orig = 3040, next = 3040, want = 0
  orig = 3040, next = 3040, want = 0

$ aarch64-linux-gnu-gcc -v
Using built-in specs.
COLLECT_GCC=aarch64-linux-gnu-gcc
COLLECT_LTO_WRAPPER=/usr/libexec/gcc-cross/aarch64-linux-gnu/13/lto-wrapper
Target: aarch64-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Debian 13.2.0-13'
--with-bugurl=file:///usr/share/doc/gcc-13/README.Bugs
--enable-languages=c,ada,c++,go,d,fortran,objc,obj-c++,m2 --prefix=/usr
--with-gcc-major-version-only --program-suffix=-13 --enable-shared
--enable-linker-build-id --libexecdir=/usr/libexec --without-included-gettext
--enable-threads=posix --libdir=/usr/lib --enable-nls --with-sysroot=/
--enable-clocale=gnu --enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-libstdcxx-backtrace
--enable-gnu-unique-object --disable-libquadmath --disable-libquadmath-support
--enable-plugin --enable-default-pie --with-system-zlib
--enable-libphobos-checking=release --without-target-system-zlib
--enable-multiarch --enable-fix-cortex-a53-843419 --disable-werror
--enable-checking=release --build=x86_64-linux-gnu --host=x86_64-linux-gnu
--target=aarch64-linux-gnu --program-prefix=aarch64-linux-gnu-
--includedir=/usr/aarch64-linux-gnu/include
Thread model: posix
Supported LTO compression algorithms: zlib zstd
gcc version 13.2.0 (Debian 13.2.0-13)