[Bug inline-asm/91111] New: arm64 Linux kernel panics at boot due to unexpected register assignment in inline asm

2019-07-08 Thread will.deacon at arm dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91111

Bug ID: 91111
   Summary: arm64 Linux kernel panics at boot due to unexpected
register assignment in inline asm
   Product: gcc
   Version: 8.3.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: inline-asm
  Assignee: unassigned at gcc dot gnu.org
  Reporter: will.deacon at arm dot com
  Target Milestone: ---

Created attachment 46578
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=46578&action=edit
Output of -save-temps

When compiling the Linux kernel for arm64 with CONFIG_OPTIMIZE_INLINING=y
(which effectively removes the use of __attribute__((__always_inline__)) for
functions marked as inline), the atomic64 selftest fails due to a local
register variable being assigned to a different register from the one specified
when used in an inline asm block.

While I appreciate that we're treading on thin ice here, my reading of the
docs at:

 
https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables

suggests that this should work.

To be more precise, this kernel code:


static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
{
register long x0 asm ("x0") = (long)v;

asm volatile(ARM64_LSE_ATOMIC_INSN(
/* LL/SC */
__LL_SC_ATOMIC64(dec_if_positive)
__nops(6),
/* LSE atomics */
"1: ldr x30, %[v]\n"
"   subs    %[ret], x30, #1\n"
"   b.lt    2f\n"
"   casal   x30, %[ret], %[v]\n"
"   sub x30, x30, #1\n"
"   sub x30, x30, %[ret]\n"
"   cbnz    x30, 1b\n"
"2:")
: [ret] "+&r" (x0), [v] "+Q" (v->counter)
:
: __LL_SC_CLOBBERS, "cc", "memory");

return x0;
}


requires that %[ret] expands to register x0, whereas it is instead expanding to 
register x1. You can see this in the assembly code for the function:


.align  2
.type   arch_atomic64_dec_if_positive, %function
arch_atomic64_dec_if_positive:
.LVL0:
.LFB244:
.file 1 "./arch/arm64/include/asm/atomic_lse.h"
.loc 1 411 1 view -0
.cfi_startproc
.loc 1 412 2 view .LVU1
.loc 1 414 2 view .LVU2
.loc 1 411 1 is_stmt 0 view .LVU3
stp x29, x30, [sp, -16]!
.cfi_def_cfa_offset 16
.cfi_offset 29, -16
.cfi_offset 30, -8
.LVL1:
.loc 1 414 2 view .LVU4
mov x1, x0
.loc 1 411 1 view .LVU5
mov x29, sp
.loc 1 414 2 view .LVU6
#APP
// 414 "./arch/arm64/include/asm/atomic_lse.h" 1
.if 1 == 1
661:
bl  __ll_sc_arch_atomic64_dec_if_positive
.rept   6
nop
.endr

662:
.pushsection .altinstructions,"a"
 .word 661b - .
 .if 0 == 0
 .word 663f - .
 .else
 .word 0- .
 .endif
 .hword 5
 .byte 662b-661b
 .byte 664f-663f
.popsection
 .if 0 == 0
.pushsection .altinstr_replacement, "a"
663:
1:  ldr x30, [x0]
subs    x1, x30, #1
b.lt    2f
casal   x30, x1, [x0]
sub x30, x30, #1
sub x30, x30, x1
cbnz    x30, 1b
2:
664:
.popsection
.org    . - (664b-663b) + (662b-661b)
.org    . - (662b-661b) + (664b-663b)
.else
663:
664:
.endif
.endif

// 0 "" 2
.LVL2:
.loc 1 414 2 view .LVU7
#NO_APP
mov x0, x1
.LVL3:
.loc 1 431 2 is_stmt 1 view .LVU8
.loc 1 432 1 is_stmt 0 view .LVU9
ldp x29, x30, [sp], 16
.cfi_restore 30
.cfi_restore 29
.cfi_def_cfa_offset 0
ret
.cfi_endproc
.LFE244:
.size   arch_atomic64_dec_if_positive, .-arch_atomic64_dec_if_positive


I've attached the .i/.s files output by:

aarch64-linux-gnu-gcc -save-temps -Wp,-MD,lib/.atomic64_test.o.d  -nostdinc
-isystem
/home/will/system/aarch64/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu/bin/../lib/gcc/aarch64-linux-gnu/8.3.0/include
-I./arch/arm64/include -I./arch/arm64/include/generated  -I./include
-I./arch/arm64/include/uapi -I./arch/arm64/include/generated/uapi
-I./include/uapi -I./include/generated/uapi -include ./include/linux/kconfig.h
-include ./include/linux/compiler_types.h -D__KERNEL__ -mlittle-endian
-DKASAN_SHADOW_SCALE_SHIFT=3 -Wall -Wundef -Werror=strict-prototypes
-Wno-trigraphs -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE
-Werror=implicit-function-declaration -Werror=implicit-int -Wno-format-security
-std=gnu89 -mgeneral-regs-only -DCONFIG_AS_LSE=1
-fno-asynchronous-unwind-tables -Wno-psabi -mabi=lp64
-DKASAN_SHADOW_SCALE_SHIFT=3 -fno-delete-null-pointer-checks -Wno-frame-address
-Wno-format-truncation -Wno-format-overflow -O2
--param=allow-store

[Bug target/86677] popcount builtin detection is breaking some kernel build

2018-10-18 Thread will.deacon at arm dot com
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86677

Will Deacon  changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |will.deacon at arm dot com

--- Comment #8 from Will Deacon  ---
I replied to the ticket raised on the kernel.org bugzilla about this change:

https://bugzilla.kernel.org/show_bug.cgi?id=200671#c1

I've also duplicated my response below in case you'd rather respond here.

--->8

Whilst providing an implementation of __popcountsi2 will fix the build, won't
this end up with worse code generation compared to a compiler which doesn't do
this idiom recognition?

If I understand this correctly, an in-line integer popcount implementation in
the code can be spotted by the compiler and replaced by a branch to an
out-of-line integer popcount implementation.

Please can we have an option to disable this idiom recognition? It really
doesn't seem to make sense in an environment where the SIMD registers aren't
readily accessible.