https://gcc.gnu.org/bugzilla/show_bug.cgi?id=9
Bug ID: 9
Summary: arm64 Linux kernel panics at boot due to unexpected
register assignment in inline asm
Product: gcc
Version: 8.3.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: inline-asm
Assignee: unassigned at gcc dot gnu.org
Reporter: will.deacon at arm dot com
Target Milestone: ---
Created attachment 46578
--> https://gcc.gnu.org/bugzilla/attachment.cgi?id=46578&action=edit
Output of -save-temps
When compiling the Linux kernel for arm64 with CONFIG_OPTIMIZE_INLINING=y
(which effectively removes the use of __attribute__((__always_inline__)) for
functions marked as inline), the atomic64 selftest fails due to a local
register variable being assigned to a different register from the one specified
when used in an inline asm block.
While I appreciate that we're treading on thin ice here, my reading of the
docs at:
https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables
suggests that this should work.
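For reference, the documented idiom boils down to something like this
standalone sketch (a made-up example of mine, not kernel code), where the
local register variable is supposed to pin the asm operand to x0:

long square_in_x0(long v)
{
        register long x0 asm ("x0") = v;

        /* Per the docs, %0 should be allocated to x0 here. */
        asm volatile("mul %0, %0, %0" : "+r" (x0));
        return x0;
}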
To be more precise, this kernel code:
static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
{
        register long x0 asm ("x0") = (long)v;

        asm volatile(ARM64_LSE_ATOMIC_INSN(
        /* LL/SC */
        __LL_SC_ATOMIC64(dec_if_positive)
        __nops(6),
        /* LSE atomics */
        "1:     ldr     x30, %[v]\n"
        "       subs    %[ret], x30, #1\n"
        "       b.lt    2f\n"
        "       casal   x30, %[ret], %[v]\n"
        "       sub     x30, x30, #1\n"
        "       sub     x30, x30, %[ret]\n"
        "       cbnz    x30, 1b\n"
        "2:")
        : [ret] "+&r" (x0), [v] "+Q" (v->counter)
        :
        : __LL_SC_CLOBBERS, "cc", "memory");

        return x0;
}
requires %[ret] to expand to register x0, but it instead expands to register
x1. You can see this in the assembly code for the function (note the
mov x1, x0 before the #APP block and the matching mov x0, x1 after #NO_APP):
.align 2
.type arch_atomic64_dec_if_positive, %function
arch_atomic64_dec_if_positive:
.LVL0:
.LFB244:
.file 1 "./arch/arm64/include/asm/atomic_lse.h"
.loc 1 411 1 view -0
.cfi_startproc
.loc 1 412 2 view .LVU1
.loc 1 414 2 view .LVU2
.loc 1 411 1 is_stmt 0 view .LVU3
stp x29, x30, [sp, -16]!
.cfi_def_cfa_offset 16
.cfi_offset 29, -16
.cfi_offset 30, -8
.LVL1:
.loc 1 414 2 view .LVU4
mov x1, x0
.loc 1 411 1 view .LVU5
mov x29, sp
.loc 1 414 2 view .LVU6
#APP
// 414 "./arch/arm64/include/asm/atomic_lse.h" 1
.if 1 == 1
661:
bl __ll_sc_arch_atomic64_dec_if_positive
.rept 6
nop
.endr
662:
.pushsection .altinstructions,"a"
.word 661b - .
.if 0 == 0
.word 663f - .
.else
.word 0- .
.endif
.hword 5
.byte 662b-661b
.byte 664f-663f
.popsection
.if 0 == 0
.pushsection .altinstr_replacement, "a"
663:
1: ldr x30, [x0]
subs x1, x30, #1
b.lt 2f
casal x30, x1, [x0]
sub x30, x30, #1
sub x30, x30, x1
cbnz x30, 1b
2:
664:
.popsection
.org . - (664b-663b) + (662b-661b)
.org . - (662b-661b) + (664b-663b)
.else
663:
664:
.endif
.endif
// 0 "" 2
.LVL2:
.loc 1 414 2 view .LVU7
#NO_APP
mov x0, x1
.LVL3:
.loc 1 431 2 is_stmt 1 view .LVU8
.loc 1 432 1 is_stmt 0 view .LVU9
ldp x29, x30, [sp], 16
.cfi_restore 30
.cfi_restore 29
.cfi_def_cfa_offset 0
ret
.cfi_endproc
.LFE244:
.size arch_atomic64_dec_if_positive, .-arch_atomic64_dec_if_positive
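Stripped of the kernel macros, the pattern reduces to roughly the following
standalone function (my own sketch, not the attached testcase; I have not
checked whether this minimal version misassigns the register on its own):

typedef struct { long counter; } atomic64_t;

static inline long dec_if_positive(atomic64_t *v)
{
        register long x0 asm ("x0") = (long)v;

        /* Same shape as the kernel code: x0 is both the incoming
         * pointer and the [ret] operand, so %[ret] must be x0. */
        asm volatile("1:     ldr     x30, %[v]\n"
                     "       subs    %[ret], x30, #1\n"
                     "       b.lt    2f\n"
                     "       casal   x30, %[ret], %[v]\n"
                     "       sub     x30, x30, #1\n"
                     "       sub     x30, x30, %[ret]\n"
                     "       cbnz    x30, 1b\n"
                     "2:"
                     : [ret] "+&r" (x0), [v] "+Q" (v->counter)
                     :
                     : "x30", "cc", "memory");
        return x0;
}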
I've attached the .i/.s files output by:
aarch64-linux-gnu-gcc -save-temps -Wp,-MD,lib/.atomic64_test.o.d -nostdinc
-isystem
/home/will/system/aarch64/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu/bin/../lib/gcc/aarch64-linux-gnu/8.3.0/include
-I./arch/arm64/include -I./arch/arm64/include/generated -I./include
-I./arch/arm64/include/uapi -I./arch/arm64/include/generated/uapi
-I./include/uapi -I./include/generated/uapi -include ./include/linux/kconfig.h
-include ./include/linux/compiler_types.h -D__KERNEL__ -mlittle-endian
-DKASAN_SHADOW_SCALE_SHIFT=3 -Wall -Wundef -Werror=strict-prototypes
-Wno-trigraphs -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE
-Werror=implicit-function-declaration -Werror=implicit-int -Wno-format-security
-std=gnu89 -mgeneral-regs-only -DCONFIG_AS_LSE=1
-fno-asynchronous-unwind-tables -Wno-psabi -mabi=lp64
-DKASAN_SHADOW_SCALE_SHIFT=3 -fno-delete-null-pointer-checks -Wno-frame-address
-Wno-format-truncation -Wno-format-overflow -O2
--param=allow-store