https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66917
--- Comment #2 from Mikael Pettersson <mikpelinux at gmail dot com> ---
This test case changed behaviour twice in the 4.7->4.8 development cycle.
First, r185807 broke it by replacing the code for unaligned memory accesses with
code that requires more alignment than the source guarantees:
--- pr66917.s-r185806 2015-07-19 17:16:23.536116155 +0200
+++ pr66917.s-r185807 2015-07-19 17:13:23.016388416 +0200
@@ -17,42 +17,13 @@
.global test_neon_load_store_alignment
.type test_neon_load_store_alignment, %function
test_neon_load_store_alignment:
- @ args = 0, pretend = 0, frame = 32
+ @ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
- mov r3, r0
- stmfd sp!, {r4, r5, r6, r7}
- mov r7, r1
- ldr r0, [r0, #0] @ unaligned
- mov r6, r2
- sub sp, sp, #32
- ldr r1, [r3, #4] @ unaligned
- mov r5, sp
- ldr r2, [r3, #8] @ unaligned
- add r4, sp, #16
- ldr r3, [r3, #12] @ unaligned
- mov ip, sp
- stmia r5!, {r0, r1, r2, r3}
- ldr r0, [r7, #0] @ unaligned
- ldr r1, [r7, #4] @ unaligned
- ldr r2, [r7, #8] @ unaligned
- ldr r3, [r7, #12] @ unaligned
- fldd d16, [sp, #0] @ int
- fldd d19, [sp, #8] @ int
- stmia r4!, {r0, r1, r2, r3}
- fldd d18, [sp, #16] @ int
- veor d17, d16, d18
- fldd d18, [sp, #24] @ int
- fstd d17, [sp, #0] @ int
- veor d16, d19, d18
- fstd d16, [sp, #8] @ int
- ldmia ip!, {r0, r1, r2, r3}
- str r0, [r6, #0] @ unaligned
- str r1, [r6, #4] @ unaligned
- str r2, [r6, #8] @ unaligned
- str r3, [r6, #12] @ unaligned
- add sp, sp, #32
- ldmfd sp!, {r4, r5, r6, r7}
+ vldmia r0, {d18-d19}
+ vldmia r1, {d16-d17}
+ veor q8, q9, q8
+ vstmia r2, {d16-d17}
bx lr
.size test_neon_load_store_alignment, .-test_neon_load_store_alignment
.section .text.startup,"ax",%progbits
On Linux, this code dies with SIGBUS because the kernel can't fix up the first
misaligned access (the vldmia from r0, which is opcode ecd02b04 in the log below):
[292105.326391] Alignment trap: not handling instruction ecd02b04 at [<00008e84>]
[292105.396370] Unhandled fault: alignment exception (0x001) at 0x0008b109
Then r191399 changed it again by replacing the vldm/vstm instructions with
vld1.64/vst1.64 instructions:
--- pr66917.s-r191398 2015-07-19 19:12:15.815583139 +0200
+++ pr66917.s-r191399 2015-07-19 19:08:36.416037498 +0200
@@ -20,10 +20,10 @@
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
- vldmia r0, {d18-d19}
- vldmia r1, {d16-d17}
+ vld1.64 {d18-d19}, [r0:64]
+ vld1.64 {d16-d17}, [r1:64]
veor q8, q9, q8
- vstmia r2, {d16-d17}
+ vst1.64 {d16-d17}, [r2:64]
bx lr
.size test_neon_load_store_alignment, .-test_neon_load_store_alignment
.section .text.startup,"ax",%progbits
These instructions still fault, but the kernel recognizes them and fixes up the
alignment faults (if suitably configured).
If I compile the test case for x86_64 with -O3 -mavx, the compiler generates
vmovdqu instructions, which permit unaligned addresses. So I suspect a bug in
the ARM target.