https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66917
--- Comment #2 from Mikael Pettersson <mikpelinux at gmail dot com> ---

This test case changed behaviour twice in the 4.7->4.8 development cycle.

First r185807 broke it by replacing code for unaligned memory accesses with code requiring more alignment than present in the source:

--- pr66917.s-r185806 2015-07-19 17:16:23.536116155 +0200
+++ pr66917.s-r185807 2015-07-19 17:13:23.016388416 +0200
@@ -17,42 +17,13 @@
         .global test_neon_load_store_alignment
         .type test_neon_load_store_alignment, %function
 test_neon_load_store_alignment:
-        @ args = 0, pretend = 0, frame = 32
+        @ args = 0, pretend = 0, frame = 0
         @ frame_needed = 0, uses_anonymous_args = 0
         @ link register save eliminated.
-        mov r3, r0
-        stmfd sp!, {r4, r5, r6, r7}
-        mov r7, r1
-        ldr r0, [r0, #0] @ unaligned
-        mov r6, r2
-        sub sp, sp, #32
-        ldr r1, [r3, #4] @ unaligned
-        mov r5, sp
-        ldr r2, [r3, #8] @ unaligned
-        add r4, sp, #16
-        ldr r3, [r3, #12] @ unaligned
-        mov ip, sp
-        stmia r5!, {r0, r1, r2, r3}
-        ldr r0, [r7, #0] @ unaligned
-        ldr r1, [r7, #4] @ unaligned
-        ldr r2, [r7, #8] @ unaligned
-        ldr r3, [r7, #12] @ unaligned
-        fldd d16, [sp, #0] @ int
-        fldd d19, [sp, #8] @ int
-        stmia r4!, {r0, r1, r2, r3}
-        fldd d18, [sp, #16] @ int
-        veor d17, d16, d18
-        fldd d18, [sp, #24] @ int
-        fstd d17, [sp, #0] @ int
-        veor d16, d19, d18
-        fstd d16, [sp, #8] @ int
-        ldmia ip!, {r0, r1, r2, r3}
-        str r0, [r6, #0] @ unaligned
-        str r1, [r6, #4] @ unaligned
-        str r2, [r6, #8] @ unaligned
-        str r3, [r6, #12] @ unaligned
-        add sp, sp, #32
-        ldmfd sp!, {r4, r5, r6, r7}
+        vldmia r0, {d18-d19}
+        vldmia r1, {d16-d17}
+        veor q8, q9, q8
+        vstmia r2, {d16-d17}
         bx lr
         .size test_neon_load_store_alignment, .-test_neon_load_store_alignment
         .section .text.startup,"ax",%progbits

On Linux, this code SIGBUSes because the kernel can't fix up the first misaligned access:

[292105.326391] Alignment trap: not handling instruction ecd02b04 at [<00008e84>]
[292105.396370] Unhandled fault: alignment exception (0x001) at 0x0008b109

Then r191399
changed it again by replacing the vldm/vstm instructions with vld1.64/vst1.64 instructions:

--- pr66917.s-r191398 2015-07-19 19:12:15.815583139 +0200
+++ pr66917.s-r191399 2015-07-19 19:08:36.416037498 +0200
@@ -20,10 +20,10 @@
         @ args = 0, pretend = 0, frame = 0
         @ frame_needed = 0, uses_anonymous_args = 0
         @ link register save eliminated.
-        vldmia r0, {d18-d19}
-        vldmia r1, {d16-d17}
+        vld1.64 {d18-d19}, [r0:64]
+        vld1.64 {d16-d17}, [r1:64]
         veor q8, q9, q8
-        vstmia r2, {d16-d17}
+        vst1.64 {d16-d17}, [r2:64]
         bx lr
         .size test_neon_load_store_alignment, .-test_neon_load_store_alignment
         .section .text.startup,"ax",%progbits

These instructions still fault, but the kernel recognizes them and fixes up the alignment faults (if suitably configured).

If I compile the test case for x86_64 w/ -O3 -mavx, the compiler generates vmovdqu instructions which permit unaligned addresses. So I suspect a target bug.