http://gcc.gnu.org/bugzilla/show_bug.cgi?id=49061
Summary: wrong code with ARM NEON intrinsics
Product: gcc
Version: 4.6.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: c
AssignedTo: unassig...@gcc.gnu.org
ReportedBy: hu...@free.fr

The code below generates wrong results when compiled with optimizations enabled:

#include <stdio.h>
#include <string.h>
#include <arm_neon.h>

static void vvalidate(float *f) {
  float32x4_t a0 = { 0, 1, 2, 3};
  float32x4_t a1 = { 4, 5, 6, 7};
  float32x4_t a2 = { 8, 9, 10, 11};
  float32x4x2_t t0 = vzipq_f32(a1, a2);
  t0 = vzipq_f32(a0, a2);
  memcpy(f, &t0.val[0], 8*sizeof(float));
}

int main() {
  float f[8];
  vvalidate(f);
  printf("interleaved([0 1 2 3], [8 9 10 11])=%g %g %g %g %g %g %g %g\n",
         f[0], f[1], f[2], f[3], f[4], f[5], f[6], f[7]);
  if (f[4] != 2) printf("BUUUUGGG -- should be 0 8 1 9 2 10 3 11\n");
  return 0;
}

# gcc-4.6.0 -mfloat-abi=softfp -mfpu=neon -O0 ./neon_bug.c && ./a.out
interleaved([0 1 2 3], [8 9 10 11])=0 8 1 9 2 10 3 11

# gcc-4.6.0 -mfloat-abi=softfp -mfpu=neon -O1 ./neon_bug.c && ./a.out
interleaved([0 1 2 3], [8 9 10 11])=0 8 1 9 6 10 7 11
BUUUUGGG -- should be 0 8 1 9 2 10 3 11

The bug also happens with gcc-4.5 (Ubuntu/Linaro 4.5.2-8ubuntu4) and gcc-4.4 (Ubuntu/Linaro 4.4.5-15ubuntu1), both of which ship with Ubuntu 11.04.