https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93005
--- Comment #5 from Joel Holdsworth <joel at airwebreathe dot org.uk> ---
I found that if I make modified versions of the intrinsics in arm_neon.h that are designed more along the lines of the x86_64 SSE intrinsics, i.e. defined with a simple pointer dereference, then gcc does the right thing [1].

#include <arm_neon.h>

__extension__ extern __inline void
__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
vst1q_s32_fixed (int32_t * __a, int32x4_t __b)
{
  *(int32x4_t*)__a = __b;
}

__extension__ extern __inline int32x4_t
__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
vld1q_s32_fixed (const int32_t * __a)
{
  return *(const int32x4_t*)__a;
}

int32x4_t foo(int32x4_t a)
{
    int32_t temp[4];
    vst1q_s32_fixed(temp, a);
    return vld1q_s32_fixed(temp);
}

...compiles to:

foo(long __vector(4)):
        bx      lr

Is there any reason not to simply redefine vst1q_s32, vld1q_s32 and friends to stop using builtins?

[1] https://godbolt.org/#g:!((g:!((g:!((h:codeEditor,i:(fontScale:14,j:2,lang:c%2B%2B,selection:(endColumn:2,endLineNumber:22,positionColumn:1,positionLineNumber:1,selectionStartColumn:2,selectionStartLineNumber:22,startColumn:1,startLineNumber:1),source:'%23include+%3Carm_neon.h%3E%0A%0A__extension__+extern+__inline+void%0A__attribute__++((__always_inline__,+__gnu_inline__,+__artificial__))%0Avst1q_s32_fixed+(int32_t+*+__a,+int32x4_t+__b)%0A%7B%0A++*(int32x4_t*)__a+%3D+__b%3B%0A%7D%0A%0A__extension__+extern+__inline+int32x4_t%0A__attribute__++((__always_inline__,+__gnu_inline__,+__artificial__))%0Avld1q_s32_fixed+(const+int32_t+*+__a)%0A%7B%0A++return+*(const+int32x4_t*)__a%3B%0A%7D%0A%0Aint32x4_t+foo(int32x4_t+a)%0A%7B%0A++++int32_t+temp%5B4%5D%3B%0A++++vst1q_s32_fixed(temp,+a)%3B%0A++++return+vld1q_s32_fixed(temp)%3B%0A%7D'),l:'5',n:'0',o:'C%2B%2B+source+%232',t:'0')),header:(),k:49.54010711093072,l:'4',m:50,n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:arm831,filters:(b:'0',binary:'1',commentOnly:'0',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'1',trim:'1'),fontScale:14,j:2,lang:c%2B%2B,libs:!(),options:'-O2+-march%3Darmv7-a+-mtune%3Dcortex-a8+-mfpu%3Dneon+-mfloat-abi%3Dhard',selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:2),l:'5',n:'0',o:'ARM+gcc+8.3.1+(none)+(Editor+%232,+Compiler+%232)+C%2B%2B',t:'0')),header:(),l:'4',m:50,n:'0',o:'',s:0,t:'0')),k:100,l:'3',n:'0',o:'',t:'0')),version:4