https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81646
--- Comment #6 from H.J. Lu <hjl.tools at gmail dot com> ---
(In reply to Florian Weimer from comment #5)
> (In reply to H.J. Lu from comment #4)
> > You can use -mstackrealign.
>
> I don't want to realign the stack unconditionally for performance reasons.
> I want to preserve alignment for callback functions, and give GCC the option
> to use SSE2 where beneficial. If that's not possible, so be it, considering
> that it's only i386.
Have you tried -mstackrealign on your code? I got:
[hjl@gnu-6 gcc]$ cat x.c
#include <x86intrin.h>
extern void foo1 (__m128, __m128, __m128);
extern void foo2 (__m128, __m128, __m128, __m128);
extern __m128 x;
void
bar1 (void)
{
foo1 (x, x, x);
}
void
bar2 (void)
{
foo2 (x, x, x, x);
}
[hjl@gnu-6 gcc]$ gcc -S -O2 -m32 x.c -mstackrealign -msse2
[hjl@gnu-6 gcc]$ cat x.s
.file "x.c"
.text
.p2align 4,,15
.globl bar1
.type bar1, @function
bar1:
.LFB4910:
.cfi_startproc
movaps x, %xmm0
movaps %xmm0, %xmm2
movaps %xmm0, %xmm1
jmp foo1
.cfi_endproc
.LFE4910:
.size bar1, .-bar1
.p2align 4,,15
.globl bar2
.type bar2, @function
bar2:
.LFB4911:
.cfi_startproc
leal 4(%esp), %ecx
.cfi_def_cfa 1, 0
andl $-16, %esp
pushl -4(%ecx)
pushl %ebp
.cfi_escape 0x10,0x5,0x2,0x75,0
movl %esp, %ebp
pushl %ecx
.cfi_escape 0xf,0x3,0x75,0x7c,0x6
subl $20, %esp
movaps x, %xmm0
movaps %xmm0, %xmm2
movaps %xmm0, %xmm1
movaps %xmm0, (%esp)
call foo2
addl $16, %esp
movl -4(%ebp), %ecx
.cfi_def_cfa 1, 0
leave
.cfi_restore 5
leal -4(%ecx), %esp
.cfi_def_cfa 4, 4
ret
.cfi_endproc
.LFE4911:
.size bar2, .-bar2
.ident "GCC: (GNU) 7.1.1 20170709 (Red Hat 7.1.1-4)"
.section .note.GNU-stack,"",@progbits
[hjl@gnu-6 gcc]$
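GCC realigns the stack only in bar2 (the caller of foo2), which has to store the fourth __m128 argument on the stack with movaps; it does not realign in bar1 (the caller of foo1), where all three arguments are passed in %xmm0-%xmm2, so there is no need for it.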