https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93588
--- Comment #5 from H.J. Lu <hjl.tools at gmail dot com> ---

It is because the load/store costs of vector registers for the register allocator are too low. This patch fixes it:

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index c73917e5a62..54b9dad932c 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2574,9 +2574,9 @@ struct processor_costs core_cost = {
   {6, 6},                /* cost of storing MMX registers
                             in SImode and DImode */
   2, 2, 4,               /* cost of moving XMM,YMM,ZMM register */
-  {6, 6, 6, 6, 12},      /* cost of loading SSE registers
+  {6, 6, 6, 10, 20},     /* cost of loading SSE registers
                             in 32,64,128,256 and 512-bit */
-  {6, 6, 6, 6, 12},      /* cost of storing SSE registers
+  {6, 6, 6, 12, 24},     /* cost of storing SSE registers
                             in 32,64,128,256 and 512-bit */
   6, 6,                  /* SSE->integer and integer->SSE moves */
   /* End of register allocator costs. */