https://gcc.gnu.org/g:4a04110ec8388b6540380cfedbe50af1b29e3e36
commit r14-10435-g4a04110ec8388b6540380cfedbe50af1b29e3e36
Author: Richard Biener <rguent...@suse.de>
Date:   Tue Jul 16 10:45:27 2024 +0200

    Fixup unaligned load/store cost for znver5

    Currently unaligned YMM and ZMM load and store costs are cheaper than
    the aligned ones, which causes the vectorizer to purposely mis-align
    accesses by adding an alignment prologue.  It looks like the unaligned
    costs were simply copied from the bogus znver4 costs.  The following
    makes the unaligned costs equal to the aligned costs, as in the fixed
    znver4 version.

            * config/i386/x86-tune-costs.h (znver5_cost): Update unaligned
            load and store cost from the aligned costs.

    (cherry picked from commit 896393791ee34ffc176c87d232dfee735db3aaab)

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index d0168eebdc15..8348ab8230ad 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2060,8 +2060,8 @@ struct processor_costs znver5_cost = {
 					   in 32bit, 64bit, 128bit, 256bit and 512bit */
   {8, 8, 8, 12, 12},			/* cost of storing SSE register
 					   in 32bit, 64bit, 128bit, 256bit and 512bit */
-  {6, 6, 6, 6, 6},			/* cost of unaligned loads.  */
-  {8, 8, 8, 8, 8},			/* cost of unaligned stores.  */
+  {6, 6, 10, 10, 12},			/* cost of unaligned loads.  */
+  {8, 8, 8, 12, 12},			/* cost of unaligned stores.  */
   2, 2, 2,				/* cost of moving XMM,YMM,ZMM
 					   register.  */
   6,					/* cost of moving SSE register to integer.  */
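As context for the change above (not part of the commit), here is a minimal
sketch of the kind of loop whose vectorization strategy depends on these
cost-table entries.  The file and function names are hypothetical;
-O3, -march=znver5 and -fopt-info-vec-all are existing GCC options that let
you observe whether the vectorizer peels an alignment prologue before the
main vector loop:

    /* saxpy.c - hypothetical example, not from the commit.
       When vectorizing this loop for znver5, GCC consults znver5_cost
       to weigh aligned against unaligned 256/512-bit loads and stores.
       With the old table (unaligned cheaper than aligned) that
       comparison was skewed toward adding an alignment prologue of
       peeled scalar iterations; after the fix both cost the same.  */
    void
    saxpy (int n, float a, const float *restrict x, float *restrict y)
    {
      for (int i = 0; i < n; i++)
        y[i] = a * x[i] + y[i];
    }

Compiling with "gcc -O3 -march=znver5 -fopt-info-vec-all -S saxpy.c" makes
the vectorizer report its peeling and versioning decisions, so the effect
of the corrected costs can be checked directly.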