https://gcc.gnu.org/g:e2011ab13de3e70774f869b356f5f9c750780b34

commit r15-9495-ge2011ab13de3e70774f869b356f5f9c750780b34
Author: Jan Hubicka <hubi...@ucw.cz>
Date:   Tue Apr 15 19:04:15 2025 +0200

    Set ADDSS cost to 3 for znver5
    
    Znver5 has an addss latency of 2 in the typical case, while all earlier
    versions have a latency of 3.
    Unfortunately, the addss cost is used to cost many SSE instructions other
    than just addss, and setting the cost to 2 makes us vectorize four 64-bit
    stores into one 256-bit store, which in turn regresses imagemagick.
    
    This patch sets the cost back to 3.  Next stage1 we can untie addss from
    the other operations and set it correctly.
    
    bootstrapped/regtested x86_64-linux and also benchmarked on SPEC2k17
    
    gcc/ChangeLog:
    
            PR target/119298
            * config/i386/x86-tune-costs.h (znver5_cost): Set ADDSS cost to 3.

Diff:
---
 gcc/config/i386/x86-tune-costs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 7c8cb738d7cd..9477345bdd7e 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2120,7 +2120,7 @@ struct processor_costs znver5_cost = {
   COSTS_N_INSNS (1),                   /* cost of cheap SSE instruction.  */
   /* ADDSS has throughput 2 and latency 2
      (in some cases when source is another addition).  */
-  COSTS_N_INSNS (2),                   /* cost of ADDSS/SD SUBSS/SD insns.  */
+  COSTS_N_INSNS (3),                   /* cost of ADDSS/SD SUBSS/SD insns.  */
   /* MULSS has throughput 2 and latency 3.  */
   COSTS_N_INSNS (3),                   /* cost of MULSS instruction.  */
   COSTS_N_INSNS (3),                   /* cost of MULSD instruction.  */

Reply via email to