Hi, this patch backports the initial znver3 support to the gcc10 branch. Since the trunk and the branch have diverged, there is a non-trivial change to cpuinfo discovery. I do:
--- a/libgcc/config/i386/cpuinfo.c +++ b/libgcc/config/i386/cpuinfo.c @@ -111,6 +111,12 @@ get_amd_cpu (unsigned int family, unsigned int model) if (model >= 0x30) __cpu_model.__cpu_subtype = AMDFAM17H_ZNVER2; break; + case 0x19: + __cpu_model.__cpu_type = AMDFAM19H; + /* AMD family 19h version 1. */ + if (model <= 0x0f) + __cpu_model.__cpu_subtype = AMDFAM19H_ZNVER3; + break; default: break; } While your patch also sets ZNVER3 for the case where VAES is supported, that would require backporting more of the logic detecting VAES. Is that necessary? I see it may cause znver3 to be selected by default on a future znver4 if it stays with amdfam19, but we did not do this before. Bootstrapped/regtested x86_64-linux. With -march=native on a znver3 machine we get the right flags, but trunk in addition passes: -mno-amx-bf16 -mno-amx-int8 -mno-amx-tile -mno-avxvnni -mno-hreset -mno-kl -mno-serialize -mno-tsxldtrk -mno-uintr -mno-widekl which are options we did not backport. On top of that I plan to backport the tuning patches, with the exception of gather, which seems a bit controversial and can wait for gcc11. Honza 2021-03-30 Jan Hubicka <hubi...@ucw.cz> Backport Venkataramanan Kumar <venkataramanan.ku...@amd.com> Sharavan Kumar <shravan.ku...@amd.com> * common/config/i386/cpuinfo.h (get_amd_cpu) recognize znver3. * common/config/i386/i386-common.c (processor_names): Add znver3. (processor_alias_table): Add znver3 and AMDFAM19H entry. * common/config/i386/i386-cpuinfo.h (processor_types): Add AMDFAM19H. (processor_subtypes): AMDFAM19H_ZNVER3. * config.gcc (i[34567]86-*-linux* | ...): Likewise. * config/i386/driver-i386.c: (host_detect_local_cpu): Let -march=native recognize znver3 processors. * config/i386/i386-c.c (ix86_target_macros_internal): Add znver3. * config/i386/i386-options.c (m_znver3): New definition. (m_ZNVER): Include m_znver3. (processor_cost_table): Add znver3. * config/i386/i386.c (ix86_reassociation_width): Likewise. * config/i386/i386.h (TARGET_znver3): New definition. 
(enum processor_type): Add PROCESSOR_ZNVER3. * config/i386/i386.md (define_attr "cpu"): Add znver3. * config/i386/x86-tune-sched.c: (ix86_issue_rate): Likewise. (ix86_adjust_cost): Likewise. * config/i386/x86-tune.def (X86_TUNE_AVOID_256FMA_CHAINS: Likewise. * config/i386/znver1.md: Add new reservations for znver3. * doc/extend.texi: Add details about znver3. * doc/invoke.texi: Likewise. gcc/testsuite/ChangeLog: 2021-03-30 Jan Hubicka <hubi...@ucw.cz> * gcc.target/i386/funcspec-56.inc: Handle new march. libgcc/ChangeLog: 2021-03-30 Jan Hubicka <hubi...@ucw.cz> * config/i386/cpuinfo.c (get_amd_cpu): Support amdfam19. * config/i386/cpuinfo.h (enum processor_types): Add AMDFAM19H. (enum processor_subtypes): Add AMDFAM19H_ZNVER3. diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c index 1e4d25f052a..97335d42af1 100644 --- a/gcc/common/config/i386/i386-common.c +++ b/gcc/common/config/i386/i386-common.c @@ -1582,7 +1582,8 @@ const char *const processor_names[] = "btver1", "btver2", "znver1", - "znver2" + "znver2", + "znver3" }; /* Guarantee that the array is aligned with enum processor_type. 
*/ @@ -1775,6 +1776,16 @@ const pta processor_alias_table[] = | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SHA | PTA_LZCNT | PTA_POPCNT | PTA_CLWB | PTA_RDPID | PTA_WBNOINVD}, + {"znver3", PROCESSOR_ZNVER2, CPU_ZNVER2, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 + | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 + | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW + | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE + | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED + | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES + | PTA_SHA | PTA_LZCNT | PTA_POPCNT | PTA_CLWB | PTA_RDPID + | PTA_WBNOINVD | PTA_VAES | PTA_VPCLMULQDQ | PTA_PKU}, {"btver1", PROCESSOR_BTVER1, CPU_GENERIC, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 | PTA_PRFCHW diff --git a/gcc/config.gcc b/gcc/config.gcc index d093b6b7f79..6fcdd771d4c 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -662,7 +662,7 @@ pentium4 pentium4m pentiumpro prescott lakemont" # 64-bit x86 processors supported by --with-arch=. Each processor # MUST be separated by exactly one space. 
x86_64_archs="amdfam10 athlon64 athlon64-sse3 barcelona bdver1 bdver2 \ -bdver3 bdver4 znver1 znver2 btver1 btver2 k8 k8-sse3 opteron \ +bdver3 bdver4 znver1 znver2 znver3 btver1 btver2 k8 k8-sse3 opteron \ opteron-sse3 nocona core2 corei7 corei7-avx core-avx-i core-avx2 atom \ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \ silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ @@ -3687,6 +3687,10 @@ case ${target} in arch=znver2 cpu=znver2 ;; + znver3-*) + arch=znver3 + cpu=znver3 + ;; bdver4-*) arch=bdver4 cpu=bdver4 @@ -3808,6 +3812,10 @@ case ${target} in arch=znver2 cpu=znver2 ;; + znver3-*) + arch=znver3 + cpu=znver3 + ;; bdver4-*) arch=bdver4 cpu=bdver4 diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c index b9cab74887d..d71fc7e89ed 100644 --- a/gcc/config/i386/driver-i386.c +++ b/gcc/config/i386/driver-i386.c @@ -668,6 +668,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) processor = PROCESSOR_GEODE; else if (has_movbe && family == 22) processor = PROCESSOR_BTVER2; + else if (has_vaes) + processor = PROCESSOR_ZNVER3; else if (has_clwb) processor = PROCESSOR_ZNVER2; else if (has_clzero) @@ -1077,6 +1079,9 @@ const char *host_detect_local_cpu (int argc, const char **argv) case PROCESSOR_ZNVER2: cpu = "znver2"; break; + case PROCESSOR_ZNVER3: + cpu = "znver3"; + break; case PROCESSOR_BTVER1: cpu = "btver1"; break; diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index a4b9b786fb9..d6529859785 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -128,6 +128,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__znver2"); def_or_undef (parse_in, "__znver2__"); break; + case PROCESSOR_ZNVER3: + def_or_undef (parse_in, "__znver3"); + def_or_undef (parse_in, "__znver3__"); + break; case PROCESSOR_BTVER1: def_or_undef (parse_in, "__btver1"); def_or_undef (parse_in, "__btver1__"); @@ -306,6 +310,9 @@ 
ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_ZNVER2: def_or_undef (parse_in, "__tune_znver2__"); break; + case PROCESSOR_ZNVER3: + def_or_undef (parse_in, "__tune_znver3__"); + break; case PROCESSOR_BTVER1: def_or_undef (parse_in, "__tune_btver1__"); break; diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c index 81f5af1fdd2..4f21be9c9d3 100644 --- a/gcc/config/i386/i386-options.c +++ b/gcc/config/i386/i386-options.c @@ -145,11 +145,12 @@ along with GCC; see the file COPYING3. If not see #define m_BDVER4 (HOST_WIDE_INT_1U<<PROCESSOR_BDVER4) #define m_ZNVER1 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER1) #define m_ZNVER2 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER2) +#define m_ZNVER3 (HOST_WIDE_INT_1U<<PROCESSOR_ZNVER3) #define m_BTVER1 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER1) #define m_BTVER2 (HOST_WIDE_INT_1U<<PROCESSOR_BTVER2) #define m_BDVER (m_BDVER1 | m_BDVER2 | m_BDVER3 | m_BDVER4) #define m_BTVER (m_BTVER1 | m_BTVER2) -#define m_ZNVER (m_ZNVER1 | m_ZNVER2) +#define m_ZNVER (m_ZNVER1 | m_ZNVER2 | m_ZNVER3) #define m_AMD_MULTIPLE (m_ATHLON_K8 | m_AMDFAM10 | m_BDVER | m_BTVER \ | m_ZNVER) @@ -730,6 +731,7 @@ static const struct processor_costs *processor_cost_table[] = &btver1_cost, &btver2_cost, &znver1_cost, + &znver2_cost, &znver2_cost }; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index e23f92b58cc..2f838840e96 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -21577,8 +21577,9 @@ ix86_reassociation_width (unsigned int op, machine_mode mode) /* Integer vector instructions execute in FP unit and can execute 3 additions and one multiplication per cycle. 
*/ - if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2) - && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) + if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2 + || ix86_tune == PROCESSOR_ZNVER3) + && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS) return 1; /* Account for targets that splits wide vectors into multiple parts. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index a94b3ea6696..a382011929b 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -455,6 +455,7 @@ extern const struct processor_costs ix86_size_cost; #define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2) #define TARGET_ZNVER1 (ix86_tune == PROCESSOR_ZNVER1) #define TARGET_ZNVER2 (ix86_tune == PROCESSOR_ZNVER2) +#define TARGET_ZNVER3 (ix86_tune == PROCESSOR_ZNVER3) /* Feature tests against the various tunings. */ enum ix86_tune_indices { @@ -2350,6 +2351,7 @@ enum processor_type PROCESSOR_BTVER2, PROCESSOR_ZNVER1, PROCESSOR_ZNVER2, + PROCESSOR_ZNVER3, PROCESSOR_max }; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6e46cdcebb0..d447df2cfe4 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -439,7 +439,7 @@ (define_constants ;; Processor type. (define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem, atom,slm,glm,haswell,generic,amdfam10,bdver1,bdver2,bdver3, - bdver4,btver2,znver1,znver2" + bdver4,btver2,znver1,znver2,znver3" (const (symbol_ref "ix86_schedule"))) ;; A basic instruction type. 
Refinements due to arguments to be diff --git a/gcc/config/i386/x86-tune-sched.c b/gcc/config/i386/x86-tune-sched.c index d4d8a127b41..404b5b101f8 100644 --- a/gcc/config/i386/x86-tune-sched.c +++ b/gcc/config/i386/x86-tune-sched.c @@ -66,6 +66,7 @@ ix86_issue_rate (void) case PROCESSOR_BDVER4: case PROCESSOR_ZNVER1: case PROCESSOR_ZNVER2: + case PROCESSOR_ZNVER3: case PROCESSOR_CORE2: case PROCESSOR_NEHALEM: case PROCESSOR_SANDYBRIDGE: @@ -396,6 +397,7 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, case PROCESSOR_ZNVER1: case PROCESSOR_ZNVER2: + case PROCESSOR_ZNVER3: /* Stack engine allows to execute push&pop instructions in parall. */ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 6eff8256897..ed4d74cecd1 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -444,7 +444,7 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER) /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or smaller FMA chain. */ -DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2) +DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3) /*****************************************************************************/ /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ diff --git a/gcc/config/i386/znver1.md b/gcc/config/i386/znver1.md index 6812a3de9f5..b0edfab084f 100644 --- a/gcc/config/i386/znver1.md +++ b/gcc/config/i386/znver1.md @@ -21,7 +21,7 @@ (define_attr "znver1_decode" "direct,vector,double" (const_string "direct")) -;; AMD znver1 and znver2 Scheduling +;; AMD znver1, znver2 and znver3 Scheduling ;; Modeling automatons for zen decoders, integer execution pipes, ;; AGU pipes and floating point execution units. 
(define_automaton "znver1, znver1_ieu, znver1_fp, znver1_agu") @@ -52,7 +52,7 @@ (define_cpu_unit "znver1-ieu2" "znver1_ieu") (define_cpu_unit "znver1-ieu3" "znver1_ieu") (define_reservation "znver1-ieu" "znver1-ieu0|znver1-ieu1|znver1-ieu2|znver1-ieu3") -;; 2 AGU pipes in znver1 and 3 AGU pipes in znver2 +;; 2 AGU pipes in znver1 and 3 AGU pipes in znver2 and znver3 ;; According to CPU diagram last AGU unit is used only for stores. (define_cpu_unit "znver1-agu0" "znver1_agu") (define_cpu_unit "znver1-agu1" "znver1_agu") @@ -63,7 +63,7 @@ (define_reservation "znver2-store-agu-reserve" "znver1-agu0|znver1-agu1|znver2-a ;; Load is 4 cycles. We do not model reservation of load unit. ;;(define_reservation "znver1-load" "znver1-agu-reserve, nothing, nothing, nothing") (define_reservation "znver1-load" "znver1-agu-reserve") -;; Store operations differs between znver1 and znver2 because extra AGU +;; Store operations differs between znver1, znver2 and znver3 because extra AGU ;; was added. (define_reservation "znver1-store" "znver1-agu-reserve") (define_reservation "znver2-store" "znver2-store-agu-reserve") @@ -77,6 +77,7 @@ (define_reservation "znver1-ivector" "znver1-ieu0+znver1-ieu1 (define_reservation "znver2-ivector" "znver1-ieu0+znver1-ieu1 +znver1-ieu2+znver1-ieu3 +znver1-agu0+znver1-agu1+znver2-agu2") + ;; Floating point unit 4 FP pipes. 
(define_cpu_unit "znver1-fp0" "znver1_fp") (define_cpu_unit "znver1-fp1" "znver1_fp") @@ -99,7 +100,7 @@ (define_insn_reservation "znver1_call" 1 "znver1-double,znver1-store,znver1-ieu0|znver1-ieu3") (define_insn_reservation "znver2_call" 1 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (eq_attr "type" "call,callv")) "znver1-double,znver2-store,znver1-ieu0|znver1-ieu3") @@ -110,10 +111,10 @@ (define_insn_reservation "znver1_push" 1 (eq_attr "memory" "store"))) "znver1-direct,znver1-store") (define_insn_reservation "znver2_push" 1 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "push") (eq_attr "memory" "store"))) - "znver1-direct,znver1-store") + "znver1-direct,znver2-store") (define_insn_reservation "znver1_push_load" 4 (and (eq_attr "cpu" "znver1") @@ -121,13 +122,13 @@ (define_insn_reservation "znver1_push_load" 4 (eq_attr "memory" "both"))) "znver1-direct,znver1-load,znver1-store") (define_insn_reservation "znver2_push_load" 4 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "push") (eq_attr "memory" "both"))) "znver1-direct,znver1-load,znver2-store") (define_insn_reservation "znver1_pop" 4 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "pop") (eq_attr "memory" "load"))) "znver1-direct,znver1-load") @@ -138,7 +139,7 @@ (define_insn_reservation "znver1_pop_mem" 4 (eq_attr "memory" "both"))) "znver1-direct,znver1-load,znver1-store") (define_insn_reservation "znver2_pop_mem" 4 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "pop") (eq_attr "memory" "both"))) "znver1-direct,znver1-load,znver2-store") @@ -149,7 +150,7 @@ (define_insn_reservation "znver1_leave" 1 (eq_attr "type" "leave")) "znver1-double,znver1-ieu, znver1-store") (define_insn_reservation "znver2_leave" 1 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (eq_attr "type" 
"leave")) "znver1-double,znver1-ieu, znver2-store") @@ -157,13 +158,13 @@ (define_insn_reservation "znver2_leave" 1 ;; Multiplications ;; Reg operands (define_insn_reservation "znver1_imul" 3 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "imul") (eq_attr "memory" "none"))) "znver1-direct,znver1-ieu1") (define_insn_reservation "znver1_imul_mem" 7 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "imul") (eq_attr "memory" "!none"))) "znver1-direct,znver1-load, znver1-ieu1") @@ -227,6 +228,62 @@ (define_insn_reservation "znver1_idiv_mem_QI" 16 (eq_attr "memory" "none")))) "znver1-direct,znver1-load,znver1-ieu2*12") +(define_insn_reservation "znver3_idiv_DI" 18 + (and (eq_attr "cpu" "znver3") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "none")))) + "znver1-double,znver1-ieu2*18") + +(define_insn_reservation "znver3_idiv_SI" 12 + (and (eq_attr "cpu" "znver3") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "none")))) + "znver1-double,znver1-ieu2*12") + +(define_insn_reservation "znver3_idiv_HI" 10 + (and (eq_attr "cpu" "znver3") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "none")))) + "znver1-double,znver1-ieu2*10") + +(define_insn_reservation "znver3_idiv_QI" 9 + (and (eq_attr "cpu" "znver3") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "none")))) + "znver1-direct,znver1-ieu2*9") + +(define_insn_reservation "znver3_idiv_mem_DI" 22 + (and (eq_attr "cpu" "znver3") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "DI") + (eq_attr "memory" "load")))) + "znver1-double,znver1-load,znver1-ieu2*22") + +(define_insn_reservation "znver3_idiv_mem_SI" 16 + (and (eq_attr "cpu" "znver3") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "SI") + (eq_attr "memory" "load")))) + "znver1-double,znver1-load,znver1-ieu2*16") + 
+(define_insn_reservation "znver3_idiv_mem_HI" 14 + (and (eq_attr "cpu" "znver3") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "HI") + (eq_attr "memory" "load")))) + "znver1-double,znver1-load,znver1-ieu2*10") + +(define_insn_reservation "znver3_idiv_mem_QI" 13 + (and (eq_attr "cpu" "znver3") + (and (eq_attr "type" "idiv") + (and (eq_attr "mode" "QI") + (eq_attr "memory" "load")))) + "znver1-direct,znver1-load,znver1-ieu2*9") + ;; STR ISHIFT which are micro coded. ;; Fix me: Latency need to be rechecked. (define_insn_reservation "znver1_str_ishift" 6 @@ -236,15 +293,16 @@ (define_insn_reservation "znver1_str_ishift" 6 "znver1-vector,znver1-ivector") (define_insn_reservation "znver2_str_ishift" 3 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "ishift") (eq_attr "memory" "both,store"))) "znver1-vector,znver1-ivector") (define_insn_reservation "znver2_str_istr" 19 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "str") (eq_attr "memory" "both,store"))) "znver1-vector,znver1-ivector") + ;; MOV - integer moves (define_insn_reservation "znver1_load_imov_double" 2 (and (eq_attr "cpu" "znver1") @@ -254,14 +312,14 @@ (define_insn_reservation "znver1_load_imov_double" 2 "znver1-double,znver1-ieu|znver1-ieu") (define_insn_reservation "znver2_load_imov_double" 1 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "znver1_decode" "double") (and (eq_attr "type" "imovx") (eq_attr "memory" "none")))) "znver1-double,znver1-ieu|znver1-ieu") (define_insn_reservation "znver1_load_imov_direct" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "imov,imovx") (eq_attr "memory" "none"))) "znver1-direct,znver1-ieu") @@ -274,7 +332,7 @@ (define_insn_reservation "znver1_load_imov_double_store" 2 "znver1-double,znver1-ieu|znver1-ieu,znver1-store") (define_insn_reservation "znver2_load_imov_double_store" 1 - 
(and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "znver1_decode" "double") (and (eq_attr "type" "imovx") (eq_attr "memory" "store")))) @@ -287,7 +345,7 @@ (define_insn_reservation "znver1_load_imov_direct_store" 1 "znver1-direct,znver1-ieu,znver1-store") (define_insn_reservation "znver2_load_imov_direct_store" 1 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "imov,imovx") (eq_attr "memory" "store"))) "znver1-direct,znver1-ieu,znver2-store") @@ -300,14 +358,14 @@ (define_insn_reservation "znver1_load_imov_double_load" 5 "znver1-double,znver1-load,znver1-ieu|znver1-ieu") (define_insn_reservation "znver2_load_imov_double_load" 4 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "znver1_decode" "double") (and (eq_attr "type" "imovx") (eq_attr "memory" "load")))) "znver1-double,znver1-load,znver1-ieu|znver1-ieu") (define_insn_reservation "znver1_load_imov_direct_load" 4 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "imov,imovx") (eq_attr "memory" "load"))) "znver1-direct,znver1-load") @@ -315,13 +373,13 @@ (define_insn_reservation "znver1_load_imov_direct_load" 4 ;; INTEGER/GENERAL instructions ;; register/imm operands only: ALU, ICMP, NEG, NOT, ROTATE, ISHIFT, TEST (define_insn_reservation "znver1_insn" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov") (eq_attr "memory" "none,unknown"))) "znver1-direct,znver1-ieu") (define_insn_reservation "znver1_insn_load" 5 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift,ishift1,test,setcc,incdec,icmov") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-ieu") @@ -333,7 +391,7 @@ (define_insn_reservation 
"znver1_insn_store" 1 "znver1-direct,znver1-ieu,znver1-store") (define_insn_reservation "znver2_insn_store" 1 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec") (eq_attr "memory" "store"))) "znver1-direct,znver1-ieu,znver2-store") @@ -345,7 +403,7 @@ (define_insn_reservation "znver1_insn_both" 5 "znver1-direct,znver1-load,znver1-ieu,znver1-store") (define_insn_reservation "znver2_insn_both" 5 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "alu,icmp,negnot,rotate,rotate1,ishift1,test,setcc,incdec") (eq_attr "memory" "both"))) "znver1-direct,znver1-load,znver1-ieu,znver2-store") @@ -357,7 +415,7 @@ (define_insn_reservation "znver1_ieu_vector" 6 "znver1-vector,znver1-ivector") (define_insn_reservation "znver2_ieu_vector" 5 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (eq_attr "type" "other,str,multi")) "znver1-vector,znver2-ivector") @@ -370,21 +428,21 @@ (define_insn_reservation "znver1_alu1_vector" 3 "znver1-vector,znver1-ivector") (define_insn_reservation "znver2_alu1_vector" 3 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "znver1_decode" "vector") (and (eq_attr "type" "alu1") (eq_attr "memory" "none,unknown")))) "znver1-vector,znver2-ivector") (define_insn_reservation "znver1_alu1_double" 2 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "znver1_decode" "double") (and (eq_attr "type" "alu1") (eq_attr "memory" "none,unknown")))) "znver1-double,znver1-ieu") (define_insn_reservation "znver1_alu1_direct" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "znver1_decode" "direct") (and (eq_attr "type" "alu1") (eq_attr "memory" "none,unknown")))) @@ -392,45 +450,45 @@ (define_insn_reservation "znver1_alu1_direct" 1 ;; Branches : Fix me need to model conditional 
branches. (define_insn_reservation "znver1_branch" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "ibr") - (eq_attr "memory" "none"))) + (eq_attr "memory" "none"))) "znver1-direct") ;; Indirect branches check latencies. (define_insn_reservation "znver1_indirect_branch_mem" 6 (and (eq_attr "cpu" "znver1") (and (eq_attr "type" "ibr") - (eq_attr "memory" "load"))) + (eq_attr "memory" "load"))) "znver1-vector,znver1-ivector") (define_insn_reservation "znver2_indirect_branch_mem" 6 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "ibr") - (eq_attr "memory" "load"))) + (eq_attr "memory" "load"))) "znver1-vector,znver2-ivector") ;; LEA executes in ALU units with 1 cycle latency. (define_insn_reservation "znver1_lea" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (eq_attr "type" "lea")) "znver1-direct,znver1-ieu") ;; Other integer instrucions (define_insn_reservation "znver1_idirect" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "unit" "integer,unknown") (eq_attr "memory" "none,unknown"))) "znver1-direct,znver1-ieu") ;; Floating point (define_insn_reservation "znver1_fp_cmov" 6 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (eq_attr "type" "fcmov")) "znver1-vector,znver1-fvector") (define_insn_reservation "znver1_fp_mov_direct_load" 8 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "znver1_decode" "direct") (and (eq_attr "type" "fmov") (eq_attr "memory" "load")))) @@ -443,41 +501,34 @@ (define_insn_reservation "znver1_fp_mov_direct_store" 5 (eq_attr "memory" "store")))) "znver1-direct,znver1-fp2|znver1-fp3,znver1-store") (define_insn_reservation "znver2_fp_mov_direct_store" 5 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "znver1_decode" "direct") (and 
(eq_attr "type" "fmov") (eq_attr "memory" "store")))) "znver1-direct,znver1-fp2|znver1-fp3,znver2-store") (define_insn_reservation "znver1_fp_mov_double" 4 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "znver1_decode" "double") (and (eq_attr "type" "fmov") (eq_attr "memory" "none")))) "znver1-double,znver1-fp3") (define_insn_reservation "znver1_fp_mov_double_load" 12 - (and (eq_attr "cpu" "znver1") - (and (eq_attr "znver1_decode" "double") - (and (eq_attr "type" "fmov") - (eq_attr "memory" "load")))) - "znver1-double,znver1-load,znver1-fp3") - -(define_insn_reservation "znver2_fp_mov_double_load" 12 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "znver1_decode" "double") (and (eq_attr "type" "fmov") (eq_attr "memory" "load")))) "znver1-double,znver1-load,znver1-fp3") (define_insn_reservation "znver1_fp_mov_direct" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (eq_attr "type" "fmov")) "znver1-direct,znver1-fp3") ;; TODO: AGU? 
(define_insn_reservation "znver1_fp_spc_direct" 5 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "fpspc") (eq_attr "memory" "store"))) "znver1-direct,znver1-fp3,znver1-fp2") @@ -488,26 +539,26 @@ (define_insn_reservation "znver1_fp_insn_vector" 6 (eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov"))) "znver1-vector,znver1-fvector") (define_insn_reservation "znver2_fp_insn_vector" 6 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "znver1_decode" "vector") (eq_attr "type" "fpspc,mmxcvt,sselog1,ssemul,ssemov"))) "znver1-vector,znver2-fvector") ;; FABS (define_insn_reservation "znver1_fp_fsgn" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (eq_attr "type" "fsgn")) "znver1-direct,znver1-fp3") (define_insn_reservation "znver1_fp_fcmp" 2 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "memory" "none") (and (eq_attr "znver1_decode" "double") (eq_attr "type" "fcmp")))) "znver1-double,znver1-fp0,znver1-fp2") (define_insn_reservation "znver1_fp_fcmp_load" 9 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "memory" "none") (and (eq_attr "znver1_decode" "double") (eq_attr "type" "fcmp")))) @@ -515,32 +566,32 @@ (define_insn_reservation "znver1_fp_fcmp_load" 9 ;;FADD FSUB FMUL (define_insn_reservation "znver1_fp_op_mul" 5 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "fop,fmul") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0*5") (define_insn_reservation "znver1_fp_op_mul_load" 12 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "fop,fmul") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0*5") (define_insn_reservation "znver1_fp_op_imul_load" 16 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" 
"znver1,znver2,znver3") (and (eq_attr "type" "fop,fmul") (and (eq_attr "fp_int_src" "true") (eq_attr "memory" "load")))) "znver1-double,znver1-load,znver1-fp3,znver1-fp0") (define_insn_reservation "znver1_fp_op_div" 15 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "fdiv") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp3*15") (define_insn_reservation "znver1_fp_op_div_load" 22 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "fdiv") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp3*15") @@ -553,62 +604,63 @@ (define_insn_reservation "znver1_fp_op_idiv_load" 27 "znver1-double,znver1-load,znver1-fp3*19") (define_insn_reservation "znver2_fp_op_idiv_load" 26 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "fdiv") (and (eq_attr "fp_int_src" "true") (eq_attr "memory" "load")))) "znver1-double,znver1-load,znver1-fp3*19") + ;; MMX, SSE, SSEn.n, AVX, AVX2 instructions (define_insn_reservation "znver1_fp_insn" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (eq_attr "type" "mmx")) "znver1-direct,znver1-fpu") (define_insn_reservation "znver1_mmx_add" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxadd") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3") (define_insn_reservation "znver1_mmx_add_load" 8 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxadd") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3") (define_insn_reservation "znver1_mmx_cmp" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxcmp") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0|znver1-fp3") (define_insn_reservation 
"znver1_mmx_cmp_load" 8 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxcmp") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0|znver1-fp3") (define_insn_reservation "znver1_mmx_cvt_pck_shuf" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp1|znver1-fp2") (define_insn_reservation "znver1_mmx_cvt_pck_shuf_load" 8 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxcvt,sseshuf,sseshuf1") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp1|znver1-fp2") (define_insn_reservation "znver1_mmx_shift_move" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxshft,mmxmov") (eq_attr "memory" "none"))) - "znver1-direct,znver1-fp2") + "znver1-direct,znver1-fp2") (define_insn_reservation "znver1_mmx_shift_move_load" 8 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxshft,mmxmov") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp2") @@ -619,19 +671,19 @@ (define_insn_reservation "znver1_mmx_move_store" 1 (eq_attr "memory" "store,both"))) "znver1-direct,znver1-fp2,znver1-store") (define_insn_reservation "znver2_mmx_move_store" 1 - (and (eq_attr "cpu" "znver1") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "mmxshft,mmxmov") (eq_attr "memory" "store,both"))) "znver1-direct,znver1-fp2,znver2-store") (define_insn_reservation "znver1_mmx_mul" 3 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "mmxmul") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0*3") (define_insn_reservation "znver1_mmx_load" 10 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr 
"type" "mmxmul") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0*3") @@ -652,13 +704,13 @@ (define_insn_reservation "znver1_avx256_log_load" 8 "znver1-double,znver1-load,znver1-fpu") (define_insn_reservation "znver1_sse_log" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "sselog") (eq_attr "memory" "none"))) "znver1-direct,znver1-fpu") (define_insn_reservation "znver1_sse_log_load" 8 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "sselog") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fpu") @@ -678,13 +730,13 @@ (define_insn_reservation "znver1_avx256_log1_load" 8 "znver1-double,znver1-load,znver1-fp1|znver1-fp2") (define_insn_reservation "znver1_sse_log1" 1 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "sselog1") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp1|znver1-fp2") (define_insn_reservation "znver1_sse_log1_load" 8 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "sselog1") (eq_attr "memory" "!none"))) "znver1-direct,znver1-load,znver1-fp1|znver1-fp2") @@ -701,7 +753,8 @@ (define_insn_reservation "znver1_sse_comi" 1 (define_insn_reservation "znver1_sse_comi_load" 8 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") + (eq_attr "cpu" "znver3"))) (and (eq_attr "prefix_extra" "0") (and (eq_attr "type" "ssecomi") (eq_attr "memory" "load")))) @@ -710,7 +763,8 @@ (define_insn_reservation "znver1_sse_comi_load" 8 (define_insn_reservation "znver1_sse_comi_double" 2 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V4SF,V2DF,TI")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") + (eq_attr "cpu" "znver3"))) (and (eq_attr "prefix" "vex") (and (eq_attr "prefix_extra" "0") (and (eq_attr "type" 
"ssecomi") @@ -720,7 +774,8 @@ (define_insn_reservation "znver1_sse_comi_double" 2 (define_insn_reservation "znver1_sse_comi_double_load" 10 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V4SF,V2DF,TI")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") + (eq_attr "cpu" "znver3"))) (and (eq_attr "prefix" "vex") (and (eq_attr "prefix_extra" "0") (and (eq_attr "type" "ssecomi") @@ -730,7 +785,8 @@ (define_insn_reservation "znver1_sse_comi_double_load" 10 (define_insn_reservation "znver1_sse_test" 1 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") + (eq_attr "cpu" "znver3"))) (and (eq_attr "prefix_extra" "1") (and (eq_attr "type" "ssecomi") (eq_attr "memory" "none")))) @@ -739,7 +795,8 @@ (define_insn_reservation "znver1_sse_test" 1 (define_insn_reservation "znver1_sse_test_load" 8 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") + (eq_attr "cpu" "znver3"))) (and (eq_attr "prefix_extra" "1") (and (eq_attr "type" "ssecomi") (eq_attr "memory" "load")))) @@ -757,7 +814,7 @@ (define_insn_reservation "znver1_sse_mov" 2 "znver1-direct,znver1-ieu0") (define_insn_reservation "znver2_sse_mov" 1 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "mode" "SI") (and (eq_attr "isa" "avx") (and (eq_attr "type" "ssemov") @@ -774,7 +831,7 @@ (define_insn_reservation "znver1_avx_mov" 2 "znver1-direct,znver1-ieu2") (define_insn_reservation "znver2_avx_mov" 1 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "mode" "TI") (and (eq_attr "isa" "avx") (and (eq_attr "type" "ssemov") @@ -785,7 +842,8 @@ (define_insn_reservation "znver2_avx_mov" 1 (define_insn_reservation "znver1_sseavx_mov" 1 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") 
+ (eq_attr "cpu" "znver3"))) (and (eq_attr "type" "ssemov") (eq_attr "memory" "none"))) "znver1-direct,znver1-fpu") @@ -797,7 +855,7 @@ (define_insn_reservation "znver1_sseavx_mov_store" 1 (eq_attr "memory" "store")))) "znver1-direct,znver1-fpu,znver1-store") (define_insn_reservation "znver2_sseavx_mov_store" 1 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "ssemov") (eq_attr "memory" "store"))) "znver1-direct,znver1-fpu,znver2-store") @@ -805,7 +863,8 @@ (define_insn_reservation "znver2_sseavx_mov_store" 1 (define_insn_reservation "znver1_sseavx_mov_load" 8 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") + (eq_attr "cpu" "znver3"))) (and (eq_attr "type" "ssemov") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fpu") @@ -835,7 +894,8 @@ (define_insn_reservation "znver1_avx256_mov_load" 8 (define_insn_reservation "znver1_sseavx_add" 3 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") + (eq_attr "cpu" "znver3"))) (and (eq_attr "type" "sseadd") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp2|znver1-fp3") @@ -843,7 +903,8 @@ (define_insn_reservation "znver1_sseavx_add" 3 (define_insn_reservation "znver1_sseavx_add_load" 10 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") + (eq_attr "cpu" "znver3"))) (and (eq_attr "type" "sseadd") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp2|znver1-fp3") @@ -892,10 +953,39 @@ (define_insn_reservation "znver1_avx256_fma_load" 12 (eq_attr "memory" "load")))) "znver1-double,znver1-load,znver1-fp0|znver1-fp1") +(define_insn_reservation "znver3_sseavx_fma" 4 + (and (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "SF,DF,V4SF,V2DF")) + (and (eq_attr "type" "ssemuladd") + (eq_attr "memory" 
"none"))) + "znver1-direct,znver1-fp0|znver1-fp1") + +(define_insn_reservation "znver3_sseavx_fma_load" 11 + (and (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "SF,DF,V4SF,V2DF")) + (and (eq_attr "type" "ssemuladd") + (eq_attr "memory" "load"))) + "znver1-direct,znver1-load,znver1-fp0|znver1-fp1") + +(define_insn_reservation "znver3_avx256_fma" 4 + (and (eq_attr "cpu" "znver3") + (and (eq_attr "mode" "V8SF,V4DF") + (and (eq_attr "type" "ssemuladd") + (eq_attr "memory" "none")))) + "znver1-double,znver1-fp0|znver1-fp1") + +(define_insn_reservation "znver3_avx256_fma_load" 11 + (and (eq_attr "cpu" "znver3") + (and (eq_attr "mode" "V8SF,V4DF") + (and (eq_attr "type" "ssemuladd") + (eq_attr "memory" "load")))) + "znver1-double,znver1-load,znver1-fp0|znver1-fp1") + (define_insn_reservation "znver1_sseavx_iadd" 1 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "DI,TI")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") + (eq_attr "cpu" "znver3"))) (and (eq_attr "type" "sseiadd") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0|znver1-fp1|znver1-fp3") @@ -903,7 +993,8 @@ (define_insn_reservation "znver1_sseavx_iadd" 1 (define_insn_reservation "znver1_sseavx_iadd_load" 8 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "DI,TI")) - (eq_attr "cpu" "znver2")) + (ior (eq_attr "cpu" "znver2") + (eq_attr "cpu" "znver3"))) (and (eq_attr "type" "sseiadd") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0|znver1-fp1|znver1-fp3") @@ -924,7 +1015,7 @@ (define_insn_reservation "znver1_avx256_iadd_load" 8 ;; SSE conversions. 
(define_insn_reservation "znver1_ssecvtsf_si_load" 12 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "mode" "SI") (and (eq_attr "type" "sseicvt") (and (match_operand:SF 1 "memory_operand") @@ -939,7 +1030,7 @@ (define_insn_reservation "znver1_ssecvtdf_si" 5 (eq_attr "memory" "none"))))) "znver1-double,znver1-fp3,znver1-ieu0") (define_insn_reservation "znver2_ssecvtdf_si" 4 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "mode" "SI") (and (match_operand:DF 1 "register_operand") (and (eq_attr "type" "sseicvt") @@ -955,13 +1046,14 @@ (define_insn_reservation "znver1_ssecvtdf_si_load" 12 "znver1-double,znver1-load,znver1-fp3,znver1-ieu0") (define_insn_reservation "znver2_ssecvtdf_si_load" 11 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "mode" "SI") (and (eq_attr "type" "sseicvt") (and (match_operand:DF 1 "memory_operand") (eq_attr "memory" "load"))))) "znver1-double,znver1-load,znver1-fp3,znver1-ieu0") + ;; All other used ssecvt fp3 pipes ;; Check: Need to revisit this again. ;; Some SSE converts may use different pipe combinations. 
@@ -972,19 +1064,13 @@ (define_insn_reservation "znver1_ssecvt" 4 "znver1-direct,znver1-fp3") (define_insn_reservation "znver2_ssecvt" 3 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "ssecvt") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp3") (define_insn_reservation "znver1_ssecvt_load" 11 - (and (eq_attr "cpu" "znver1") - (and (eq_attr "type" "ssecvt") - (eq_attr "memory" "load"))) - "znver1-direct,znver1-load,znver1-fp3") - -(define_insn_reservation "znver2_ssecvt_load" 11 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "type" "ssecvt") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp3") @@ -994,7 +1080,9 @@ (define_insn_reservation "znver1_ssediv_ss_ps" 10 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V4SF,SF")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "V8SF,V4SF,SF"))) + (eq_attr "mode" "V8SF,V4SF,SF")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "V8SF,V4SF,SF"))) (and (eq_attr "type" "ssediv") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp3*10") @@ -1003,7 +1091,9 @@ (define_insn_reservation "znver1_ssediv_ss_ps_load" 17 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V4SF,SF")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "V8SF,V4SF,SF"))) + (eq_attr "mode" "V8SF,V4SF,SF")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "V8SF,V4SF,SF"))) (and (eq_attr "type" "ssediv") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp3*10") @@ -1012,16 +1102,20 @@ (define_insn_reservation "znver1_ssediv_sd_pd" 13 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V2DF,DF")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "V4DF,V2DF,DF"))) + (eq_attr "mode" "V4DF,V2DF,DF")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "V4DF,V2DF,DF"))) (and (eq_attr "type" "ssediv") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp3*13") (define_insn_reservation "znver1_ssediv_sd_pd_load" 20 (and (ior (and 
(eq_attr "cpu" "znver1") - (eq_attr "mode" "V2DF,DF")) + (eq_attr "mode" "V2DF,DF")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "V4DF,V2DF,DF"))) + (eq_attr "mode" "V4DF,V2DF,DF")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "V4DF,V2DF,DF"))) (and (eq_attr "type" "ssediv") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp3*13") @@ -1058,7 +1152,9 @@ (define_insn_reservation "znver1_ssemul_ss_ps" 3 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V4SF,SF")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF"))) + (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "V8SF,V4SF,SF,V4DF,V2DF,DF"))) (and (eq_attr "type" "ssemul") (eq_attr "memory" "none"))) "znver1-direct,(znver1-fp0|znver1-fp1)*3") @@ -1067,7 +1163,9 @@ (define_insn_reservation "znver1_ssemul_ss_ps_load" 10 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "V4SF,SF")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "V8SF,V4SF,SF"))) + (eq_attr "mode" "V8SF,V4SF,SF")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "V8SF,V4SF,SF"))) (and (eq_attr "type" "ssemul") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3") @@ -1101,17 +1199,18 @@ (define_insn_reservation "znver1_ssemul_sd_pd_load" 11 "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*4") (define_insn_reservation "znver2_ssemul_sd_pd" 3 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "ssemul") (eq_attr "memory" "none"))) "znver1-direct,(znver1-fp0|znver1-fp1)*3") (define_insn_reservation "znver2_ssemul_sd_pd_load" 10 - (and (eq_attr "cpu" "znver2") + (and (eq_attr "cpu" "znver2,znver3") (and (eq_attr "type" "ssemul") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,(znver1-fp0|znver1-fp1)*3") + (define_insn_reservation "znver1_ssemul_avx256_pd" 5 (and (eq_attr "cpu" "znver1") (and (eq_attr "mode" "V4DF") @@ -1131,13 +1230,15 @@ (define_insn_reservation 
"znver1_sseimul" 3 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "TI")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "TI,OI"))) + (eq_attr "mode" "TI,OI")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "TI,OI"))) (and (eq_attr "type" "sseimul") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0*3") (define_insn_reservation "znver1_sseimul_avx256" 4 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "mode" "OI") (and (eq_attr "type" "sseimul") (eq_attr "memory" "none")))) @@ -1147,27 +1248,29 @@ (define_insn_reservation "znver1_sseimul_load" 10 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "TI")) (and (eq_attr "cpu" "znver2") + (eq_attr "mode" "TI,OI")) + (and (eq_attr "cpu" "znver3") (eq_attr "mode" "TI,OI"))) (and (eq_attr "type" "sseimul") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0*3") (define_insn_reservation "znver1_sseimul_avx256_load" 11 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "mode" "OI") (and (eq_attr "type" "sseimul") (eq_attr "memory" "load")))) "znver1-double,znver1-load,znver1-fp0*4") (define_insn_reservation "znver1_sseimul_di" 3 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "mode" "DI") (and (eq_attr "memory" "none") (eq_attr "type" "sseimul")))) "znver1-direct,znver1-fp0*3") (define_insn_reservation "znver1_sseimul_load_di" 10 - (and (eq_attr "cpu" "znver1,znver2") + (and (eq_attr "cpu" "znver1,znver2,znver3") (and (eq_attr "mode" "DI") (and (eq_attr "type" "sseimul") (eq_attr "memory" "load")))) @@ -1178,16 +1281,20 @@ (define_insn_reservation "znver1_sse_cmp" 1 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "SF,DF,V4SF,V2DF")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))) + (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))) 
(and (eq_attr "type" "ssecmp") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0|znver1-fp1") (define_insn_reservation "znver1_sse_cmp_load" 8 (and (ior (and (eq_attr "cpu" "znver1") - (eq_attr "mode" "SF,DF,V4SF,V2DF")) + (eq_attr "mode" "SF,DF,V4SF,V2DF")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))) + (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "SF,DF,V4SF,V2DF,V8SF,V4DF"))) (and (eq_attr "type" "ssecmp") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0|znver1-fp1") @@ -1208,9 +1315,11 @@ (define_insn_reservation "znver1_sse_cmp_avx256_load" 8 (define_insn_reservation "znver1_sse_icmp" 1 (and (ior (and (eq_attr "cpu" "znver1") - (eq_attr "mode" "QI,HI,SI,DI,TI")) + (eq_attr "mode" "QI,HI,SI,DI,TI")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "QI,HI,SI,DI,TI,OI"))) + (eq_attr "mode" "QI,HI,SI,DI,TI,OI")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "QI,HI,SI,DI,TI,OI"))) (and (eq_attr "type" "ssecmp") (eq_attr "memory" "none"))) "znver1-direct,znver1-fp0|znver1-fp3") @@ -1219,7 +1328,9 @@ (define_insn_reservation "znver1_sse_icmp_load" 8 (and (ior (and (eq_attr "cpu" "znver1") (eq_attr "mode" "QI,HI,SI,DI,TI")) (and (eq_attr "cpu" "znver2") - (eq_attr "mode" "QI,HI,SI,DI,TI,OI"))) + (eq_attr "mode" "QI,HI,SI,DI,TI,OI")) + (and (eq_attr "cpu" "znver3") + (eq_attr "mode" "QI,HI,SI,DI,TI,OI"))) (and (eq_attr "type" "ssecmp") (eq_attr "memory" "load"))) "znver1-direct,znver1-load,znver1-fp0|znver1-fp3") diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index e7c768f1e15..9c734595950 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -21810,6 +21810,12 @@ AMD Family 17h Zen version 1. @item znver2 AMD Family 17h Zen version 2. + +@item amdfam19h +AMD Family 19h CPU. + +@item znver3 +AMD Family 19h Zen version 3. 
@end table Here is an example: diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 0c1b5975e6b..eabeec944e7 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -28881,6 +28881,13 @@ MWAITX, SHA, CLZERO, AES, PCLMUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A, SSSE3, SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, RDPID, WBNOINVD, and 64-bit instruction set extensions.) +@item znver3 +AMD Family 19h core based CPUs with x86-64 instruction set support. (This +supersets BMI, BMI2, CLWB, F16C, FMA, FSGSBASE, AVX, AVX2, ADCX, RDSEED, +MWAITX, SHA, CLZERO, AES, PCLMUL, CX16, MOVBE, MMX, SSE, SSE2, SSE3, SSE4A, +SSSE3, SSE4.1, SSE4.2, ABM, XSAVEC, XSAVES, CLFLUSHOPT, POPCNT, RDPID, +WBNOINVD, PKU, VPCLMULQDQ, VAES, and 64-bit instruction set extensions.) + @item btver1 CPUs based on AMD Family 14h cores with x86-64 instruction set support. (This supersets MMX, SSE, SSE2, SSE3, SSSE3, SSE4A, CX16, ABM and 64-bit diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 200d27220df..3abfd0d3e43 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -165,6 +165,9 @@ extern void test_arch_barcelona (void) __attribute__((__target__("arch=barcelon extern void test_arch_bdver1 (void) __attribute__((__target__("arch=bdver1"))); extern void test_arch_bdver2 (void) __attribute__((__target__("arch=bdver2"))); extern void test_arch_bdver3 (void) __attribute__((__target__("arch=bdver3"))); +extern void test_arch_znver1 (void) __attribute__((__target__("arch=znver1"))); +extern void test_arch_znver2 (void) __attribute__((__target__("arch=znver2"))); +extern void test_arch_znver3 (void) __attribute__((__target__("arch=znver3"))); extern void test_tune_nocona (void) __attribute__((__target__("tune=nocona"))); extern void test_tune_core2 (void) __attribute__((__target__("tune=core2"))); @@ -184,6 +187,9 @@ extern void test_tune_bdver1 (void) 
__attribute__((__target__("tune=bdver1"))); extern void test_tune_bdver2 (void) __attribute__((__target__("tune=bdver2"))); extern void test_tune_bdver3 (void) __attribute__((__target__("tune=bdver3"))); extern void test_tune_generic (void) __attribute__((__target__("tune=generic"))); +extern void test_tune_znver1 (void) __attribute__((__target__("tune=znver1"))); +extern void test_tune_znver2 (void) __attribute__((__target__("tune=znver2"))); +extern void test_tune_znver3 (void) __attribute__((__target__("tune=znver3"))); extern void test_fpmath_sse (void) __attribute__((__target__("sse2,fpmath=sse"))); extern void test_fpmath_387 (void) __attribute__((__target__("sse2,fpmath=387"))); diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c index cf5f0884bb4..83301a1445f 100644 --- a/libgcc/config/i386/cpuinfo.c +++ b/libgcc/config/i386/cpuinfo.c @@ -111,6 +111,12 @@ get_amd_cpu (unsigned int family, unsigned int model) if (model >= 0x30) __cpu_model.__cpu_subtype = AMDFAM17H_ZNVER2; break; + case 0x19: + __cpu_model.__cpu_type = AMDFAM19H; + /* AMD family 19h version 1. */ + if (model <= 0x0f) + __cpu_model.__cpu_subtype = AMDFAM19H_ZNVER3; + break; default: break; } diff --git a/libgcc/config/i386/cpuinfo.h b/libgcc/config/i386/cpuinfo.h index 0f97510cde1..7f47552e537 100644 --- a/libgcc/config/i386/cpuinfo.h +++ b/libgcc/config/i386/cpuinfo.h @@ -51,6 +51,7 @@ enum processor_types INTEL_GOLDMONT, INTEL_GOLDMONT_PLUS, INTEL_TREMONT, + AMDFAM19H, CPU_TYPE_MAX }; @@ -79,6 +80,7 @@ enum processor_subtypes INTEL_COREI7_CASCADELAKE, INTEL_COREI7_TIGERLAKE, INTEL_COREI7_COOPERLAKE, + AMDFAM19H_ZNVER3, CPU_SUBTYPE_MAX };