Thanks, I will push a V2 patch to fix the typo and add a vector cost model for the P600 series. As for blocking the divide units, I decided to use your suggestion. The P600 series divider handles 4 bits per cycle, so blocking for 3-5 cycles is enough.
On Thu, Feb 1, 2024 at 9:50 AM Edwin Lu <e...@rivosinc.com> wrote: > I recently committed changes modifying the scheduling reservations. Some > things may need to be retested with the newly enabled asserts. > > Edwin > > On 1/31/2024 1:40 AM, Monk Chiang wrote: > > Add sifive p600 series scheduler module. For more information > > see https://www.sifive.com/cores/performance-p650-670. > > Add sifive-p650, sifive-p670 for mcpu option will come in separate > patches. > > > > gcc/ChangeLog: > > * config/riscv/riscv.md: Add "fcvt_i2f", "fcvt_f2i" type > > attribute, and include sifive-p600.md. > > * config/riscv/generic-ooo.md: Update type attribute. > > * config/riscv/sifive-7.md: Update type attribute. > > * config/riscv/sifive-p600.md: New file. > > * config/riscv/riscv-cores.def (RISCV_TUNE): Add parameter. > > * config/riscv/riscv-opts.h (enum riscv_microarchitecture_type): > > Add sifive_p600. > > * config/riscv/riscv.c (sifive_p600_tune_info): New. > > * config/riscv/riscv.h (TARGET_SFB_ALU): Update. 
> > * doc/invoke.texi (RISC-V Options): Add sifive-p600-series > > --- > > gcc/config/riscv/generic-ooo.md | 2 +- > > gcc/config/riscv/generic.md | 2 +- > > gcc/config/riscv/riscv-cores.def | 1 + > > gcc/config/riscv/riscv-opts.h | 1 + > > gcc/config/riscv/riscv.cc | 17 +++ > > gcc/config/riscv/riscv.h | 4 +- > > gcc/config/riscv/riscv.md | 19 ++-- > > gcc/config/riscv/sifive-7.md | 2 +- > > gcc/config/riscv/sifive-p600.md | 174 +++++++++++++++++++++++++++++++ > > gcc/doc/invoke.texi | 3 +- > > 10 files changed, 212 insertions(+), 13 deletions(-) > > create mode 100644 gcc/config/riscv/sifive-p600.md > > > > diff --git a/gcc/config/riscv/generic-ooo.md > b/gcc/config/riscv/generic-ooo.md > > index 421a7bb929d..a22f8a3e079 100644 > > --- a/gcc/config/riscv/generic-ooo.md > > +++ b/gcc/config/riscv/generic-ooo.md > > @@ -127,7 +127,7 @@ > > > > (define_insn_reservation "generic_ooo_fcvt" 3 > > (and (eq_attr "tune" "generic_ooo") > > - (eq_attr "type" "fcvt")) > > + (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i")) > > "generic_ooo_issue,generic_ooo_fxu") > > > > (define_insn_reservation "generic_ooo_fcmp" 2 > > diff --git a/gcc/config/riscv/generic.md b/gcc/config/riscv/generic.md > > index b99ae345bb3..3f0eaa2ea08 100644 > > --- a/gcc/config/riscv/generic.md > > +++ b/gcc/config/riscv/generic.md > > @@ -42,7 +42,7 @@ > > > > (define_insn_reservation "generic_xfer" 3 > > (and (eq_attr "tune" "generic") > > - (eq_attr "type" "mfc,mtc,fcvt,fmove,fcmp")) > > + (eq_attr "type" "mfc,mtc,fcvt,fcvt_i2f,fcvt_f2i,fmove,fcmp")) > > "alu") > > > > (define_insn_reservation "generic_branch" 1 > > diff --git a/gcc/config/riscv/riscv-cores.def > b/gcc/config/riscv/riscv-cores.def > > index b30f4dfb08e..a07a79e2cb7 100644 > > --- a/gcc/config/riscv/riscv-cores.def > > +++ b/gcc/config/riscv/riscv-cores.def > > @@ -37,6 +37,7 @@ RISCV_TUNE("rocket", generic, rocket_tune_info) > > RISCV_TUNE("sifive-3-series", generic, rocket_tune_info) > > RISCV_TUNE("sifive-5-series", generic, 
rocket_tune_info) > > RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info) > > +RISCV_TUNE("sifive-p600-series", sifive_p600, sifive_p600_tune_info) > > RISCV_TUNE("thead-c906", generic, thead_c906_tune_info) > > RISCV_TUNE("generic-ooo", generic_ooo, generic_ooo_tune_info) > > RISCV_TUNE("size", generic, optimize_size_tune_info) > > diff --git a/gcc/config/riscv/riscv-opts.h > b/gcc/config/riscv/riscv-opts.h > > index 1500f8811ef..25951665b13 100644 > > --- a/gcc/config/riscv/riscv-opts.h > > +++ b/gcc/config/riscv/riscv-opts.h > > @@ -55,6 +55,7 @@ extern enum riscv_isa_spec_class riscv_isa_spec; > > enum riscv_microarchitecture_type { > > generic, > > sifive_7, > > + sifive_p600, > > generic_ooo > > }; > > extern enum riscv_microarchitecture_type riscv_microarchitecture; > > diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc > > index 7b6111aa545..92d6fd5cf47 100644 > > --- a/gcc/config/riscv/riscv.cc > > +++ b/gcc/config/riscv/riscv.cc > > @@ -447,6 +447,23 @@ static const struct riscv_tune_param > sifive_7_tune_info = { > > NULL, /* vector cost */ > > }; > > > > +/* Costs to use when optimizing for Sifive p600 Series. */ > > +static const struct riscv_tune_param sifive_p600_tune_info = { > > + {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_add */ > > + {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* fp_mul */ > > + {COSTS_N_INSNS (20), COSTS_N_INSNS (20)}, /* fp_div */ > > + {COSTS_N_INSNS (4), COSTS_N_INSNS (4)}, /* int_mul */ > > + {COSTS_N_INSNS (6), COSTS_N_INSNS (6)}, /* int_div */ > > + 4, /* issue_rate */ > > + 4, /* branch_cost */ > > + 3, /* memory_cost */ > > + 4, /* fmv_cost */ > > + true, /* > slow_unaligned_access */ > > + false, /* use_divmod_expansion */ > > + RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */ > > + NULL, /* vector cost */ > > +}; > > + > > /* Costs to use when optimizing for T-HEAD c906. 
*/ > > static const struct riscv_tune_param thead_c906_tune_info = { > > {COSTS_N_INSNS (4), COSTS_N_INSNS (5)}, /* fp_add */ > > diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h > > index 627eba12548..e0cb3ba08d4 100644 > > --- a/gcc/config/riscv/riscv.h > > +++ b/gcc/config/riscv/riscv.h > > @@ -896,7 +896,9 @@ extern enum riscv_cc get_riscv_cc (const rtx use); > > SLT[I][U], AND[I], XOR[I], OR[I], LUI, AUIPC, and their compressed > > counterparts, including C.MV and C.LI) can be in the branch > shadow. */ > > > > -#define TARGET_SFB_ALU (riscv_microarchitecture == sifive_7) > > +#define TARGET_SFB_ALU \ > > + ((riscv_microarchitecture == sifive_7) \ > > + || (riscv_microarchitecture == sifive_p600)) > > > > #define LOGICAL_OP_NON_SHORT_CIRCUIT 0 > > > > diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md > > index edcaec4a786..02aefc76400 100644 > > --- a/gcc/config/riscv/riscv.md > > +++ b/gcc/config/riscv/riscv.md > > @@ -314,6 +314,8 @@ > > ;; fdiv floating point divide > > ;; fcmp floating point compare > > ;; fcvt floating point convert > > +;; fcvt_i2f integer to floating point convert > > +;; fcvt_f2i floating point to integer convert > > ;; fsqrt floating point square root > > ;; multi multiword sequence (or user asm statements) > > ;; auipc integer addition to PC > > @@ -466,8 +468,8 @@ > > (define_attr "type" > > "unknown,branch,jump,jalr,ret,call,load,fpload,store,fpstore, > > > mtc,mfc,const,arith,logical,shift,slt,imul,idiv,move,fmove,fadd,fmul, > > - > fmadd,fdiv,fcmp,fcvt,fsqrt,multi,auipc,sfb_alu,nop,trap,ghost,bitmanip, > > - rotate,clmul,min,max,minu,maxu,clz,ctz,cpop, > > + > fmadd,fdiv,fcmp,fcvt,fcvt_i2f,fcvt_f2i,fsqrt,multi,auipc,sfb_alu,nop,trap, > > + ghost,bitmanip,rotate,clmul,min,max,minu,maxu,clz,ctz,cpop, > > > atomic,condmove,cbo,crypto,pushpop,mvpair,zicond,rdvlenb,rdvl,wrvxrm,wrfrm, > > rdfrm,vsetvl,vsetvl_pre,vlde,vste,vldm,vstm,vlds,vsts, > > vldux,vldox,vstux,vstox,vldff,vldr,vstr, > > @@ -685,7 
+687,7 @@ > > ;; Microarchitectures we know how to tune for. > > ;; Keep this in sync with enum riscv_microarchitecture. > > (define_attr "tune" > > - "generic,sifive_7,generic_ooo" > > + "generic,sifive_7,sifive_p600,generic_ooo" > > (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)"))) > > > > ;; Describe a user's asm statement. > > @@ -1973,7 +1975,7 @@ > > (match_operand:ANYF 1 "register_operand" " f")))] > > "TARGET_HARD_FLOAT || TARGET_ZFINX" > > "fcvt.<GPR:ifmt>.<ANYF:fmt> %0,%1,rtz" > > - [(set_attr "type" "fcvt") > > + [(set_attr "type" "fcvt_f2i") > > (set_attr "mode" "<ANYF:MODE>")]) > > > > (define_insn "fixuns_trunc<ANYF:mode><GPR:mode>2" > > @@ -1982,7 +1984,7 @@ > > (match_operand:ANYF 1 "register_operand" " f")))] > > "TARGET_HARD_FLOAT || TARGET_ZFINX" > > "fcvt.<GPR:ifmt>u.<ANYF:fmt> %0,%1,rtz" > > - [(set_attr "type" "fcvt") > > + [(set_attr "type" "fcvt_f2i") > > (set_attr "mode" "<ANYF:MODE>")]) > > > > (define_insn "float<GPR:mode><ANYF:mode>2" > > @@ -1991,7 +1993,7 @@ > > (match_operand:GPR 1 "reg_or_0_operand" " rJ")))] > > "TARGET_HARD_FLOAT || TARGET_ZFINX" > > "fcvt.<ANYF:fmt>.<GPR:ifmt>\t%0,%z1" > > - [(set_attr "type" "fcvt") > > + [(set_attr "type" "fcvt_i2f") > > (set_attr "mode" "<ANYF:MODE>")]) > > > > (define_insn "floatuns<GPR:mode><ANYF:mode>2" > > @@ -2000,7 +2002,7 @@ > > (match_operand:GPR 1 "reg_or_0_operand" " rJ")))] > > "TARGET_HARD_FLOAT || TARGET_ZFINX" > > "fcvt.<ANYF:fmt>.<GPR:ifmt>u\t%0,%z1" > > - [(set_attr "type" "fcvt") > > + [(set_attr "type" "fcvt_i2f") > > (set_attr "mode" "<ANYF:MODE>")]) > > > > (define_insn "l<rint_pattern><ANYF:mode><GPR:mode>2" > > @@ -2010,7 +2012,7 @@ > > RINT))] > > "TARGET_HARD_FLOAT || TARGET_ZFINX" > > "fcvt.<GPR:ifmt>.<ANYF:fmt> %0,%1,<rint_rm>" > > - [(set_attr "type" "fcvt") > > + [(set_attr "type" "fcvt_f2i") > > (set_attr "mode" "<ANYF:MODE>")]) > > > > (define_insn "<round_pattern><ANYF:mode>2" > > @@ -3848,6 +3850,7 @@ > > (include "pic.md") > > (include 
"generic.md") > > (include "sifive-7.md") > > +(include "sifive-p600.md") > > (include "thead.md") > > (include "generic-ooo.md") > > (include "vector.md") > > diff --git a/gcc/config/riscv/sifive-7.md b/gcc/config/riscv/sifive-7.md > > index a63394c8c58..48bdba48190 100644 > > --- a/gcc/config/riscv/sifive-7.md > > +++ b/gcc/config/riscv/sifive-7.md > > @@ -81,7 +81,7 @@ > > > > (define_insn_reservation "sifive_7_fp_other" 3 > > (and (eq_attr "tune" "sifive_7") > > - (eq_attr "type" "fcvt,fcmp,fmove")) > > + (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i,fcmp,fmove")) > > "sifive_7_B") > > > > (define_insn_reservation "sifive_7_fdiv_s" 27 > > diff --git a/gcc/config/riscv/sifive-p600.md > b/gcc/config/riscv/sifive-p600.md > > new file mode 100644 > > index 00000000000..1529d1b743c > > --- /dev/null > > +++ b/gcc/config/riscv/sifive-p600.md > > @@ -0,0 +1,174 @@ > > +;; Scheduling description for Sifive p600 series. > > + > > +;; Sifive p600 series is a triple-issue, superscalar, out-of-order > processor. > > + > > +;; CPU execution units: > > +;; ialu Integer Units: all arithmetic and logic. > > +;; > > +;; bru Branch Resolution Unit: all branches. > > +;; > > +;; st Memory Write Unit: all writes to memory. > > +;; > > +;; ld Memory Read Unit: all reads from memory. > > +;; > > +;; imul Integer Multiply Unit > > +;; > > +;; idiv Integer Divide Unit > > +;; > > +;; system System Unit: all coprocessor accesses. > > +;; > > +;; fpu Floating Point Unit > > +;; > > +;; fmul Floating Point Multiply Unit > > +;; > > +;; fdiv Floating Point Divide Unit > > + > > +;; Four automata are defined to reduce number of states > > +;; which a single large automaton will have. 
> > +(define_automaton > "sifive_p600_iex,sifive_p600_fex,sifive_p600_mem,sifive_p600_div") > > + > > +;; The Sifive 8 has six pipelines: > > +;; A-pipe Load, Store > > +;; B-pipe ALU, Branch > > +;; M-pipe ALU, MUL, DIV and I2F(integer to float instruction) > > +;; C-pipe ALU, Conditional move and system for coprocessor > accesses > > +;; F-pipe FPU, MUL, F2I(float to integer instruction) > > +;; FM-pipe FPU, MUL, DIV > > + > > +(define_cpu_unit "sifive_p600_A" "sifive_p600_mem") > > +(define_cpu_unit "sifive_p600_B" "sifive_p600_iex") > > +(define_cpu_unit "sifive_p600_M" "sifive_p600_iex") > > +(define_cpu_unit "sifive_p600_C" "sifive_p600_iex") > > +(define_cpu_unit "sifive_p600_F" "sifive_p600_fex") > > +(define_cpu_unit "sifive_p600_FM" "sifive_p600_fex") > > + > > +;; Load and store unit. > > +(define_cpu_unit "sifive_p600_ld" "sifive_p600_mem") > > +(define_cpu_unit "sifive_p600_st" "sifive_p600_mem") > > + > > +;; Branch unit. > > +(define_cpu_unit "sifive_p600_bru" "sifive_p600_iex") > > + > > +;; Integer and multiply unit. > > +(define_cpu_unit "sifive_p600_ialu" "sifive_p600_iex") > > +(define_cpu_unit "sifive_p600_imul" "sifive_p600_iex") > > +(define_cpu_unit "sifive_p600_system" "sifive_p600_iex") > > + > > +;; Divide unit. > > +(define_cpu_unit "sifive_p600_idiv" "sifive_p600_div") > > +(define_cpu_unit "sifive_p600_fdiv" "sifive_p600_div") > > + > > +;; Float and multiply unit. > > +(define_cpu_unit "sifive_p600_fmul" "sifive_p600_fex") > > +(define_cpu_unit "sifive_p600_fpu" "sifive_p600_fex") > > + > > +;; ALU instruction can use pipeline C, B and M. > > +(define_reservation "int_pipe" > "(sifive_p600_C|sifive_p600_B|sifive_p600_M)") > > +;; FPU instruction can use pipeline F and FM. 
> > +(define_reservation "float_pipe" "(sifive_p600_F|sifive_p600_FM)") > > + > > +(define_insn_reservation "sifive_p600_load" 3 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "load")) > > + "sifive_p600_A,sifive_p600_ld*2") > > + > > +(define_insn_reservation "sifive_p600_fpload" 4 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "fpload")) > > + "sifive_p600_A,sifive_p600_ld*3") > > + > > +(define_insn_reservation "sifive_p600_store" 1 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "store")) > > + "sifive_p600_A+sifive_p600_st") > > + > > +(define_insn_reservation "sifive_p600_fpstore" 1 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "fpstore")) > > + "sifive_p600_A+sifive_p600_st") > > + > > +(define_insn_reservation "sifive_p600_branch" 1 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "branch,jump,call")) > > + "sifive_p600_B+sifive_p600_bru") > > + > > +(define_insn_reservation "sifive_p600_sfb_alu" 1 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "sfb_alu")) > > + "sifive_p600_C+sifive_p600_bru+sifive_p600_ialu") > > + > > +(define_insn_reservation "sifive_p600_atomic" 3 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "atomic")) > > + "sifive_p600_C,sifive_p600_system*2") > > + > > +(define_insn_reservation "sifive_p600_mul" 3 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "imul")) > > + "sifive_p600_M,sifive_p600_imul*2") > > + > > +(define_insn_reservation "sifive_p600_div" 33 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "idiv")) > > + "sifive_p600_M, sifive_p600_idiv*32") > > + > > +(define_insn_reservation "sifive_p600_alu" 1 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" > "unknown,arith,logical,shift,slt,multi,bitmanip,clz,ctz,rotate")) > > + "int_pipe+sifive_p600_ialu") > > + > > +(define_insn_reservation "sifive_p600_cpop" 3 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr 
"type" "cpop")) > > + "int_pipe,sifive_p600_ialu*2") > > + > > +(define_insn_reservation "sifive_p600_load_immediate" 1 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "nop,const,auipc,move")) > > + "int_pipe") > > + > > +(define_insn_reservation "sifive_p600_fma" 4 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "fadd,fmul,fmadd")) > > + "float_pipe,sifive_p600_fmul*3") > > + > > +(define_insn_reservation "sifive_p600_i2f" 2 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "mtc,fcvt_i2f")) > > + "sifive_p600_M,sifive_p600_ialu") > > + > > +(define_insn_reservation "sifive_p600_f2i" 2 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "mfc,fcmp,fcvt_f2i")) > > + "sifive_p600_F,sifive_p600_fpu") > > + > > +(define_insn_reservation "sifive_p600_fmove" 2 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "fmove,fcvt")) > > + "float_pipe,sifive_p600_fpu") > > + > > +(define_insn_reservation "sifive_p600_fdiv_s" 18 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "fdiv,fsqrt") > > + (eq_attr "mode" "SF")) > > + "sifive_p600_FM, sifive_p600_fdiv*17") > > + > > +(define_insn_reservation "sifive_p600_fdiv_d" 31 > > + (and (eq_attr "tune" "sifive_p600") > > + (eq_attr "type" "fdiv,fsqrt") > > + (eq_attr "mode" "DF")) > > + "sifive_p600_FM, sifive_p600_fdiv*30") > > + > > +(define_bypass 1 > "sifive_p600_load,sifive_p600_alu,sifive_p600_mul,sifive_p600_sfb_alu" > > + "sifive_p600_alu,sifive_p600_branch") > > + > > +(define_bypass 1 "sifive_p600_load,sifive_p600_alu,sifive_p600_mul, > > + sifive_p600_f2i,sifive_p600_fmove,sifive_p600_sfb_alu" > > + "sifive_p600_store" "riscv_store_data_bypass_p") > > + > > +(define_bypass 1 "sifive_p600_i2f" > > + > "sifive_p600_fma,sifive_p600_f2i,sifive_p600_fmove,sifive_p600_fdiv_s,sifive_p600_fdiv_d") > > + > > +(define_bypass 1 "sifive_p600_f2i" > > + "sifive_p600_branch,sifive_p600_sfb_alu,sifive_p600_mul, > > + 
sifive_p600_div,sifive_p600_alu,sifive_p600_cpop") > > diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi > > index 819a75dfe94..1017f4de8d2 100644 > > --- a/gcc/doc/invoke.texi > > +++ b/gcc/doc/invoke.texi > > @@ -30670,7 +30670,8 @@ Permissible values for this option are: > @samp{sifive-e20}, @samp{sifive-e21}, > > Optimize the output for the given processor, specified by > microarchitecture or > > particular CPU name. Permissible values for this option are: > @samp{rocket}, > > @samp{sifive-3-series}, @samp{sifive-5-series}, @samp{sifive-7-series}, > > -@samp{thead-c906}, @samp{size}, and all valid options for > @option{-mcpu=}. > > +@samp{thead-c906}, @samp{size}, @samp{sifive-p600-series}, > > +and all valid options for @option{-mcpu=}. > > > > When @option{-mtune=} is not specified, use the setting from > @option{-mcpu}, > > the default is @samp{rocket} if both are not specified. > >