Add integer and floating-point scheduling models for the Tenstorrent Ascalon 8-wide CPU.
gcc/ChangeLog: * config/riscv/riscv-cores.def (RISCV_TUNE): Update. * config/riscv/riscv-opts.h (enum riscv_microarchitecture_type): Add tt_ascalon_d8. * config/riscv/riscv.md: Update tune attribute and include tt-ascalon-d8.md. * config/riscv/tt-ascalon-d8.md: New file. Signed-off-by: Anton Blanchard <ant...@tenstorrent.com> --- gcc/config/riscv/riscv-cores.def | 2 +- gcc/config/riscv/riscv-opts.h | 1 + gcc/config/riscv/riscv.md | 3 +- gcc/config/riscv/tt-ascalon-d8.md | 154 ++++++++++++++++++++++++++++++ 4 files changed, 158 insertions(+), 2 deletions(-) create mode 100644 gcc/config/riscv/tt-ascalon-d8.md diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index e31afc3fe70..33d93080eca 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -39,7 +39,7 @@ RISCV_TUNE("sifive-5-series", generic, rocket_tune_info) RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info) RISCV_TUNE("sifive-p400-series", sifive_p400, sifive_p400_tune_info) RISCV_TUNE("sifive-p600-series", sifive_p600, sifive_p600_tune_info) -RISCV_TUNE("tt-ascalon-d8", generic_ooo, tt_ascalon_d8_tune_info) +RISCV_TUNE("tt-ascalon-d8", tt_ascalon_d8, tt_ascalon_d8_tune_info) RISCV_TUNE("thead-c906", generic, thead_c906_tune_info) RISCV_TUNE("xt-c908", generic, generic_ooo_tune_info) RISCV_TUNE("xt-c908v", generic, generic_ooo_tune_info) diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h index 26fe228e0f8..e921c71679f 100644 --- a/gcc/config/riscv/riscv-opts.h +++ b/gcc/config/riscv/riscv-opts.h @@ -57,6 +57,7 @@ enum riscv_microarchitecture_type { sifive_7, sifive_p400, sifive_p600, + tt_ascalon_d8, xiangshan, generic_ooo }; diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md index eec96875f96..fac9eb9292c 100644 --- a/gcc/config/riscv/riscv.md +++ b/gcc/config/riscv/riscv.md @@ -669,7 +669,7 @@ ;; Microarchitectures we know how to tune for. 
;; Keep this in sync with enum riscv_microarchitecture. (define_attr "tune" - "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo" + "generic,sifive_7,sifive_p400,sifive_p600,tt_ascalon_d8,xiangshan,generic_ooo" (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)"))) ;; Describe a user's asm statement. @@ -4832,6 +4832,7 @@ (include "thead.md") (include "generic-vector-ooo.md") (include "generic-ooo.md") +(include "tt-ascalon-d8.md") (include "vector.md") (include "vector-crypto.md") (include "vector-bfloat16.md") diff --git a/gcc/config/riscv/tt-ascalon-d8.md b/gcc/config/riscv/tt-ascalon-d8.md new file mode 100644 index 00000000000..513608cea79 --- /dev/null +++ b/gcc/config/riscv/tt-ascalon-d8.md @@ -0,0 +1,154 @@ +;; Tenstorrent Ascalon code scheduling model. +;; Copyright (C) 2023-2025 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; <http://www.gnu.org/licenses/>. + +(define_automaton "tt_ascalon_d8") + +;; Ascalon has more issue/execution bandwidth than decode/retire bandwidth, +;; so we model decode to place an upper limit on what we can achieve. 
+(define_cpu_unit "asc-decode0,asc-decode1,asc-decode2,asc-decode3,asc-decode4,asc-decode5,asc-decode6,asc-decode7" "tt_ascalon_d8") + +(define_cpu_unit "asc-lsu0,asc-lsu1,asc-lsu2" "tt_ascalon_d8") +(define_cpu_unit "asc-fxu0,asc-fxu1,asc-fxu2,asc-fxu3,asc-fxu4,asc-fxu5" "tt_ascalon_d8") +(define_cpu_unit "asc-fpu0,asc-fpu1" "tt_ascalon_d8") + +;; Shortcuts +(define_reservation "tt_ascalon_d8_decode" "asc-decode0|asc-decode1|asc-decode2|asc-decode3|asc-decode4|asc-decode5|asc-decode6|asc-decode7") +(define_reservation "tt_ascalon_d8_ls" "asc-lsu0|asc-lsu1|asc-lsu2") +(define_reservation "tt_ascalon_d8_alu" "asc-fxu0|asc-fxu1|asc-fxu2|asc-fxu3|asc-fxu4|asc-fxu5") +(define_reservation "tt_ascalon_d8_mul" "asc-fxu0") +(define_reservation "tt_ascalon_d8_div" "asc-fxu0") +(define_reservation "tt_ascalon_d8_br" "asc-fxu2|asc-fxu3") +(define_reservation "tt_ascalon_d8_fp" "asc-fpu0|asc-fpu1") + +;; Integer load/store +(define_insn_reservation "tt_ascalon_d8_int_load" 4 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "load")) + "tt_ascalon_d8_decode,tt_ascalon_d8_ls") + +(define_insn_reservation "tt_ascalon_d8_int_store" 4 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "store")) + "tt_ascalon_d8_decode,tt_ascalon_d8_ls") + +;; Float load/store +(define_insn_reservation "tt_ascalon_d8_float_load" 4 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "fpload")) + "tt_ascalon_d8_decode,tt_ascalon_d8_ls") + +(define_insn_reservation "tt_ascalon_d8_float_store" 4 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "fpstore")) + "tt_ascalon_d8_decode,tt_ascalon_d8_ls") + +;; Generic integer instructions. 
+(define_insn_reservation "tt_ascalon_d8_alu" 1 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,\ + move,bitmanip,rotate,min,max,minu,maxu,clz,ctz,atomic,\ + condmove,mvpair,zicond")) + "tt_ascalon_d8_decode,tt_ascalon_d8_alu") + +;; Short forward branch +(define_insn_reservation "tt_ascalon_d8_sfb" 1 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "sfb_alu")) + "tt_ascalon_d8_decode,tt_ascalon_d8_br") + +;; Branch instructions +(define_insn_reservation "tt_ascalon_d8_branch" 1 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "branch,jump,call,jalr,ret,trap")) + "tt_ascalon_d8_decode,tt_ascalon_d8_br") + +;; Float move, convert and compare. +;; INT -> FP moves are executed by the FXU and FP -> INT moves +;; are executed by the FPU, but we can't model that at the moment. +(define_insn_reservation "tt_ascalon_d8_float_move" 4 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "fmove")) + "tt_ascalon_d8_decode,tt_ascalon_d8_fp") + +(define_insn_reservation "tt_ascalon_d8_fcvt" 3 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i")) + "tt_ascalon_d8_decode,tt_ascalon_d8_fp") + +(define_insn_reservation "tt_ascalon_d8_fcmp" 2 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "fcmp")) + "tt_ascalon_d8_decode,tt_ascalon_d8_fp") + +;; Integer multiplication. +(define_insn_reservation "tt_ascalon_d8_imul" 3 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "imul")) + "tt_ascalon_d8_decode,tt_ascalon_d8_mul") + +;; Integer division is not pipelined. Do not block the unit for more than +;; three cycles so the DFA does not get too large. Similar for other +;; non-pipelined instructions. +(define_insn_reservation "tt_ascalon_d8_idiv" 15 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "idiv")) + "tt_ascalon_d8_decode,tt_ascalon_d8_div,tt_ascalon_d8_div*3") + +;; Float addition and multiplication. 
+(define_insn_reservation "tt_ascalon_d8_faddmul" 3 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "fadd,fmul")) + "tt_ascalon_d8_decode,tt_ascalon_d8_fp") + +;; Float FMA. +(define_insn_reservation "tt_ascalon_d8_float_fma" 3 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "fmadd")) + "tt_ascalon_d8_decode,tt_ascalon_d8_fp") + +;; Float division and square root. +(define_insn_reservation "tt_ascalon_d8_float_div_half" 7 + (and (eq_attr "tune" "tt_ascalon_d8") + (and (eq_attr "type" "fdiv,fsqrt") + (eq_attr "mode" "HF"))) + "tt_ascalon_d8_decode,tt_ascalon_d8_fp,tt_ascalon_d8_fp*3") + +(define_insn_reservation "tt_ascalon_d8_float_div_single" 7 + (and (eq_attr "tune" "tt_ascalon_d8") + (and (eq_attr "type" "fdiv,fsqrt") + (eq_attr "mode" "SF"))) + "tt_ascalon_d8_decode,tt_ascalon_d8_fp,tt_ascalon_d8_fp*3") + +(define_insn_reservation "tt_ascalon_d8_float_div_double" 12 + (and (eq_attr "tune" "tt_ascalon_d8") + (and (eq_attr "type" "fdiv,fsqrt") + (eq_attr "mode" "DF"))) + "tt_ascalon_d8_decode,tt_ascalon_d8_fp,tt_ascalon_d8_fp*3") + +;; Popcount and clmul. +(define_insn_reservation "tt_ascalon_d8_popcount" 1 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "cpop,clmul")) + "tt_ascalon_d8_decode,tt_ascalon_d8_alu") + +;; Transfer from/to coprocessor. +(define_insn_reservation "tt_ascalon_d8_xfer" 3 + (and (eq_attr "tune" "tt_ascalon_d8") + (eq_attr "type" "mfc,mtc")) + "tt_ascalon_d8_decode,tt_ascalon_d8_alu") -- 2.34.1