Add integer and floating-point scheduling models for the Tenstorrent
Ascalon 8-wide CPU.

gcc/ChangeLog:
        * config/riscv/riscv-cores.def (RISCV_TUNE): Update.
        * config/riscv/riscv-opts.h (enum riscv_microarchitecture_type):
          Add tt_ascalon_d8.
        * config/riscv/riscv.md: Update tune attribute and include
          tt-ascalon-d8.md.
        * config/riscv/tt-ascalon-d8.md: New file.

Signed-off-by: Anton Blanchard <ant...@tenstorrent.com>
---
 gcc/config/riscv/riscv-cores.def  |   2 +-
 gcc/config/riscv/riscv-opts.h     |   1 +
 gcc/config/riscv/riscv.md         |   3 +-
 gcc/config/riscv/tt-ascalon-d8.md | 154 ++++++++++++++++++++++++++++++
 4 files changed, 158 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/tt-ascalon-d8.md

diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index e31afc3fe70..33d93080eca 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -39,7 +39,7 @@ RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
 RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
 RISCV_TUNE("sifive-p400-series", sifive_p400, sifive_p400_tune_info)
 RISCV_TUNE("sifive-p600-series", sifive_p600, sifive_p600_tune_info)
-RISCV_TUNE("tt-ascalon-d8", generic_ooo, tt_ascalon_d8_tune_info)
+RISCV_TUNE("tt-ascalon-d8", tt_ascalon_d8, tt_ascalon_d8_tune_info)
 RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
 RISCV_TUNE("xt-c908", generic, generic_ooo_tune_info)
 RISCV_TUNE("xt-c908v", generic, generic_ooo_tune_info)
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 26fe228e0f8..e921c71679f 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -57,6 +57,7 @@ enum riscv_microarchitecture_type {
   sifive_7,
   sifive_p400,
   sifive_p600,
+  tt_ascalon_d8,
   xiangshan,
   generic_ooo
 };
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index eec96875f96..fac9eb9292c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -669,7 +669,7 @@
 ;; Microarchitectures we know how to tune for.
 ;; Keep this in sync with enum riscv_microarchitecture.
 (define_attr "tune"
-  "generic,sifive_7,sifive_p400,sifive_p600,xiangshan,generic_ooo"
+  "generic,sifive_7,sifive_p400,sifive_p600,tt_ascalon_d8,xiangshan,generic_ooo"
   (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
 
 ;; Describe a user's asm statement.
@@ -4832,6 +4832,7 @@
 (include "thead.md")
 (include "generic-vector-ooo.md")
 (include "generic-ooo.md")
+(include "tt-ascalon-d8.md")
 (include "vector.md")
 (include "vector-crypto.md")
 (include "vector-bfloat16.md")
diff --git a/gcc/config/riscv/tt-ascalon-d8.md b/gcc/config/riscv/tt-ascalon-d8.md
new file mode 100644
index 00000000000..513608cea79
--- /dev/null
+++ b/gcc/config/riscv/tt-ascalon-d8.md
@@ -0,0 +1,154 @@
+;; Tenstorrent Ascalon code scheduling model.
+;; Copyright (C) 2023-2025 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "tt_ascalon_d8")
+
+;; Ascalon has more issue/execution bandwidth than decode/retire bandwidth,
+;; so we model decode to place an upper limit on what we can achieve.
+(define_cpu_unit "asc-decode0,asc-decode1,asc-decode2,asc-decode3,asc-decode4,asc-decode5,asc-decode6,asc-decode7" "tt_ascalon_d8")
+
+(define_cpu_unit "asc-lsu0,asc-lsu1,asc-lsu2" "tt_ascalon_d8")
+(define_cpu_unit "asc-fxu0,asc-fxu1,asc-fxu2,asc-fxu3,asc-fxu4,asc-fxu5" "tt_ascalon_d8")
+(define_cpu_unit "asc-fpu0,asc-fpu1" "tt_ascalon_d8")
+
+;; Shortcuts: each reservation names the unit(s) one class of insn may use.
+(define_reservation "tt_ascalon_d8_decode" "asc-decode0|asc-decode1|asc-decode2|asc-decode3|asc-decode4|asc-decode5|asc-decode6|asc-decode7")
+(define_reservation "tt_ascalon_d8_ls" "asc-lsu0|asc-lsu1|asc-lsu2")
+(define_reservation "tt_ascalon_d8_alu" "asc-fxu0|asc-fxu1|asc-fxu2|asc-fxu3|asc-fxu4|asc-fxu5")
+(define_reservation "tt_ascalon_d8_mul" "asc-fxu0")
+(define_reservation "tt_ascalon_d8_div" "asc-fxu0")
+(define_reservation "tt_ascalon_d8_br" "asc-fxu2|asc-fxu3")
+(define_reservation "tt_ascalon_d8_fp" "asc-fpu0|asc-fpu1")
+
+;; Integer load/store: latency 4, one decode slot then any LSU pipe.
+(define_insn_reservation "tt_ascalon_d8_int_load" 4
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "load"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_ls")
+
+(define_insn_reservation "tt_ascalon_d8_int_store" 4
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "store"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_ls")
+
+;; Float load/store: latency 4, one decode slot then any LSU pipe.
+(define_insn_reservation "tt_ascalon_d8_float_load" 4
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "fpload"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_ls")
+
+(define_insn_reservation "tt_ascalon_d8_float_store" 4
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "fpstore"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_ls")
+
+;; Generic integer instructions: latency 1 on any of the six FXU pipes.
+(define_insn_reservation "tt_ascalon_d8_alu" 1
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "unknown,const,arith,shift,slt,multi,auipc,nop,logical,\
+                       move,bitmanip,rotate,min,max,minu,maxu,clz,ctz,atomic,\
+                       condmove,mvpair,zicond"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_alu")
+
+;; Short forward branch: latency 1 on a branch pipe (asc-fxu2 or asc-fxu3).
+(define_insn_reservation "tt_ascalon_d8_sfb" 1
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "sfb_alu"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_br")
+
+;; Branches, jumps, calls, returns and traps: latency 1 on a branch pipe.
+(define_insn_reservation "tt_ascalon_d8_branch" 1
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "branch,jump,call,jalr,ret,trap"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_br")
+
+;; Float move, convert and compare, all modelled on either FPU pipe.
+;; INT -> FP moves are executed by the FXU and FP -> INT moves
+;; are executed by the FPU, but we can't model that at the moment.
+(define_insn_reservation "tt_ascalon_d8_float_move" 4
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "fmove"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_fp")
+
+(define_insn_reservation "tt_ascalon_d8_fcvt" 3
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "fcvt,fcvt_i2f,fcvt_f2i"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_fp")
+
+(define_insn_reservation "tt_ascalon_d8_fcmp" 2
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "fcmp"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_fp")
+
+;; Integer multiplication: latency 3, restricted to asc-fxu0.
+(define_insn_reservation "tt_ascalon_d8_imul" 3
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "imul"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_mul")
+
+;; Integer division is not pipelined.  Block the unit (asc-fxu0) for only
+;; four cycles, not the full latency, so the DFA does not get too large.
+;; Similar for other non-pipelined instructions.
+(define_insn_reservation "tt_ascalon_d8_idiv" 15
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "idiv"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_div,tt_ascalon_d8_div*3")
+
+;; Float addition and multiplication: latency 3 on either FPU pipe.
+(define_insn_reservation "tt_ascalon_d8_faddmul" 3
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "fadd,fmul"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_fp")
+
+;; Float FMA: latency 3 on either FPU pipe.
+(define_insn_reservation "tt_ascalon_d8_float_fma" 3
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "fmadd"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_fp")
+
+;; Float division and square root; latency depends on the operand mode.
+(define_insn_reservation "tt_ascalon_d8_float_div_half" 7
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (and (eq_attr "type" "fdiv,fsqrt")
+           (eq_attr "mode" "HF")))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_fp,tt_ascalon_d8_fp*3")
+
+(define_insn_reservation "tt_ascalon_d8_float_div_single" 7
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (and (eq_attr "type" "fdiv,fsqrt")
+           (eq_attr "mode" "SF")))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_fp,tt_ascalon_d8_fp*3")
+
+(define_insn_reservation "tt_ascalon_d8_float_div_double" 12
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (and (eq_attr "type" "fdiv,fsqrt")
+           (eq_attr "mode" "DF")))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_fp,tt_ascalon_d8_fp*3")
+
+;; Popcount and clmul: latency 1 on any FXU pipe.
+(define_insn_reservation "tt_ascalon_d8_popcount" 1
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "cpop,clmul"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_alu")
+
+;; Transfer from/to coprocessor: latency 3, modelled on any FXU pipe.
+(define_insn_reservation "tt_ascalon_d8_xfer" 3
+  (and (eq_attr "tune" "tt_ascalon_d8")
+       (eq_attr "type" "mfc,mtc"))
+  "tt_ascalon_d8_decode,tt_ascalon_d8_alu")
-- 
2.34.1

Reply via email to