================ @@ -0,0 +1,494 @@ +//===-- RISCVSchedGenericOOO.td - Generic O3 Processor -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// We assume that: +// * 6-issue out-of-order CPU with 192 ROB entries. +// * Units: +// * IXU (Integer ALU Unit): 4 units, only one can execute division. +// * FXU (Floating-point Unit): 2 units. +// * LSU (Load/Store Unit): 2 units. +// * VXU (Vector Unit): 1 unit. +// * Latency: +// * Integer instructions: 1 cycle. +// * Multiplication instructions: 4 cycles. +// * Division instructions: 13-21 cycles. +// * Floating-point instructions: 4-6 cycles. +// * Vector instructions: 2-6 cycles. +// * Load/Store: +// * IXU: 4 cycles. +// * FXU: 6 cycles. +// * VXU: 6 cycles. +// * Integer/floating-point/vector div/rem/sqrt/... are non-pipelined. +//===----------------------------------------------------------------------===// + +def GenericOOOModel : SchedMachineModel { + int IssueWidth = 6; + int MicroOpBufferSize = 192; + int LoadLatency = 4; + int MispredictPenalty = 8; + let CompleteModel = 0; +} + +let SchedModel = GenericOOOModel in { +//===----------------------------------------------------------------------===// +// Resource groups +//===----------------------------------------------------------------------===// +def GenericOOODIV : ProcResource<1>; +def GenericOOOIXU : ProcResource<3>; +def GenericOOOALU : ProcResGroup<[GenericOOODIV, GenericOOOIXU]>; +def GenericOOOLSU : ProcResource<2>; +def GenericOOOFPU : ProcResource<2>; +// TODO: Add vector scheduling. 
+// def GenericOOOVXU : ProcResource<1>; + +//===----------------------------------------------------------------------===// +// Branches +//===----------------------------------------------------------------------===// +def : WriteRes<WriteJmp, [GenericOOOALU]>; +def : WriteRes<WriteJalr, [GenericOOOALU]>; +def : WriteRes<WriteJal, [GenericOOOALU]>; + +//===----------------------------------------------------------------------===// +// Integer arithmetic and logic +//===----------------------------------------------------------------------===// +def : WriteRes<WriteIALU, [GenericOOOALU]>; +def : WriteRes<WriteIALU32, [GenericOOOALU]>; +def : WriteRes<WriteShiftImm, [GenericOOOALU]>; +def : WriteRes<WriteShiftImm32, [GenericOOOALU]>; +def : WriteRes<WriteShiftReg, [GenericOOOALU]>; +def : WriteRes<WriteShiftReg32, [GenericOOOALU]>; + +//===----------------------------------------------------------------------===// +// Integer multiplication +//===----------------------------------------------------------------------===// +let Latency = 4 in { + def : WriteRes<WriteIMul, [GenericOOOALU]>; + def : WriteRes<WriteIMul32, [GenericOOOALU]>; +} + +//===----------------------------------------------------------------------===// +// Integer division +//===----------------------------------------------------------------------===// +def : WriteRes<WriteIDiv32, [GenericOOODIV]> { + let Latency = 13; + let ReleaseAtCycles = [13]; +} +def : WriteRes<WriteIDiv, [GenericOOODIV]> { + let Latency = 21; + let ReleaseAtCycles = [21]; +} +def : WriteRes<WriteIRem32, [GenericOOODIV]> { + let Latency = 13; + let ReleaseAtCycles = [13]; +} +def : WriteRes<WriteIRem, [GenericOOODIV]> { + let Latency = 21; + let ReleaseAtCycles = [21]; +} + +//===----------------------------------------------------------------------===// +// Integer memory +//===----------------------------------------------------------------------===// +// Load +let Latency = 4 in { + def : WriteRes<WriteLDB, 
[GenericOOOLSU]>; + def : WriteRes<WriteLDH, [GenericOOOLSU]>; + def : WriteRes<WriteLDW, [GenericOOOLSU]>; + def : WriteRes<WriteLDD, [GenericOOOLSU]>; +} + +// Store +def : WriteRes<WriteSTB, [GenericOOOLSU]>; +def : WriteRes<WriteSTH, [GenericOOOLSU]>; +def : WriteRes<WriteSTW, [GenericOOOLSU]>; +def : WriteRes<WriteSTD, [GenericOOOLSU]>; + +//===----------------------------------------------------------------------===// +// Atomic +//===----------------------------------------------------------------------===// +let Latency = 4 in { + def : WriteRes<WriteAtomicLDW, [GenericOOOLSU]>; + def : WriteRes<WriteAtomicLDD, [GenericOOOLSU]>; +} + +let Latency = 5 in { + def : WriteRes<WriteAtomicW, [GenericOOOLSU]>; + def : WriteRes<WriteAtomicD, [GenericOOOLSU]>; +} + +def : WriteRes<WriteAtomicSTW, [GenericOOOLSU]>; +def : WriteRes<WriteAtomicSTD, [GenericOOOLSU]>; + +//===----------------------------------------------------------------------===// +// Floating-point +//===----------------------------------------------------------------------===// +// Floating-point load +let Latency = 6 in { + def : WriteRes<WriteFLD32, [GenericOOOLSU]>; + def : WriteRes<WriteFLD64, [GenericOOOLSU]>; +} + +// Floating-point store +def : WriteRes<WriteFST32, [GenericOOOLSU]>; +def : WriteRes<WriteFST64, [GenericOOOLSU]>; + +// Arithmetic and logic +let Latency = 4 in { + def : WriteRes<WriteFAdd32, [GenericOOOFPU]>; + def : WriteRes<WriteFAdd64, [GenericOOOFPU]>; +} + +let Latency = 5 in { + def : WriteRes<WriteFMul32, [GenericOOOFPU]>; + def : WriteRes<WriteFMul64, [GenericOOOFPU]>; +} + +let Latency = 6 in { + def : WriteRes<WriteFMA32, [GenericOOOFPU]>; + def : WriteRes<WriteFMA64, [GenericOOOFPU]>; +} + +def : WriteRes<WriteFSGNJ32, [GenericOOOFPU]>; +def : WriteRes<WriteFSGNJ64, [GenericOOOFPU]>; +def : WriteRes<WriteFMinMax32, [GenericOOOFPU]>; +def : WriteRes<WriteFMinMax64, [GenericOOOFPU]>; + +// Compare +let Latency = 2 in { + def : WriteRes<WriteFCmp32, [GenericOOOFPU]>; + 
def : WriteRes<WriteFCmp64, [GenericOOOFPU]>; +} + +// Division +let Latency = 13, ReleaseAtCycles = [13] in { + def : WriteRes<WriteFDiv32, [GenericOOOFPU]>; + def : WriteRes<WriteFSqrt32, [GenericOOOFPU]>; +} + +let Latency = 17, ReleaseAtCycles = [17] in { + def : WriteRes<WriteFDiv64, [GenericOOOFPU]>; + def : WriteRes<WriteFSqrt64, [GenericOOOFPU]>; +} + +// Conversions +let Latency = 4 in { + def : WriteRes<WriteFCvtI32ToF32, [GenericOOOFPU]>; + def : WriteRes<WriteFCvtI32ToF64, [GenericOOOFPU]>; + def : WriteRes<WriteFCvtI64ToF32, [GenericOOOFPU]>; + def : WriteRes<WriteFCvtI64ToF64, [GenericOOOFPU]>; +} + +let Latency = 4 in { + def : WriteRes<WriteFCvtF32ToI32, [GenericOOOFPU]>; + def : WriteRes<WriteFCvtF32ToI64, [GenericOOOFPU]>; +} + +let Latency = 4 in { + def : WriteRes<WriteFCvtF64ToI32, [GenericOOOFPU]>; + def : WriteRes<WriteFCvtF64ToI64, [GenericOOOFPU]>; +} + +let Latency = 4 in { + def : WriteRes<WriteFCvtF64ToF32, [GenericOOOFPU]>; + def : WriteRes<WriteFCvtF32ToF64, [GenericOOOFPU]>; +} + +let Latency = 6 in { + def : WriteRes<WriteFMovI32ToF32, [GenericOOOFPU]>; + def : WriteRes<WriteFMovI64ToF64, [GenericOOOFPU]>; + def : WriteRes<WriteFMovF32ToI32, [GenericOOOFPU]>; + def : WriteRes<WriteFMovF64ToI64, [GenericOOOFPU]>; +} + +// Classify +def : WriteRes<WriteFClass32, [GenericOOOFPU]>; +def : WriteRes<WriteFClass64, [GenericOOOFPU]>; + +//===----------------------------------------------------------------------===// +// Zicsr extension +//===----------------------------------------------------------------------===// +def : WriteRes<WriteCSR, [GenericOOOALU]>; + +//===----------------------------------------------------------------------===// +// Zabha extension +//===----------------------------------------------------------------------===// +let Latency = 5 in { + def : WriteRes<WriteAtomicB, [GenericOOOLSU]>; + def : WriteRes<WriteAtomicH, [GenericOOOLSU]>; +} + 
+//===----------------------------------------------------------------------===// +// Zba extension +//===----------------------------------------------------------------------===// +def : WriteRes<WriteSHXADD, [GenericOOOALU]>; +def : WriteRes<WriteSHXADD32, [GenericOOOALU]>; + +//===----------------------------------------------------------------------===// +// Zbb extension +//===----------------------------------------------------------------------===// +def : WriteRes<WriteCLZ, [GenericOOOALU]>; +def : WriteRes<WriteCTZ, [GenericOOOALU]>; +def : WriteRes<WriteCPOP, [GenericOOOALU]>; +def : WriteRes<WriteCLZ32, [GenericOOOALU]>; +def : WriteRes<WriteCTZ32, [GenericOOOALU]>; +def : WriteRes<WriteCPOP32, [GenericOOOALU]>; ---------------- topperc wrote:
The 3-cycle CTPOP on Intel is for the scalar instruction. AMD Zen is better than Intel: https://uops.info/table.html?search=popcnt&cb_lat=on&cb_tp=on&cb_uops=on&cb_ports=on&cb_ADLE=on&cb_ZEN4=on&cb_measurements=on&cb_doc=on&cb_base=on&cb_sse=on https://github.com/llvm/llvm-project/pull/120712 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits