================ @@ -0,0 +1,258 @@ +//===- AMDGPURBLegalizeRules -------------------------------------*- C++ -*-==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURBLEGALIZERULES_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPURBLEGALIZERULES_H + +#include "llvm/CodeGen/MachineUniformityAnalysis.h" + +namespace llvm { + +class GCNSubtarget; + +namespace AMDGPU { + +// IDs used to build predicate for RBSRule. Predicate can have one or more IDs +// and each represents a check for 'uniform or divergent' + LLT or just LLT on +// register operand. +// Most often checking one operand is enough to decide which RegBankLLTMapping +// to apply (see Fast Rules), IDs are useful when two or more operands need to +// be checked. +enum UniformityLLTOpPredicateID { + _, + // scalars + S1, + S16, + S32, + S64, + + UniS1, + UniS16, + UniS32, + UniS64, + + DivS1, + DivS32, + DivS64, + + // pointers + P1, + + DivP1, + + // vectors + V2S16, + V2S32, + V3S32, + V4S32, +}; + +// How to apply register bank on register operand. +// In most cases, this serves as a LLT and register bank assert. +// Can change operands and insert copies, extends, truncs, and readfirstlanes. +// Anything more complicated requires LoweringMethod. +enum RegBankLLTMapingApplyID { + Invalid, + None, + IntrId, + Imm, + Vcc, + + // sgpr scalars, pointers, vectors and B-types + Sgpr16, + Sgpr32, + Sgpr64, + SgprV4S32, + + // vgpr scalars, pointers, vectors and B-types + Vgpr32, + Vgpr64, + VgprP1, + VgprV4S32, + + // Dst only modifiers: read-any-lane and truncs + UniInVcc, + UniInVgprS32, + UniInVgprV4S32, + + Sgpr32Trunc, + + // Src only modifiers: waterfalls, extends + Sgpr32AExt, + Sgpr32AExtBoolInReg, + Sgpr32SExt, +}; + +// Instruction needs to be replaced with sequence of instructions. Lowering was +// not done by legalizer since instructions is available in either SGPR or VGPR. +// For example S64 AND is available on SGPR, for that reason S64 AND is legal in +// context of Legalizer that only checks LLT. But S64 AND is not available on +// VGPR. Lower it to two S32 VGPR ANDs. +enum LoweringMethodID { + DoNotLower, + UniExtToSel, + VgprToVccCopy, + SplitTo32, + Ext32To64, + UniCstExt, +}; + +enum FastRulesTypes { + No, + Standard, // S16, S32, S64, V2S16 + Vector, // S32, V2S32, V3S32, V4S32 +}; + +struct RegBankLLTMapping { + SmallVector<RegBankLLTMapingApplyID, 2> DstOpMapping; + SmallVector<RegBankLLTMapingApplyID, 4> SrcOpMapping; + LoweringMethodID LoweringMethod; + RegBankLLTMapping( + std::initializer_list<RegBankLLTMapingApplyID> DstOpMappingList, + std::initializer_list<RegBankLLTMapingApplyID> SrcOpMappingList, + LoweringMethodID LoweringMethod = DoNotLower); +}; + +struct PredicateMapping { + SmallVector<UniformityLLTOpPredicateID, 4> OpUniformityAndTypes; + std::function<bool(const MachineInstr &)> TestFunc; + PredicateMapping( + std::initializer_list<UniformityLLTOpPredicateID> OpList, + std::function<bool(const MachineInstr &)> TestFunc = nullptr); + + bool match(const MachineInstr &MI, const MachineUniformityInfo &MUI, + const MachineRegisterInfo &MRI) const; +}; + +struct RBSRule { + PredicateMapping Predicate; + RegBankLLTMapping OperandMapping; +}; + +class SetOfRulesForOpcode { + // "Slow Rules". More complex 'Rules[i].Predicate', check them one by one. + SmallVector<RBSRule, 4> Rules; + + // "Fast Rules" + // Instead of testing each 'Rules[i].Predicate' we do direct access to + // RegBankLLTMapping using getFastPredicateSlot. For example if: + // - FastTypes == Standard Uni[0] holds Mapping in case Op 0 is uniform S32 + // - FastTypes == Vector Div[3] holds Mapping in case Op 0 is divergent V4S32 + FastRulesTypes FastTypes = No; +#define InvMapping RegBankLLTMapping({Invalid}, {Invalid}) + RegBankLLTMapping Uni[4] = {InvMapping, InvMapping, InvMapping, InvMapping}; + RegBankLLTMapping Div[4] = {InvMapping, InvMapping, InvMapping, InvMapping}; + +public: + SetOfRulesForOpcode(); + SetOfRulesForOpcode(FastRulesTypes FastTypes); + + const RegBankLLTMapping & + findMappingForMI(const MachineInstr &MI, const MachineRegisterInfo &MRI, + const MachineUniformityInfo &MUI) const; + + void addRule(RBSRule Rule); + + void addFastRuleDivergent(UniformityLLTOpPredicateID Ty, + RegBankLLTMapping RuleApplyIDs); + void addFastRuleUniform(UniformityLLTOpPredicateID Ty, + RegBankLLTMapping RuleApplyIDs); + +private: + int getFastPredicateSlot(UniformityLLTOpPredicateID Ty) const; +}; + +// Essentially 'map<Opcode(or intrinsic_opcode), SetOfRulesForOpcode>' but a +// little more efficient. +class RegBankLegalizeRules { + const GCNSubtarget *ST; + MachineRegisterInfo *MRI; + // Separate maps for G-opcodes and instrinsics since they are in differents + // enums. Multiple opcodes can share same set of rules. + // RulesAlias = map<Opcode, KeyOpcode> + // Rules = map<KeyOpcode, SetOfRulesForOpcode> + SmallDenseMap<unsigned, unsigned, 256> GRulesAlias; + SmallDenseMap<unsigned, SetOfRulesForOpcode, 128> GRules; + SmallDenseMap<unsigned, unsigned, 128> IRulesAlias; + SmallDenseMap<unsigned, SetOfRulesForOpcode, 64> IRules; + class RuleSetInitializer { + SetOfRulesForOpcode *RuleSet; + + public: + // Used for clang-format line breaks and to force writing all rules for + // opcode in same place. + template <class AliasMap, class RulesMap> + RuleSetInitializer(std::initializer_list<unsigned> OpcList, + AliasMap &RulesAlias, RulesMap &Rules, + FastRulesTypes FastTypes = No) { + unsigned KeyOpcode = *OpcList.begin(); + for (unsigned Opc : OpcList) { + auto [_, NewInput] = RulesAlias.try_emplace(Opc, KeyOpcode); + assert(NewInput && "Can't redefine existing Rules"); + } + + auto [DenseMapIter, NewInput] = Rules.try_emplace(KeyOpcode, FastTypes); + assert(NewInput && "Can't redefine existing Rules"); + + RuleSet = &DenseMapIter->second; + } + + RuleSetInitializer(const RuleSetInitializer &) = delete; + RuleSetInitializer &operator=(const RuleSetInitializer &) = delete; + RuleSetInitializer(RuleSetInitializer &&) = delete; + RuleSetInitializer &operator=(RuleSetInitializer &&) = delete; + ~RuleSetInitializer() = default; + + RuleSetInitializer &Div(UniformityLLTOpPredicateID Ty, + RegBankLLTMapping RuleApplyIDs, + bool STPred = true) { + if (STPred) + RuleSet->addFastRuleDivergent(Ty, RuleApplyIDs); + return *this; + } + + RuleSetInitializer &Uni(UniformityLLTOpPredicateID Ty, + RegBankLLTMapping RuleApplyIDs, + bool STPred = true) { + if (STPred) + RuleSet->addFastRuleUniform(Ty, RuleApplyIDs); + return *this; + } + + RuleSetInitializer &Any(RBSRule Init, bool STPred = true) { + if (STPred) + RuleSet->addRule(Init); + return *this; + } + }; + + RuleSetInitializer addRulesForGOpcs(std::initializer_list<unsigned> OpcList, + FastRulesTypes FastTypes = No); + + RuleSetInitializer addRulesForIOpcs(std::initializer_list<unsigned> OpcList, + FastRulesTypes FastTypes = No); + +public: + // Initialize rules for all opcodes. + RegBankLegalizeRules(const GCNSubtarget &ST, MachineRegisterInfo &MRI); + + // In case we don't want to regenerate same rules, we can use already + // generated rules but need to refresh references to objects that are + // created for this run. + void refreshRefs(const GCNSubtarget &_ST, MachineRegisterInfo &_MRI) { + ST = &_ST; + MRI = &_MRI; + }; + + const SetOfRulesForOpcode &getRulesForOpc(MachineInstr &MI) const; +}; + +} // end namespace AMDGPU +} // end namespace llvm + +#endif ---------------- arsenm wrote:
Missing end of file newline https://github.com/llvm/llvm-project/pull/112864 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits