================ @@ -69,6 +81,241 @@ FunctionPass *llvm::createAMDGPURBLegalizePass() { using namespace AMDGPU; +const RegBankLegalizeRules &getRules(const GCNSubtarget &ST, + MachineRegisterInfo &MRI) { + static std::mutex GlobalMutex; + static SmallDenseMap<unsigned, std::unique_ptr<RegBankLegalizeRules>> + CacheForRuleSet; + std::lock_guard<std::mutex> Lock(GlobalMutex); + if (!CacheForRuleSet.contains(ST.getGeneration())) { + auto Rules = std::make_unique<RegBankLegalizeRules>(ST, MRI); + CacheForRuleSet[ST.getGeneration()] = std::move(Rules); + } else { + CacheForRuleSet[ST.getGeneration()]->refreshRefs(ST, MRI); + } + return *CacheForRuleSet[ST.getGeneration()]; +} + bool AMDGPURBLegalize::runOnMachineFunction(MachineFunction &MF) { + + const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + + // Setup the instruction builder with CSE. + std::unique_ptr<MachineIRBuilder> MIRBuilder; + const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); + GISelCSEInfo *CSEInfo = nullptr; + GISelObserverWrapper Observer; + + if (TPC.isGISelCSEEnabled()) { + MIRBuilder = std::make_unique<CSEMIRBuilder>(); + CSEInfo = &Wrapper.get(TPC.getCSEConfig()); + MIRBuilder->setCSEInfo(CSEInfo); + Observer.addObserver(CSEInfo); + MIRBuilder->setChangeObserver(Observer); + } else { + MIRBuilder = std::make_unique<MachineIRBuilder>(); + } + MIRBuilder->setMF(MF); + + RAIIDelegateInstaller DelegateInstaller(MF, &Observer); + RAIIMFObserverInstaller MFObserverInstaller(MF, Observer); + + const MachineUniformityInfo &MUI = + getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo(); + const RegisterBankInfo &RBI = *MF.getSubtarget().getRegBankInfo(); + + // RegBankLegalizeRules is initialized with assigning sets of IDs to opcodes. + const RegBankLegalizeRules &RBLRules = getRules(ST, MRI); + + // Logic that does legalization based on IDs assigned to Opcode. + RegBankLegalizeHelper RBLegalizeHelper(*MIRBuilder, MRI, MUI, RBI, RBLRules); + + SmallVector<MachineInstr *> AllInst; + + for (auto &MBB : MF) { + for (MachineInstr &MI : MBB) { + AllInst.push_back(&MI); + } + } + + for (auto &MI : AllInst) { + if (!MI->isPreISelOpcode()) + continue; + + unsigned Opc = MI->getOpcode(); + + // Insert point for use operands needs some calculation. + if (Opc == G_PHI) { + RBLegalizeHelper.applyMappingPHI(*MI); + continue; + } + + // Opcodes that support pretty much all combinations of reg banks and LLTs + // (except S1). There is no point in writing rules for them. + if (Opc == G_BUILD_VECTOR || Opc == G_UNMERGE_VALUES || + Opc == G_MERGE_VALUES) { + RBLegalizeHelper.applyMappingTrivial(*MI); + continue; + } + + // Opcodes that also support S1. S1 rules are in RegBankLegalizeRules. + // Remaining reg bank and LLT combinations are trivially accepted. + if ((Opc == G_CONSTANT || Opc == G_FCONSTANT || Opc == G_IMPLICIT_DEF) && + !isS1(MI->getOperand(0).getReg(), MRI)) { + assert(isSgprRB(MI->getOperand(0).getReg(), MRI)); + continue; + } + + if (!RBLegalizeHelper.findRuleAndApplyMapping(*MI)) { + MI->dump(); + llvm_unreachable("failed to match any of the rules"); + } + } + + LLT S1 = LLT::scalar(1); + LLT S16 = LLT::scalar(16); + LLT S32 = LLT::scalar(32); + LLT S64 = LLT::scalar(64); + + // SGPR S1 clean up combines: + // - SGPR S1(S32) to SGPR S1(S32) Copy: anyext + trunc combine. + // In RBLegalize 'S1 Dst' are legalized into S32 as'S1Dst = Trunc S32Dst' + // and 'S1 Src' into 'S32Src = Anyext S1Src'. + // S1 Truncs and Anyexts that come from legalizer will also be cleaned up. + // Note: they can have non-S32 types e.g. S16 = Anyext S1 or S1 = Trunc S64. + // - Sgpr S1(S32) to VCC Copy: G_COPY_VCC_SCC combine. + // Divergent instruction uses Sgpr S1 as input that should be lane mask(VCC) + // Legalizing this use creates Sgpr S1(S32) to VCC Copy. + + // Note: Remaining S1 copies, S1s are either SGPR S1(S32) or VCC S1: + // - VCC to VCC Copy: nothing to do here, just a regular copy. + // - VCC to SGPR S1 Copy: Should not exist in a form of COPY instruction(*). + // Note: For 'uniform-in-VCC to SGPR-S1 copy' G_COPY_SCC_VCC is used + // instead. When only available instruction creates VCC result, use of + // UniformInVcc results in creating G_COPY_SCC_VCC. + + // (*)Explanation for 'SGPR S1(uniform) = COPY VCC(divergent)': + // Copy from divergent to uniform register indicates an error in either: + // - Uniformity analysis: Uniform instruction has divergent input. If one of + // the inputs is divergent, instruction should be divergent! + // - RBLegalizer not executing in waterfall loop (missing implementation) + + using namespace MIPatternMatch; + const SIRegisterInfo *TRI = ST.getRegisterInfo(); + + for (auto &MBB : MF) { + for (auto &MI : make_early_inc_range(MBB)) { + + if (MI.getOpcode() == G_TRUNC && isTriviallyDead(MI, MRI)) { + MI.eraseFromParent(); + continue; + } + + if (MI.getOpcode() == COPY) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + if (!Dst.isVirtual() || !Src.isVirtual()) + continue; ---------------- arsenm wrote:
Still need to handle phys<->virtreg copies? https://github.com/llvm/llvm-project/pull/112864 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits