https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/130059
>From ee0ed7e6fdce69d98a05e42327a305228797a9de Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Wed, 5 Mar 2025 10:52:00 +0000 Subject: [PATCH 1/4] [AMDGPU][NPM] Port GCNCreateVOPD to NPM --- llvm/lib/Target/AMDGPU/AMDGPU.h | 7 ++- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 1 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 +- llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 53 ++++++++++++------- 4 files changed, 43 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 00355d8fb5e5f..95340f1287d8d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -358,6 +358,11 @@ class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> { PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager &AM); }; +class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> { +public: + PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &AM); +}; + FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); ModulePass *createAMDGPUPrintfRuntimeBinding(); @@ -443,7 +448,7 @@ extern char &SIFormMemoryClausesID; void initializeSIPostRABundlerLegacyPass(PassRegistry &); extern char &SIPostRABundlerLegacyID; -void initializeGCNCreateVOPDPass(PassRegistry &); +void initializeGCNCreateVOPDLegacyPass(PassRegistry &); extern char &GCNCreateVOPDID; void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 16ae23133a549..98b0bc7358e9d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -104,6 +104,7 @@ MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUse MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass()) MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) +MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass()) MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass()) MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 856b5eb359c49..b06e87baa4ea9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -547,7 +547,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIPreAllocateWWMRegsLegacyPass(*PR); initializeSIFormMemoryClausesLegacyPass(*PR); initializeSIPostRABundlerLegacyPass(*PR); - initializeGCNCreateVOPDPass(*PR); + initializeGCNCreateVOPDLegacyPass(*PR); initializeAMDGPUUnifyDivergentExitNodesPass(*PR); initializeAMDGPUAAWrapperPassPass(*PR); initializeAMDGPUExternalAAWrapperPass(*PR); @@ -2150,7 +2150,7 @@ void AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const { void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const { if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) { - // TODO: addPass(GCNCreateVOPDPass()); + addPass(GCNCreateVOPDPass()); } // TODO: addPass(SIMemoryLegalizerPass()); // TODO: addPass(SIInsertWaitcntsPass()); diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp index 798279b279da3..32a26469d616b 100644 --- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp +++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" +#include "llvm/CodeGen/MachinePassManager.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "gcn-create-vopd" @@ -36,7 +37,7 @@ using namespace llvm; namespace { -class GCNCreateVOPD : public MachineFunctionPass { +class GCNCreateVOPD { private: class VOPDCombineInfo { public: @@ -49,20 +50,8 @@ class GCNCreateVOPD : public MachineFunctionPass { }; public: - static char ID; const GCNSubtarget *ST = nullptr; - GCNCreateVOPD() : MachineFunctionPass(ID) {} - - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.setPreservesCFG(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - StringRef getPassName() const override { - return "GCN Create VOPD Instructions"; - } - bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) { auto *FirstMI = CI.FirstMI; auto *SecondMI = CI.SecondMI; @@ -112,9 +101,7 @@ class GCNCreateVOPD : public MachineFunctionPass { return true; } - bool runOnMachineFunction(MachineFunction &MF) override { - if (skipFunction(MF.getFunction())) - return false; + bool run(MachineFunction &MF) { ST = &MF.getSubtarget<GCNSubtarget>(); if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32()) return false; @@ -163,11 +150,39 @@ class GCNCreateVOPD : public MachineFunctionPass { } }; +class GCNCreateVOPDLegacy : public MachineFunctionPass { +public: + static char ID; + GCNCreateVOPDLegacy() : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { + return "GCN Create VOPD Instructions"; + } + bool runOnMachineFunction(MachineFunction &MF) override { + if (skipFunction(MF.getFunction())) + return false; + + return GCNCreateVOPD().run(MF); + } +}; + } // namespace -char GCNCreateVOPD::ID = 0; +PreservedAnalyses llvm::GCNCreateVOPDPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &AM) { + if (!GCNCreateVOPD().run(MF)) + return PreservedAnalyses::all(); + return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>(); +} + +char GCNCreateVOPDLegacy::ID = 0; -char &llvm::GCNCreateVOPDID = GCNCreateVOPD::ID; +char &llvm::GCNCreateVOPDID = GCNCreateVOPDLegacy::ID; -INITIALIZE_PASS(GCNCreateVOPD, DEBUG_TYPE, "GCN Create VOPD Instructions", +INITIALIZE_PASS(GCNCreateVOPDLegacy, DEBUG_TYPE, "GCN Create VOPD Instructions", false, false) >From 94ec994f55a8b02e8b070e82d6253eb9de67ca97 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Mon, 10 Mar 2025 04:27:24 +0000 Subject: [PATCH 2/4] clang format --- llvm/lib/Target/AMDGPU/AMDGPU.h | 3 +- llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 186 ++++++++++++----------- 2 files changed, 97 insertions(+), 92 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 95340f1287d8d..96f23432685de 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -360,7 +360,8 @@ class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> { class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> { public: - PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &AM); + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &AM); }; FunctionPass *createAMDGPUAnnotateUniformValuesLegacy(); diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp index 32a26469d616b..22123f738c948 100644 --- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp +++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp @@ -49,105 +49,108 @@ class GCNCreateVOPD { MachineInstr *SecondMI; }; -public: - const GCNSubtarget *ST = nullptr; - - bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) { - auto *FirstMI = CI.FirstMI; - auto *SecondMI = CI.SecondMI; - unsigned Opc1 = FirstMI->getOpcode(); - unsigned Opc2 = SecondMI->getOpcode(); - unsigned EncodingFamily = - AMDGPU::getVOPDEncodingFamily(SII->getSubtarget()); - int NewOpcode = - AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1), - AMDGPU::getVOPDOpcode(Opc2), EncodingFamily); - assert(NewOpcode != -1 && - "Should have previously determined this as a possible VOPD\n"); - - auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI, - FirstMI->getDebugLoc(), SII->get(NewOpcode)) - .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags()); - - namespace VOPD = AMDGPU::VOPD; - MachineInstr *MI[] = {FirstMI, SecondMI}; - auto InstInfo = - AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc()); - - for (auto CompIdx : VOPD::COMPONENTS) { - auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands(); - VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx)); - } - - for (auto CompIdx : VOPD::COMPONENTS) { - auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum(); - for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) { - auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx); + public: + const GCNSubtarget *ST = nullptr; + + bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) { + auto *FirstMI = CI.FirstMI; + auto *SecondMI = CI.SecondMI; + unsigned Opc1 = FirstMI->getOpcode(); + unsigned Opc2 = SecondMI->getOpcode(); + unsigned EncodingFamily = + AMDGPU::getVOPDEncodingFamily(SII->getSubtarget()); + int NewOpcode = + AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1), + AMDGPU::getVOPDOpcode(Opc2), EncodingFamily); + assert(NewOpcode != -1 && + "Should have previously determined this as a possible VOPD\n"); + + auto VOPDInst = + BuildMI(*FirstMI->getParent(), FirstMI, FirstMI->getDebugLoc(), + SII->get(NewOpcode)) + .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags()); + + namespace VOPD = AMDGPU::VOPD; + MachineInstr *MI[] = {FirstMI, SecondMI}; + auto InstInfo = + AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc()); + + for (auto CompIdx : VOPD::COMPONENTS) { + auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands(); VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx)); } - } - SII->fixImplicitOperands(*VOPDInst); - for (auto CompIdx : VOPD::COMPONENTS) - VOPDInst.copyImplicitOps(*MI[CompIdx]); + for (auto CompIdx : VOPD::COMPONENTS) { + auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum(); + for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; + ++CompSrcIdx) { + auto MCOprIdx = + InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx); + VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx)); + } + } - LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: " - << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n"); + SII->fixImplicitOperands(*VOPDInst); + for (auto CompIdx : VOPD::COMPONENTS) + VOPDInst.copyImplicitOps(*MI[CompIdx]); - for (auto CompIdx : VOPD::COMPONENTS) - MI[CompIdx]->eraseFromParent(); + LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: " + << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n"); - ++NumVOPDCreated; - return true; - } + for (auto CompIdx : VOPD::COMPONENTS) + MI[CompIdx]->eraseFromParent(); - bool run(MachineFunction &MF) { - ST = &MF.getSubtarget<GCNSubtarget>(); - if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32()) - return false; - LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n"); - - const SIInstrInfo *SII = ST->getInstrInfo(); - bool Changed = false; - - SmallVector<VOPDCombineInfo> ReplaceCandidates; - - for (auto &MBB : MF) { - auto MII = MBB.begin(), E = MBB.end(); - while (MII != E) { - auto *FirstMI = &*MII; - MII = next_nodbg(MII, MBB.end()); - if (MII == MBB.end()) - break; - if (FirstMI->isDebugInstr()) - continue; - auto *SecondMI = &*MII; - unsigned Opc = FirstMI->getOpcode(); - unsigned Opc2 = SecondMI->getOpcode(); - llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); - llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); - VOPDCombineInfo CI; - - if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y) - CI = VOPDCombineInfo(FirstMI, SecondMI); - else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X) - CI = VOPDCombineInfo(SecondMI, FirstMI); - else - continue; - // checkVOPDRegConstraints cares about program order, but doReplace - // cares about X-Y order in the constituted VOPD - if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) { - ReplaceCandidates.push_back(CI); - ++MII; + ++NumVOPDCreated; + return true; + } + + bool run(MachineFunction &MF) { + ST = &MF.getSubtarget<GCNSubtarget>(); + if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32()) + return false; + LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n"); + + const SIInstrInfo *SII = ST->getInstrInfo(); + bool Changed = false; + + SmallVector<VOPDCombineInfo> ReplaceCandidates; + + for (auto &MBB : MF) { + auto MII = MBB.begin(), E = MBB.end(); + while (MII != E) { + auto *FirstMI = &*MII; + MII = next_nodbg(MII, MBB.end()); + if (MII == MBB.end()) + break; + if (FirstMI->isDebugInstr()) + continue; + auto *SecondMI = &*MII; + unsigned Opc = FirstMI->getOpcode(); + unsigned Opc2 = SecondMI->getOpcode(); + llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); + llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); + VOPDCombineInfo CI; + + if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y) + CI = VOPDCombineInfo(FirstMI, SecondMI); + else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X) + CI = VOPDCombineInfo(SecondMI, FirstMI); + else + continue; + // checkVOPDRegConstraints cares about program order, but doReplace + // cares about X-Y order in the constituted VOPD + if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) { + ReplaceCandidates.push_back(CI); + ++MII; + } } } - } - for (auto &CI : ReplaceCandidates) { - Changed |= doReplace(SII, CI); - } + for (auto &CI : ReplaceCandidates) { + Changed |= doReplace(SII, CI); + } - return Changed; - } + return Changed; + } }; class GCNCreateVOPDLegacy : public MachineFunctionPass { @@ -173,8 +176,9 @@ class GCNCreateVOPDLegacy : public MachineFunctionPass { } // namespace -PreservedAnalyses llvm::GCNCreateVOPDPass::run(MachineFunction &MF, - MachineFunctionAnalysisManager &AM) { +PreservedAnalyses +llvm::GCNCreateVOPDPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &AM) { if (!GCNCreateVOPD().run(MF)) return PreservedAnalyses::all(); return getMachineFunctionPassPreservedAnalyses().preserveSet<CFGAnalyses>(); >From 612e0ab5752d887dae8104c9ed5a208e3a0be940 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Tue, 11 Mar 2025 09:03:31 +0000 Subject: [PATCH 3/4] add test --- llvm/test/CodeGen/AMDGPU/vopd-combine.mir | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir index 8d5060177c63d..5a13401c1631c 100644 --- a/llvm/test/CodeGen/AMDGPU/vopd-combine.mir +++ b/llvm/test/CodeGen/AMDGPU/vopd-combine.mir @@ -4,6 +4,8 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck -check-prefix=SCHED %s # RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -run-pass=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -passes=postmisched,gcn-create-vopd %s -o - | FileCheck -check-prefixes=PAIR,PAIR-GFX12 %s + --- | @lds = external addrspace(3) global [8 x i8] define void @vopd_schedule() { ret void } >From 1aeebc20c7edad2e6b6e2b1d410fb778016fc304 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Wed, 12 Mar 2025 06:03:07 +0000 Subject: [PATCH 4/4] format and sort registry --- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 +- llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp | 181 +++++++++--------- 2 files changed, 90 insertions(+), 93 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index 98b0bc7358e9d..b1dba132e5bf6 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -103,8 +103,8 @@ MACHINE_FUNCTION_PASS("amdgpu-reserve-wwm-regs", AMDGPUReserveWWMRegsPass()) MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartialRegUsesPass()) MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", GCNPreRAOptimizationsPass()) MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass()) -MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass()) +MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass()) MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass()) MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass()) MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass()); diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp index 22123f738c948..ccc711a0bcc4e 100644 --- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp +++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp @@ -49,108 +49,105 @@ class GCNCreateVOPD { MachineInstr *SecondMI; }; - public: - const GCNSubtarget *ST = nullptr; - - bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) { - auto *FirstMI = CI.FirstMI; - auto *SecondMI = CI.SecondMI; - unsigned Opc1 = FirstMI->getOpcode(); - unsigned Opc2 = SecondMI->getOpcode(); - unsigned EncodingFamily = - AMDGPU::getVOPDEncodingFamily(SII->getSubtarget()); - int NewOpcode = - AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1), - AMDGPU::getVOPDOpcode(Opc2), EncodingFamily); - assert(NewOpcode != -1 && - "Should have previously determined this as a possible VOPD\n"); - - auto VOPDInst = - BuildMI(*FirstMI->getParent(), FirstMI, FirstMI->getDebugLoc(), - SII->get(NewOpcode)) - .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags()); - - namespace VOPD = AMDGPU::VOPD; - MachineInstr *MI[] = {FirstMI, SecondMI}; - auto InstInfo = - AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc()); - - for (auto CompIdx : VOPD::COMPONENTS) { - auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands(); - VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx)); - } +public: + const GCNSubtarget *ST = nullptr; + + bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) { + auto *FirstMI = CI.FirstMI; + auto *SecondMI = CI.SecondMI; + unsigned Opc1 = FirstMI->getOpcode(); + unsigned Opc2 = SecondMI->getOpcode(); + unsigned EncodingFamily = + AMDGPU::getVOPDEncodingFamily(SII->getSubtarget()); + int NewOpcode = + AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1), + AMDGPU::getVOPDOpcode(Opc2), EncodingFamily); + assert(NewOpcode != -1 && + "Should have previously determined this as a possible VOPD\n"); + + auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI, + FirstMI->getDebugLoc(), SII->get(NewOpcode)) + .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags()); + + namespace VOPD = AMDGPU::VOPD; + MachineInstr *MI[] = {FirstMI, SecondMI}; + auto InstInfo = + AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc()); + + for (auto CompIdx : VOPD::COMPONENTS) { + auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands(); + VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx)); + } - for (auto CompIdx : VOPD::COMPONENTS) { - auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum(); - for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; - ++CompSrcIdx) { - auto MCOprIdx = - InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx); - VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx)); - } + for (auto CompIdx : VOPD::COMPONENTS) { + auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum(); + for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) { + auto MCOprIdx = InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx); + VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx)); } + } - SII->fixImplicitOperands(*VOPDInst); - for (auto CompIdx : VOPD::COMPONENTS) - VOPDInst.copyImplicitOps(*MI[CompIdx]); + SII->fixImplicitOperands(*VOPDInst); + for (auto CompIdx : VOPD::COMPONENTS) + VOPDInst.copyImplicitOps(*MI[CompIdx]); - LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: " - << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n"); + LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: " + << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n"); - for (auto CompIdx : VOPD::COMPONENTS) - MI[CompIdx]->eraseFromParent(); + for (auto CompIdx : VOPD::COMPONENTS) + MI[CompIdx]->eraseFromParent(); - ++NumVOPDCreated; - return true; - } + ++NumVOPDCreated; + return true; + } - bool run(MachineFunction &MF) { - ST = &MF.getSubtarget<GCNSubtarget>(); - if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32()) - return false; - LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n"); - - const SIInstrInfo *SII = ST->getInstrInfo(); - bool Changed = false; - - SmallVector<VOPDCombineInfo> ReplaceCandidates; - - for (auto &MBB : MF) { - auto MII = MBB.begin(), E = MBB.end(); - while (MII != E) { - auto *FirstMI = &*MII; - MII = next_nodbg(MII, MBB.end()); - if (MII == MBB.end()) - break; - if (FirstMI->isDebugInstr()) - continue; - auto *SecondMI = &*MII; - unsigned Opc = FirstMI->getOpcode(); - unsigned Opc2 = SecondMI->getOpcode(); - llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); - llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); - VOPDCombineInfo CI; - - if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y) - CI = VOPDCombineInfo(FirstMI, SecondMI); - else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X) - CI = VOPDCombineInfo(SecondMI, FirstMI); - else - continue; - // checkVOPDRegConstraints cares about program order, but doReplace - // cares about X-Y order in the constituted VOPD - if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) { - ReplaceCandidates.push_back(CI); - ++MII; - } + bool run(MachineFunction &MF) { + ST = &MF.getSubtarget<GCNSubtarget>(); + if (!AMDGPU::hasVOPD(*ST) || !ST->isWave32()) + return false; + LLVM_DEBUG(dbgs() << "CreateVOPD Pass:\n"); + + const SIInstrInfo *SII = ST->getInstrInfo(); + bool Changed = false; + + SmallVector<VOPDCombineInfo> ReplaceCandidates; + + for (auto &MBB : MF) { + auto MII = MBB.begin(), E = MBB.end(); + while (MII != E) { + auto *FirstMI = &*MII; + MII = next_nodbg(MII, MBB.end()); + if (MII == MBB.end()) + break; + if (FirstMI->isDebugInstr()) + continue; + auto *SecondMI = &*MII; + unsigned Opc = FirstMI->getOpcode(); + unsigned Opc2 = SecondMI->getOpcode(); + llvm::AMDGPU::CanBeVOPD FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc); + llvm::AMDGPU::CanBeVOPD SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2); + VOPDCombineInfo CI; + + if (FirstCanBeVOPD.X && SecondCanBeVOPD.Y) + CI = VOPDCombineInfo(FirstMI, SecondMI); + else if (FirstCanBeVOPD.Y && SecondCanBeVOPD.X) + CI = VOPDCombineInfo(SecondMI, FirstMI); + else + continue; + // checkVOPDRegConstraints cares about program order, but doReplace + // cares about X-Y order in the constituted VOPD + if (llvm::checkVOPDRegConstraints(*SII, *FirstMI, *SecondMI)) { + ReplaceCandidates.push_back(CI); + ++MII; } } - for (auto &CI : ReplaceCandidates) { - Changed |= doReplace(SII, CI); - } - - return Changed; } + for (auto &CI : ReplaceCandidates) { + Changed |= doReplace(SII, CI); + } + + return Changed; + } }; class GCNCreateVOPDLegacy : public MachineFunctionPass { _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits