https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/125351
>From 4bf4fe28a25a1ac7e216b4dcd66da210114a5482 Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Sat, 1 Feb 2025 18:21:24 +0000 Subject: [PATCH 1/2] [AMDGPU][NewPM] Port SIOptimizeExecMaskingPreRA to NPM --- llvm/lib/Target/AMDGPU/AMDGPU.h | 2 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 2 +- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +- .../AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 42 ++++++++++++++----- .../AMDGPU/SIOptimizeExecMaskingPreRA.h | 24 +++++++++++ .../CodeGen/AMDGPU/collapse-endcf-broken.mir | 1 + ...ask-pre-ra-non-empty-but-used-interval.mir | 1 + 7 files changed, 62 insertions(+), 13 deletions(-) create mode 100644 llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 67bad5884c260..59839fd54d014 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -368,7 +368,7 @@ struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; -void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); +void initializeSIOptimizeExecMaskingPreRALegacyPass(PassRegistry &); extern char &SIOptimizeExecMaskingPreRAID; void initializeSIOptimizeVGPRLiveRangeLegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index d9d97928062f5..520f1a4282cc8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -110,6 +110,7 @@ MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass()) MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass()) MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass()) MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass()) +MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass()) 
MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass()) MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass()) MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass()) @@ -128,7 +129,6 @@ DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass()) DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass()) DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass()) DUMMY_MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass()) -DUMMY_MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", SIOptimizeExecMaskingPreRAPass()) DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass()) // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it // already exists. diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 8b387a191efba..855c522ca4de7 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -48,6 +48,7 @@ #include "SIMachineFunctionInfo.h" #include "SIMachineScheduler.h" #include "SIOptimizeExecMasking.h" +#include "SIOptimizeExecMaskingPreRA.h" #include "SIOptimizeVGPRLiveRange.h" #include "SIPeepholeSDWA.h" #include "SIPreAllocateWWMRegs.h" @@ -497,7 +498,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIFoldOperandsLegacyPass(*PR); initializeSIPeepholeSDWALegacyPass(*PR); initializeSIShrinkInstructionsLegacyPass(*PR); - initializeSIOptimizeExecMaskingPreRAPass(*PR); + initializeSIOptimizeExecMaskingPreRALegacyPass(*PR); initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR); initializeSILoadStoreOptimizerLegacyPass(*PR); initializeAMDGPUCtorDtorLoweringLegacyPass(*PR); diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 31f65d82a4d2b..2a8a398d7429d 100644 --- 
a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -12,6 +12,7 @@ /// //===----------------------------------------------------------------------===// +#include "SIOptimizeExecMaskingPreRA.h" #include "AMDGPU.h" #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" @@ -25,7 +26,7 @@ using namespace llvm; namespace { -class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { +class SIOptimizeExecMaskingPreRA { private: const SIRegisterInfo *TRI; const SIInstrInfo *TII; @@ -42,11 +43,18 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { bool optimizeVcndVcmpPair(MachineBasicBlock &MBB); bool optimizeElseBranch(MachineBasicBlock &MBB); +public: + SIOptimizeExecMaskingPreRA(LiveIntervals *LIS) : LIS(LIS) {} + bool run(MachineFunction &MF); +}; + +class SIOptimizeExecMaskingPreRALegacy : public MachineFunctionPass { public: static char ID; - SIOptimizeExecMaskingPreRA() : MachineFunctionPass(ID) { - initializeSIOptimizeExecMaskingPreRAPass(*PassRegistry::getPassRegistry()); + SIOptimizeExecMaskingPreRALegacy() : MachineFunctionPass(ID) { + initializeSIOptimizeExecMaskingPreRALegacyPass( + *PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; @@ -64,18 +72,18 @@ class SIOptimizeExecMaskingPreRA : public MachineFunctionPass { } // End anonymous namespace. 
-INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRA, DEBUG_TYPE, +INITIALIZE_PASS_BEGIN(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE, "SI optimize exec mask operations pre-RA", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) -INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRA, DEBUG_TYPE, +INITIALIZE_PASS_END(SIOptimizeExecMaskingPreRALegacy, DEBUG_TYPE, "SI optimize exec mask operations pre-RA", false, false) -char SIOptimizeExecMaskingPreRA::ID = 0; +char SIOptimizeExecMaskingPreRALegacy::ID = 0; -char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRA::ID; +char &llvm::SIOptimizeExecMaskingPreRAID = SIOptimizeExecMaskingPreRALegacy::ID; FunctionPass *llvm::createSIOptimizeExecMaskingPreRAPass() { - return new SIOptimizeExecMaskingPreRA(); + return new SIOptimizeExecMaskingPreRALegacy(); } // See if there is a def between \p AndIdx and \p SelIdx that needs to live @@ -340,15 +348,29 @@ bool SIOptimizeExecMaskingPreRA::optimizeElseBranch(MachineBasicBlock &MBB) { return true; } -bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { +PreservedAnalyses +SIOptimizeExecMaskingPreRAPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + auto &LIS = MFAM.getResult<LiveIntervalsAnalysis>(MF); + SIOptimizeExecMaskingPreRA(&LIS).run(MF); + return PreservedAnalyses::all(); +} + +bool SIOptimizeExecMaskingPreRALegacy::runOnMachineFunction( + MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; + auto *LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); + return SIOptimizeExecMaskingPreRA(LIS).run(MF); +} + +bool SIOptimizeExecMaskingPreRA::run(MachineFunction &MF) { const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); TRI = ST.getRegisterInfo(); TII = ST.getInstrInfo(); MRI = &MF.getRegInfo(); - LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); + // LIS = &getAnalysis<LiveIntervalsWrapperPass>().getLIS(); const bool Wave32 = ST.isWave32(); AndOpc = Wave32 ? 
AMDGPU::S_AND_B32 : AMDGPU::S_AND_B64; diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h new file mode 100644 index 0000000000000..eca79c0d8dbfb --- /dev/null +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h @@ -0,0 +1,24 @@ +//===- SIOptimizeExecMaskingPreRA.h.h ---------------------------------------*- +//C++- *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H +#define LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H + +#include "llvm/CodeGen/MachinePassManager.h" + +namespace llvm { +class SIOptimizeExecMaskingPreRAPass + : public PassInfoMixin<SIOptimizeExecMaskingPreRAPass> { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AMDGPU_SIOPTIMIZEEXECMASKINGPRERA_H diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir index 7aea97a3053c7..2eb1f5d559651 100644 --- a/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir +++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf-broken.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s +# RUN: llc -mtriple=amdgcn -verify-machineinstrs -passes=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GXN %s # FIXME: This is a miscompile, and the s_or_b64s need to be preserved. 
diff --git a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir index 63ee27e0f83ba..9673186e5ae3f 100644 --- a/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir +++ b/llvm/test/CodeGen/AMDGPU/optimize-exec-mask-pre-ra-non-empty-but-used-interval.mir @@ -1,4 +1,5 @@ # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=si-optimize-exec-masking-pre-ra,greedy -verify-machineinstrs -o - %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -passes=si-optimize-exec-masking-pre-ra,greedy -verify-machineinstrs -o - %s # This sample can trigger a "Non-empty but used interval" assert in regalloc if # SIOptimizeExecMaskingPreRA does not update live intervals correctly. >From 74528e602082565a11d5cc482633dde009740f6b Mon Sep 17 00:00:00 2001 From: Akshat Oke <akshat....@amd.com> Date: Mon, 3 Feb 2025 06:04:39 +0000 Subject: [PATCH 2/2] format header file --- llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h index eca79c0d8dbfb..cf9c6ce5f0083 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.h @@ -1,5 +1,4 @@ -//===- SIOptimizeExecMaskingPreRA.h.h ---------------------------------------*- -//C++- *-===// +//===- SIOptimizeExecMaskingPreRA.h -----------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits