llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-transforms Author: Matt Arsenault (arsenm) <details> <summary>Changes</summary> This specific callback should now be at parity with the old pass manager version. There are still some missing IR passes before this point. Also I don't understand the need for the RequireAnalysisPass at the end. SelectionDAG should just be using the uncached getResult? --- Full diff: https://github.com/llvm/llvm-project/pull/102814.diff 4 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+53-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+5-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+1) - (modified) llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll (+1-1) ``````````diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index fb3d3259171ac..36f44a20d9553 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -9,9 +9,17 @@ #include "AMDGPUCodeGenPassBuilder.h" #include "AMDGPU.h" #include "AMDGPUISelDAGToDAG.h" +#include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUTargetMachine.h" +#include "AMDGPUUnifyDivergentExitNodes.h" #include "SIFixSGPRCopies.h" #include "llvm/Analysis/UniformityAnalysis.h" +#include "llvm/Transforms/Scalar/FlattenCFG.h" +#include "llvm/Transforms/Scalar/Sink.h" +#include "llvm/Transforms/Scalar/StructurizeCFG.h" +#include "llvm/Transforms/Utils/FixIrreducible.h" +#include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/Transforms/Utils/UnifyLoopExits.h" using namespace llvm; @@ -28,8 +36,51 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder( } void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const { - // TODO: Add passes pre instruction selection. - // Test only, convert to real IR passes in future. 
+ const bool LateCFGStructurize = AMDGPUTargetMachine::EnableLateStructurizeCFG; + const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer; + const bool EnableStructurizerWorkarounds = + AMDGPUTargetMachine::EnableStructurizerWorkarounds; + + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(FlattenCFGPass()); + + if (TM.getOptLevel() > CodeGenOptLevel::None) + addPass(SinkingPass()); + + addPass(AMDGPULateCodeGenPreparePass(TM)); + + // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit + // regions formed by them. + + addPass(AMDGPUUnifyDivergentExitNodesPass()); + + if (!LateCFGStructurize && !DisableStructurizer) { + if (EnableStructurizerWorkarounds) { + addPass(FixIrreduciblePass()); + addPass(UnifyLoopExitsPass()); + } + + addPass(StructurizeCFGPass(/*SkipUniformRegions=*/false)); + } + + addPass(AMDGPUAnnotateUniformValuesPass()); + + if (!LateCFGStructurize && !DisableStructurizer) { + addPass(SIAnnotateControlFlowPass(TM)); + + // TODO: Move this right after structurizeCFG to avoid extra divergence + // analysis. This depends on stopping SIAnnotateControlFlow from making + // control flow modifications. + addPass(AMDGPURewriteUndefForPHIPass()); + } + + addPass(LCSSAPass()); + + if (TM.getOptLevel() > CodeGenOptLevel::Less) + addPass(AMDGPUPerfHintAnalysisPass(TM)); + + // FIXME: Why isn't this queried as required from AMDGPUISelDAGToDAG, and why + // isn't this in addInstSelector? 
addPass(RequireAnalysisPass<UniformityInfoAnalysis, Function>()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0523fee5bcf9f..5929dadf93bcb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -338,10 +338,11 @@ static cl::opt<bool> EnableScalarIRPasses( cl::init(true), cl::Hidden); -static cl::opt<bool> EnableStructurizerWorkarounds( +static cl::opt<bool, true> EnableStructurizerWorkarounds( "amdgpu-enable-structurizer-workarounds", - cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), - cl::Hidden); + cl::desc("Enable workarounds for the StructurizeCFG pass"), + cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds), + cl::init(true), cl::Hidden); static cl::opt<bool, true> EnableLowerModuleLDS( "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), @@ -611,6 +612,7 @@ bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false; bool AMDGPUTargetMachine::EnableFunctionCalls = false; bool AMDGPUTargetMachine::EnableLowerModuleLDS = true; bool AMDGPUTargetMachine::DisableStructurizer = false; +bool AMDGPUTargetMachine::EnableStructurizerWorkarounds = true; AMDGPUTargetMachine::~AMDGPUTargetMachine() = default; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 6bb8788cc73b0..4d39ad2b41505 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -38,6 +38,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { static bool EnableFunctionCalls; static bool EnableLowerModuleLDS; static bool DisableStructurizer; + static bool EnableStructurizerWorkarounds; AMDGPUTargetMachine(const Target &T, const Triple &TT, StringRef CPU, StringRef FS, const TargetOptions &Options, diff --git a/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll 
b/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll index 2c7072b8c93b1..2acd2355965a5 100644 --- a/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll +++ b/llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-isel -stop-after=amdgpu-isel -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK -; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -stop-after=amdgpu-isel -enable-new-pm | FileCheck %s --check-prefixes=CHECK +; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -start-before=amdgpu-isel -stop-after=amdgpu-isel -enable-new-pm -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK ; This caused failure in infinite cycle in Selection DAG (combine) due to missing insert_subvector. ; `````````` </details> https://github.com/llvm/llvm-project/pull/102814 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits