llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

This will allow them (the AMDGPU `cl::opt` command-line flags) to be shared between the old pass manager (PM) and new PM files. I don't really like needing to expose these globally like this; maybe it would be better to just move TargetPassConfig and the CodeGenPassBuilder into one common file?

---
Full diff: https://github.com/llvm/llvm-project/pull/102865.diff

2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+92-111)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+41)

``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index cad4585c5b3013..3409a49fe203f9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -74,6 +74,7 @@ using namespace llvm; using namespace llvm::PatternMatch; +using namespace llvm::AMDGPU; namespace { class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> { @@ -186,109 +187,95 @@ static VGPRRegisterRegAlloc fastRegAllocVGPR( "fast", "fast register allocator", createFastVGPRRegisterAllocator); } // anonymous namespace -static cl::opt<bool> -EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, - cl::desc("Run early if-conversion"), - cl::init(false)); +namespace llvm::AMDGPU { +cl::opt<bool> EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden, + cl::desc("Run early if-conversion"), + cl::init(false)); -static cl::opt<bool> -OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, - cl::desc("Run pre-RA exec mask optimizations"), - cl::init(true)); +cl::opt<bool> OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden, + cl::desc("Run pre-RA exec mask optimizations"), + cl::init(true)); -static cl::opt<bool> +cl::opt<bool> LowerCtorDtor("amdgpu-lower-global-ctor-dtor", cl::desc("Lower GPU ctor / dtors to globals on the 
device."), cl::init(true), cl::Hidden); // Option to disable vectorizer for tests. -static cl::opt<bool> EnableLoadStoreVectorizer( - "amdgpu-load-store-vectorizer", - cl::desc("Enable load store vectorizer"), - cl::init(true), - cl::Hidden); +cl::opt<bool> + EnableLoadStoreVectorizer("amdgpu-load-store-vectorizer", + cl::desc("Enable load store vectorizer"), + cl::init(true), cl::Hidden); // Option to control global loads scalarization -static cl::opt<bool> ScalarizeGlobal( - "amdgpu-scalarize-global-loads", - cl::desc("Enable global load scalarization"), - cl::init(true), - cl::Hidden); +cl::opt<bool> ScalarizeGlobal("amdgpu-scalarize-global-loads", + cl::desc("Enable global load scalarization"), + cl::init(true), cl::Hidden); // Option to run internalize pass. -static cl::opt<bool> InternalizeSymbols( - "amdgpu-internalize-symbols", - cl::desc("Enable elimination of non-kernel functions and unused globals"), - cl::init(false), - cl::Hidden); +cl::opt<bool> InternalizeSymbols( + "amdgpu-internalize-symbols", + cl::desc("Enable elimination of non-kernel functions and unused globals"), + cl::init(false), cl::Hidden); // Option to inline all early. 
-static cl::opt<bool> EarlyInlineAll( - "amdgpu-early-inline-all", - cl::desc("Inline all functions early"), - cl::init(false), - cl::Hidden); +cl::opt<bool> EarlyInlineAll("amdgpu-early-inline-all", + cl::desc("Inline all functions early"), + cl::init(false), cl::Hidden); -static cl::opt<bool> RemoveIncompatibleFunctions( +cl::opt<bool> RemoveIncompatibleFunctions( "amdgpu-enable-remove-incompatible-functions", cl::Hidden, cl::desc("Enable removal of functions when they" "use features not supported by the target GPU"), cl::init(true)); -static cl::opt<bool> EnableSDWAPeephole( - "amdgpu-sdwa-peephole", - cl::desc("Enable SDWA peepholer"), - cl::init(true)); +cl::opt<bool> EnableSDWAPeephole("amdgpu-sdwa-peephole", + cl::desc("Enable SDWA peepholer"), + cl::init(true)); -static cl::opt<bool> EnableDPPCombine( - "amdgpu-dpp-combine", - cl::desc("Enable DPP combiner"), - cl::init(true)); +cl::opt<bool> EnableDPPCombine("amdgpu-dpp-combine", + cl::desc("Enable DPP combiner"), cl::init(true)); // Enable address space based alias analysis -static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, - cl::desc("Enable AMDGPU Alias Analysis"), - cl::init(true)); +cl::opt<bool> + EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden, + cl::desc("Enable AMDGPU Alias Analysis"), + cl::init(true)); // Option to run late CFG structurizer -static cl::opt<bool, true> LateCFGStructurize( - "amdgpu-late-structurize", - cl::desc("Enable late CFG structurization"), - cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), - cl::Hidden); +cl::opt<bool, true> LateCFGStructurize( + "amdgpu-late-structurize", cl::desc("Enable late CFG structurization"), + cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG), cl::Hidden); // Disable structurizer-based control-flow lowering in order to test convergence // control tokens. This should eventually be replaced by the wave-transform. 
-static cl::opt<bool, true> DisableStructurizer( +cl::opt<bool, true> DisableStructurizer( "amdgpu-disable-structurizer", cl::desc("Disable structurizer for experiments; produces unusable code"), cl::location(AMDGPUTargetMachine::DisableStructurizer), cl::ReallyHidden); // Enable lib calls simplifications -static cl::opt<bool> EnableLibCallSimplify( - "amdgpu-simplify-libcall", - cl::desc("Enable amdgpu library simplifications"), - cl::init(true), - cl::Hidden); - -static cl::opt<bool> EnableLowerKernelArguments( - "amdgpu-ir-lower-kernel-arguments", - cl::desc("Lower kernel argument loads in IR pass"), - cl::init(true), - cl::Hidden); - -static cl::opt<bool> EnableRegReassign( - "amdgpu-reassign-regs", - cl::desc("Enable register reassign optimizations on gfx10+"), - cl::init(true), - cl::Hidden); - -static cl::opt<bool> OptVGPRLiveRange( +cl::opt<bool> + EnableLibCallSimplify("amdgpu-simplify-libcall", + cl::desc("Enable amdgpu library simplifications"), + cl::init(true), cl::Hidden); + +cl::opt<bool> EnableLowerKernelArguments( + "amdgpu-ir-lower-kernel-arguments", + cl::desc("Lower kernel argument loads in IR pass"), cl::init(true), + cl::Hidden); + +cl::opt<bool> EnableRegReassign( + "amdgpu-reassign-regs", + cl::desc("Enable register reassign optimizations on gfx10+"), + cl::init(true), cl::Hidden); + +cl::opt<bool> OptVGPRLiveRange( "amdgpu-opt-vgpr-liverange", cl::desc("Enable VGPR liverange optimizations for if-else structure"), cl::init(true), cl::Hidden); -static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy( +cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy( "amdgpu-atomic-optimizer-strategy", cl::desc("Select DPP or Iterative strategy for scan"), cl::init(ScanOptions::Iterative), @@ -299,91 +286,85 @@ static cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy( clEnumValN(ScanOptions::None, "None", "Disable atomic optimizer"))); // Enable Mode register optimization -static cl::opt<bool> EnableSIModeRegisterPass( - "amdgpu-mode-register", - 
cl::desc("Enable mode register pass"), - cl::init(true), - cl::Hidden); +cl::opt<bool> EnableSIModeRegisterPass("amdgpu-mode-register", + cl::desc("Enable mode register pass"), + cl::init(true), cl::Hidden); // Enable GFX11.5+ s_singleuse_vdst insertion -static cl::opt<bool> +cl::opt<bool> EnableInsertSingleUseVDST("amdgpu-enable-single-use-vdst", cl::desc("Enable s_singleuse_vdst insertion"), cl::init(false), cl::Hidden); // Enable GFX11+ s_delay_alu insertion -static cl::opt<bool> - EnableInsertDelayAlu("amdgpu-enable-delay-alu", - cl::desc("Enable s_delay_alu insertion"), - cl::init(true), cl::Hidden); +cl::opt<bool> EnableInsertDelayAlu("amdgpu-enable-delay-alu", + cl::desc("Enable s_delay_alu insertion"), + cl::init(true), cl::Hidden); // Enable GFX11+ VOPD -static cl::opt<bool> - EnableVOPD("amdgpu-enable-vopd", - cl::desc("Enable VOPD, dual issue of VALU in wave32"), - cl::init(true), cl::Hidden); +cl::opt<bool> EnableVOPD("amdgpu-enable-vopd", + cl::desc("Enable VOPD, dual issue of VALU in wave32"), + cl::init(true), cl::Hidden); // Option is used in lit tests to prevent deadcoding of patterns inspected. 
-static cl::opt<bool> -EnableDCEInRA("amdgpu-dce-in-ra", - cl::init(true), cl::Hidden, - cl::desc("Enable machine DCE inside regalloc")); - -static cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority", - cl::desc("Adjust wave priority"), - cl::init(false), cl::Hidden); - -static cl::opt<bool> EnableScalarIRPasses( - "amdgpu-scalar-ir-passes", - cl::desc("Enable scalar IR passes"), - cl::init(true), - cl::Hidden); - -static cl::opt<bool, true> EnableStructurizerWorkarounds( +cl::opt<bool> EnableDCEInRA("amdgpu-dce-in-ra", cl::init(true), cl::Hidden, + cl::desc("Enable machine DCE inside regalloc")); + +cl::opt<bool> EnableSetWavePriority("amdgpu-set-wave-priority", + cl::desc("Adjust wave priority"), + cl::init(false), cl::Hidden); + +cl::opt<bool> EnableScalarIRPasses("amdgpu-scalar-ir-passes", + cl::desc("Enable scalar IR passes"), + cl::init(true), cl::Hidden); + +cl::opt<bool, true> EnableStructurizerWorkarounds( "amdgpu-enable-structurizer-workarounds", cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds), cl::init(true), cl::Hidden); -static cl::opt<bool, true> EnableLowerModuleLDS( +cl::opt<bool, true> EnableLowerModuleLDS( "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true), cl::Hidden); -static cl::opt<bool> EnablePreRAOptimizations( - "amdgpu-enable-pre-ra-optimizations", - cl::desc("Enable Pre-RA optimizations pass"), cl::init(true), - cl::Hidden); +cl::opt<bool> + EnablePreRAOptimizations("amdgpu-enable-pre-ra-optimizations", + cl::desc("Enable Pre-RA optimizations pass"), + cl::init(true), cl::Hidden); -static cl::opt<bool> EnablePromoteKernelArguments( +cl::opt<bool> EnablePromoteKernelArguments( "amdgpu-enable-promote-kernel-arguments", cl::desc("Enable promotion of flat kernel pointer arguments to global"), cl::Hidden, cl::init(true)); -static cl::opt<bool> 
EnableImageIntrinsicOptimizer( +cl::opt<bool> EnableImageIntrinsicOptimizer( "amdgpu-enable-image-intrinsic-optimizer", cl::desc("Enable image intrinsic optimizer pass"), cl::init(true), cl::Hidden); -static cl::opt<bool> +cl::opt<bool> EnableLoopPrefetch("amdgpu-loop-prefetch", cl::desc("Enable loop data prefetch on AMDGPU"), cl::Hidden, cl::init(false)); -static cl::opt<bool> EnableMaxIlpSchedStrategy( +cl::opt<bool> EnableMaxIlpSchedStrategy( "amdgpu-enable-max-ilp-scheduling-strategy", cl::desc("Enable scheduling strategy to maximize ILP for a single wave."), cl::Hidden, cl::init(false)); -static cl::opt<bool> EnableRewritePartialRegUses( +cl::opt<bool> EnableRewritePartialRegUses( "amdgpu-enable-rewrite-partial-reg-uses", cl::desc("Enable rewrite partial reg uses pass"), cl::init(true), cl::Hidden); -static cl::opt<bool> EnableHipStdPar( - "amdgpu-enable-hipstdpar", - cl::desc("Enable HIP Standard Parallelism Offload support"), cl::init(false), - cl::Hidden); +cl::opt<bool> + EnableHipStdPar("amdgpu-enable-hipstdpar", + cl::desc("Enable HIP Standard Parallelism Offload support"), + cl::init(false), cl::Hidden); + +} // namespace llvm::AMDGPU extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { // Register the target diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 4d39ad2b415052..f01e26a846f433 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -16,12 +16,53 @@ #include "GCNSubtarget.h" #include "llvm/CodeGen/TargetPassConfig.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include <optional> #include <utility> namespace llvm { +enum class ScanOptions; + +namespace AMDGPU { + +extern cl::opt<bool> EnableEarlyIfConversion; +extern cl::opt<bool> OptExecMaskPreRA; +extern cl::opt<bool> LowerCtorDtor; +extern cl::opt<bool> EnableLoadStoreVectorizer; +extern cl::opt<bool> ScalarizeGlobal; 
+extern cl::opt<bool> InternalizeSymbols; +extern cl::opt<bool> EarlyInlineAll; +extern cl::opt<bool> RemoveIncompatibleFunctions; +extern cl::opt<bool> EnableSDWAPeephole; +extern cl::opt<bool> EnableDPPCombine; +extern cl::opt<bool> EnableAMDGPUAliasAnalysis; +extern cl::opt<bool, true> LateCFGStructurize; +extern cl::opt<bool, true> DisableStructurizer; +extern cl::opt<bool> EnableLibCallSimplify; +extern cl::opt<bool> EnableLowerKernelArguments; +extern cl::opt<bool> EnableRegReassign; +extern cl::opt<bool> OptVGPRLiveRange; +extern cl::opt<ScanOptions> AMDGPUAtomicOptimizerStrategy; +extern cl::opt<bool> EnableSIModeRegisterPass; +extern cl::opt<bool> EnableInsertSingleUseVDST; +extern cl::opt<bool> EnableInsertDelayAlu; +extern cl::opt<bool> EnableVOPD; +extern cl::opt<bool> EnableDCEInRA; +extern cl::opt<bool> EnableSetWavePriority; +extern cl::opt<bool> EnableScalarIRPasses; +extern cl::opt<bool, true> EnableStructurizerWorkarounds; +extern cl::opt<bool, true> EnableLowerModuleLDS; +extern cl::opt<bool> EnablePreRAOptimizations; +extern cl::opt<bool> EnablePromoteKernelArguments; +extern cl::opt<bool> EnableImageIntrinsicOptimizer; +extern cl::opt<bool> EnableLoopPrefetch; +extern cl::opt<bool> EnableMaxIlpSchedStrategy; +extern cl::opt<bool> EnableRewritePartialRegUses; +extern cl::opt<bool> EnableHipStdPar; +} // namespace AMDGPU + //===----------------------------------------------------------------------===// // AMDGPU Target Machine (R600+) //===----------------------------------------------------------------------===//
``````````

</details>

https://github.com/llvm/llvm-project/pull/102865
_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits