[llvm-branch-commits] [llvm] 084d025 - Address comments
Author: Alexis Engelke Date: 2024-08-11T07:39:53Z New Revision: 084d02577eb68dd2b6260b9b1d12a61631e8d799 URL: https://github.com/llvm/llvm-project/commit/084d02577eb68dd2b6260b9b1d12a61631e8d799 DIFF: https://github.com/llvm/llvm-project/commit/084d02577eb68dd2b6260b9b1d12a61631e8d799.diff LOG: Address comments Added: llvm/test/Transforms/SLPVectorizer/X86/const-in-different-functions.ll Modified: Removed: llvm/test/Transforms/SLPVectorizer/const-in-different-functions.ll diff --git a/llvm/test/Transforms/SLPVectorizer/const-in-different-functions.ll b/llvm/test/Transforms/SLPVectorizer/X86/const-in-different-functions.ll similarity index 75% rename from llvm/test/Transforms/SLPVectorizer/const-in-different-functions.ll rename to llvm/test/Transforms/SLPVectorizer/X86/const-in-different-functions.ll index 29a8f15733c450..2e473f4f2c213c 100644 --- a/llvm/test/Transforms/SLPVectorizer/const-in-different-functions.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/const-in-different-functions.ll @@ -4,17 +4,19 @@ ; Test that SLP vectorize doesn't crash if a stored constant is used in multiple ; functions. 
+@p = external global [64 x float] + define void @_Z1hPfl() { ; CHECK-LABEL: define void @_Z1hPfl() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT:[[TMP0:%.*]] = getelementptr i8, ptr null, i64 28 +; CHECK-NEXT:[[TMP0:%.*]] = getelementptr i8, ptr @p, i64 28 ; CHECK-NEXT:store <2 x float> , ptr [[TMP0]], align 4 ; CHECK-NEXT:ret void ; entry: - %0 = getelementptr i8, ptr null, i64 28 + %0 = getelementptr i8, ptr @p, i64 28 store float 0.00e+00, ptr %0, align 4 - %1 = getelementptr i8, ptr null, i64 32 + %1 = getelementptr i8, ptr @p, i64 32 store float 1.00e+00, ptr %1, align 16 ret void } @@ -27,8 +29,8 @@ define void @_Z1mv(i64 %arrayidx4.i.2.idx) { ; CHECK: [[FOR_COND1_PREHEADER_LR_PH_I:.*:]] ; CHECK-NEXT:br label %[[FOR_COND1_PREHEADER_I:.*]] ; CHECK: [[FOR_COND1_PREHEADER_I]]: -; CHECK-NEXT:store float 1.00e+00, ptr null, align 4 -; CHECK-NEXT:[[ARRAYIDX4_I_2:%.*]] = getelementptr i8, ptr null, i64 [[ARRAYIDX4_I_2_IDX]] +; CHECK-NEXT:store float 1.00e+00, ptr @p, align 4 +; CHECK-NEXT:[[ARRAYIDX4_I_2:%.*]] = getelementptr i8, ptr @p, i64 [[ARRAYIDX4_I_2_IDX]] ; CHECK-NEXT:store float 0.00e+00, ptr [[ARRAYIDX4_I_2]], align 4 ; CHECK-NEXT:br label %[[FOR_COND1_PREHEADER_I]] ; @@ -39,8 +41,8 @@ for.cond1.preheader.lr.ph.i: ; No predecessors! br label %for.cond1.preheader.i for.cond1.preheader.i:; preds = %for.cond1.preheader.i, %for.cond1.preheader.lr.ph.i - store float 1.00e+00, ptr null, align 4 - %arrayidx4.i.2 = getelementptr i8, ptr null, i64 %arrayidx4.i.2.idx + store float 1.00e+00, ptr @p, align 4 + %arrayidx4.i.2 = getelementptr i8, ptr @p, i64 %arrayidx4.i.2.idx store float 0.00e+00, ptr %arrayidx4.i.2, align 4 br label %for.cond1.preheader.i } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
@@ -413,18 +439,57 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) { return Changed; } -bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const { - auto FI = FIM.find(F); - if (FI == FIM.end()) -return false; +bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM, + LazyCallGraph &CG) { - return AMDGPUPerfHint::isMemBound(FI->second); + SmallVector Worklist; + CG.buildRefSCCs(); + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { +for (LazyCallGraph::SCC &SCC : RC) { + if (SCC.size() != 1) +continue; + Function &F = SCC.begin()->getFunction(); + if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) arsenm wrote: Actually this broke the test https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102645 >From 0fa3fead38acc0dc3bca5d48c00be1090d4a16ad Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 9 Aug 2024 17:27:53 +0400 Subject: [PATCH] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager This was much more difficult than I anticipated. The pass is not in a good state, with poor test coverage. The legacy PM does seem to be relying on maintaining the map state between different SCCs, which seems bad. The pass is going out of its way to avoid putting the attributes it introduces onto non-callee functions. If it just added them, we could use them directly instead of relying on the map, I would think. The NewPM path uses a ModulePass; I'm not sure if we should be using CGSCC here but there seems to be some missing infrastructure to support backend defined ones. --- llvm/lib/Target/AMDGPU/AMDGPU.h | 4 +- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 3 + .../Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp | 111 ++ .../Target/AMDGPU/AMDGPUPerfHintAnalysis.h| 62 ++ .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 +- llvm/test/CodeGen/AMDGPU/perfhint.ll | 1 + 7 files changed, 136 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 195e2a19214e80..5b8d37a8ae7944 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -209,8 +209,8 @@ extern char &SIPreAllocateWWMRegsID; void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &); extern char &AMDGPUImageIntrinsicOptimizerID; -void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); -extern char &AMDGPUPerfHintAnalysisID; +void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &); +extern char &AMDGPUPerfHintAnalysisLegacyID; void initializeGCNRegPressurePrinterPass(PassRegistry &); extern char &GCNRegPressurePrinterID; diff --git 
a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 8579774f522309..bbb4573655ab79 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, "amdgpu-isel", "AMDGPU DAG->DAG Pattern Instruction Selection", false, false) INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo) -INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis) +INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy) INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) #ifdef EXPENSIVE_CHECKS INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def index b6a6c33d85f83c..7188c8953254c0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def +++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def @@ -22,6 +22,9 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers", AMDGPULowerBufferFatPointersPass(*this)) MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass()) MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this)) +MODULE_PASS("amdgpu-perf-hint", +AMDGPUPerfHintAnalysisPass( + *static_cast(this))) MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass()) MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass()) #undef MODULE_PASS diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp index 1213d5e0b41db1..040e931b82af2f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp @@ -12,12 +12,15 @@ /// //===--===// -#include "AMDGPU.h" #include "AMDGPUPerfHintAnalysis.h" +#include "AMDGPU.h" +#include "AMDGPUTargetMachine.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/CallGraph.h" +#include 
"llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -54,12 +57,6 @@ static cl::opt STATISTIC(NumMemBound, "Number of functions marked as memory bound"); STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave"); -char llvm::AMDGPUPerfHintAnalysis::ID = 0; -char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID; - -INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE, -"Analysis if a function is memory bound", true, true) - namespace { struct AMDGPUPerfHint { @@ -67,7 +64,7 @@ struct AMDGPUPerfHint { public: AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
@@ -413,18 +439,57 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) { return Changed; } -bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const { - auto FI = FIM.find(F); - if (FI == FIM.end()) -return false; +bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM, + LazyCallGraph &CG) { - return AMDGPUPerfHint::isMemBound(FI->second); + SmallVector Worklist; + CG.buildRefSCCs(); + for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) { +for (LazyCallGraph::SCC &SCC : RC) { + if (SCC.size() != 1) +continue; + Function &F = SCC.begin()->getFunction(); + if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage()) arsenm wrote: Nevermind, I somehow posted the wrong version that has the reverse list which is wrong https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102805 R600 has a separate CodeGenPassBuilder anyway. >From 0a6b6eee1b1c2926169ff337a16ee291d5f72001 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 11 Aug 2024 11:55:22 +0400 Subject: [PATCH] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder R600 has a separate CodeGenPassBuilder anyway. --- .../Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp | 2 +- .../lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h | 6 +++--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 16 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 12 ++-- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index cc4285f130fc82..0d829b6aeafe8f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -15,7 +15,7 @@ using namespace llvm; AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder( -AMDGPUTargetMachine &TM, const CGPassBuilderOption &Opts, +GCNTargetMachine &TM, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC) : CodeGenPassBuilder(TM, Opts, PIC) { Opt.RequiresCodeGenSCCOrder = true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h index 5f79e309703a33..e656e166b3eb2e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h @@ -14,12 +14,12 @@ namespace llvm { -class AMDGPUTargetMachine; +class GCNTargetMachine; class AMDGPUCodeGenPassBuilder -: public CodeGenPassBuilder { +: public CodeGenPassBuilder { public: - AMDGPUCodeGenPassBuilder(AMDGPUTargetMachine &TM, + AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 
202466f18d1bd6..62cf9c6cd61140 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -660,14 +660,6 @@ parseAMDGPUAtomicOptimizerStrategy(StringRef Params) { return make_error("invalid parameter", inconvertibleErrorCode()); } -Error AMDGPUTargetMachine::buildCodeGenPipeline( -ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, -CodeGenFileType FileType, const CGPassBuilderOption &Opts, -PassInstrumentationCallbacks *PIC) { - AMDGPUCodeGenPassBuilder CGPB(*this, Opts, PIC); - return CGPB.buildPipeline(MPM, Out, DwoOut, FileType); -} - void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { #define GET_PASS_REGISTRY "AMDGPUPassRegistry.def" @@ -900,6 +892,14 @@ GCNTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(GCNTTIImpl(this, F)); } +Error GCNTargetMachine::buildCodeGenPipeline( +ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, +CodeGenFileType FileType, const CGPassBuilderOption &Opts, +PassInstrumentationCallbacks *PIC) { + AMDGPUCodeGenPassBuilder CGPB(*this, Opts, PIC); + return CGPB.buildPipeline(MPM, Out, DwoOut, FileType); +} + //===--===// // AMDGPU Pass Setup //===--===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 0f74fbc22fa84f..6bb8788cc73b0c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -52,12 +52,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { return TLOF.get(); } - Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, - CodeGenFileType FileType, - const CGPassBuilderOption &Opts, - PassInstrumentationCallbacks *PIC) override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; void registerDefaultAliasAnalyses(AAManager &) override; @@ -103,6 +97,12 @@ class GCNTargetMachine 
final : public AMDGPUTargetMachine { return true; } + Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out, + raw_pwrite_stream *DwoOut, + CodeGenFileType FileType, + const CGPassBuilderOption &Opts, + PassInstrumentationCallbacks *PIC) override; + void registerMachineRegisterInfoCallback(MachineFunction &MF) const override; MachineFunctionInfo * _
[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102805 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes R600 has a separate CodeGenPassBuilder anyway. --- Full diff: https://github.com/llvm/llvm-project/pull/102805.diff 4 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+1-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h (+3-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+8-8) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+6-6) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index cc4285f130fc8..0d829b6aeafe8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -15,7 +15,7 @@ using namespace llvm; AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder( -AMDGPUTargetMachine &TM, const CGPassBuilderOption &Opts, +GCNTargetMachine &TM, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC) : CodeGenPassBuilder(TM, Opts, PIC) { Opt.RequiresCodeGenSCCOrder = true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h index 5f79e309703a3..e656e166b3eb2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h @@ -14,12 +14,12 @@ namespace llvm { -class AMDGPUTargetMachine; +class GCNTargetMachine; class AMDGPUCodeGenPassBuilder -: public CodeGenPassBuilder { +: public CodeGenPassBuilder { public: - AMDGPUCodeGenPassBuilder(AMDGPUTargetMachine &TM, + AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 202466f18d1bd..62cf9c6cd6114 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -660,14 
+660,6 @@ parseAMDGPUAtomicOptimizerStrategy(StringRef Params) { return make_error("invalid parameter", inconvertibleErrorCode()); } -Error AMDGPUTargetMachine::buildCodeGenPipeline( -ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, -CodeGenFileType FileType, const CGPassBuilderOption &Opts, -PassInstrumentationCallbacks *PIC) { - AMDGPUCodeGenPassBuilder CGPB(*this, Opts, PIC); - return CGPB.buildPipeline(MPM, Out, DwoOut, FileType); -} - void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { #define GET_PASS_REGISTRY "AMDGPUPassRegistry.def" @@ -900,6 +892,14 @@ GCNTargetMachine::getTargetTransformInfo(const Function &F) const { return TargetTransformInfo(GCNTTIImpl(this, F)); } +Error GCNTargetMachine::buildCodeGenPipeline( +ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut, +CodeGenFileType FileType, const CGPassBuilderOption &Opts, +PassInstrumentationCallbacks *PIC) { + AMDGPUCodeGenPassBuilder CGPB(*this, Opts, PIC); + return CGPB.buildPipeline(MPM, Out, DwoOut, FileType); +} + //===--===// // AMDGPU Pass Setup //===--===// diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 0f74fbc22fa84..6bb8788cc73b0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -52,12 +52,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { return TLOF.get(); } - Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out, - raw_pwrite_stream *DwoOut, - CodeGenFileType FileType, - const CGPassBuilderOption &Opts, - PassInstrumentationCallbacks *PIC) override; - void registerPassBuilderCallbacks(PassBuilder &PB) override; void registerDefaultAliasAnalyses(AAManager &) override; @@ -103,6 +97,12 @@ class GCNTargetMachine final : public AMDGPUTargetMachine { return true; } + Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out, + raw_pwrite_stream *DwoOut, + 
CodeGenFileType FileType, + const CGPassBuilderOption &Opts, + PassInstrumentationCallbacks *PIC) override; + void registerMachineRegisterInfoCallback(MachineFunction &MF) const override; MachineFunctionInfo * `` https://github.com/llvm/llvm-project/pull/102805 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bi
[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102805 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)
https://github.com/paperchalice approved this pull request. https://github.com/llvm/llvm-project/pull/102805 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102806 None >From 56fc9f47bd12696b13a677ee92c83a85cbf09466 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 11 Aug 2024 12:57:27 +0400 Subject: [PATCH] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager --- llvm/lib/Target/AMDGPU/AMDGPU.h | 17 ++- .../AMDGPU/AMDGPULateCodeGenPrepare.cpp | 110 +++--- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 3 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 +- .../AMDGPU/amdgpu-late-codegenprepare.ll | 1 + 5 files changed, 88 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 5b8d37a8ae7944..2a6b5a10a5d464 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -18,6 +18,7 @@ namespace llvm { class AMDGPUTargetMachine; +class GCNTargetMachine; class TargetMachine; // GlobalISel passes @@ -54,7 +55,7 @@ FunctionPass *createSIPostRABundlerPass(); FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); FunctionPass *createAMDGPUCodeGenPreparePass(); -FunctionPass *createAMDGPULateCodeGenPreparePass(); +FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass(); FunctionPass *createAMDGPUMachineCFGStructurizerPass(); FunctionPass *createAMDGPURewriteOutArgumentsPass(); ModulePass * @@ -273,6 +274,16 @@ class AMDGPUCodeGenPreparePass PreservedAnalyses run(Function &, FunctionAnalysisManager &); }; +class AMDGPULateCodeGenPreparePass +: public PassInfoMixin { +private: + const GCNTargetMachine &TM; + +public: + AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {}; + PreservedAnalyses run(Function &, FunctionAnalysisManager &); +}; + class AMDGPULowerKernelArgumentsPass : public PassInfoMixin { private: @@ -329,8 +340,8 @@ extern char &AMDGPUCodeGenPrepareID; void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); 
extern char &AMDGPURemoveIncompatibleFunctionsID; -void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); -extern char &AMDGPULateCodeGenPrepareID; +void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &); +extern char &AMDGPULateCodeGenPrepareLegacyID; FunctionPass *createAMDGPURewriteUndefForPHILegacyPass(); void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp index 7bf5170794cd9e..36dfebacaed686 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp @@ -42,10 +42,10 @@ static cl::opt namespace { class AMDGPULateCodeGenPrepare -: public FunctionPass, - public InstVisitor { +: public InstVisitor { Module *Mod = nullptr; const DataLayout *DL = nullptr; + const GCNSubtarget &ST; AssumptionCache *AC = nullptr; UniformityInfo *UA = nullptr; @@ -53,24 +53,10 @@ class AMDGPULateCodeGenPrepare SmallVector DeadInsts; public: - static char ID; - - AMDGPULateCodeGenPrepare() : FunctionPass(ID) {} - - StringRef getPassName() const override { -return "AMDGPU IR late optimizations"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.addRequired(); -AU.addRequired(); -AU.addRequired(); -AU.setPreservesAll(); - } - - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - + AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST, + AssumptionCache *AC, UniformityInfo *UA) + : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {} + bool run(Function &F); bool visitInstruction(Instruction &) { return false; } // Check if the specified value is at least DWORD aligned. 
@@ -148,23 +134,7 @@ class LiveRegOptimizer { } // end anonymous namespace -bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) { - Mod = &M; - DL = &Mod->getDataLayout(); - return false; -} - -bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) { - if (skipFunction(F)) -return false; - - const TargetPassConfig &TPC = getAnalysis(); - const TargetMachine &TM = TPC.getTM(); - const GCNSubtarget &ST = TM.getSubtarget(F); - - AC = &getAnalysis().getAssumptionCache(F); - UA = &getAnalysis().getUniformityInfo(); - +bool AMDGPULateCodeGenPrepare::run(Function &F) { // "Optimize" the virtual regs that cross basic block boundaries. When // building the SelectionDAG, vectors of illegal types that cross basic blocks // will be scalarized and widened, with each scalar living in its @@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) { return true; } -INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE, +PreservedAnalyses +AMDGPULateCodeGenPreparePass:
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102806 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102806** https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102806 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102806.diff 5 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+14-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp (+68-42) - (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-2) - (modified) llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll (+1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 5b8d37a8ae794..2a6b5a10a5d46 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -18,6 +18,7 @@ namespace llvm { class AMDGPUTargetMachine; +class GCNTargetMachine; class TargetMachine; // GlobalISel passes @@ -54,7 +55,7 @@ FunctionPass *createSIPostRABundlerPass(); FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); FunctionPass *createAMDGPUCodeGenPreparePass(); -FunctionPass *createAMDGPULateCodeGenPreparePass(); +FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass(); FunctionPass *createAMDGPUMachineCFGStructurizerPass(); FunctionPass *createAMDGPURewriteOutArgumentsPass(); ModulePass * @@ -273,6 +274,16 @@ class AMDGPUCodeGenPreparePass PreservedAnalyses run(Function &, FunctionAnalysisManager &); }; +class AMDGPULateCodeGenPreparePass +: public PassInfoMixin { +private: + const GCNTargetMachine &TM; + +public: + AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {}; + PreservedAnalyses run(Function &, FunctionAnalysisManager &); +}; + class AMDGPULowerKernelArgumentsPass : public PassInfoMixin { private: @@ -329,8 +340,8 @@ extern char &AMDGPUCodeGenPrepareID; void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); extern char &AMDGPURemoveIncompatibleFunctionsID; -void 
initializeAMDGPULateCodeGenPreparePass(PassRegistry &); -extern char &AMDGPULateCodeGenPrepareID; +void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &); +extern char &AMDGPULateCodeGenPrepareLegacyID; FunctionPass *createAMDGPURewriteUndefForPHILegacyPass(); void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp index 7bf5170794cd9..36dfebacaed68 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp @@ -42,10 +42,10 @@ static cl::opt namespace { class AMDGPULateCodeGenPrepare -: public FunctionPass, - public InstVisitor { +: public InstVisitor { Module *Mod = nullptr; const DataLayout *DL = nullptr; + const GCNSubtarget &ST; AssumptionCache *AC = nullptr; UniformityInfo *UA = nullptr; @@ -53,24 +53,10 @@ class AMDGPULateCodeGenPrepare SmallVector DeadInsts; public: - static char ID; - - AMDGPULateCodeGenPrepare() : FunctionPass(ID) {} - - StringRef getPassName() const override { -return "AMDGPU IR late optimizations"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.addRequired(); -AU.addRequired(); -AU.addRequired(); -AU.setPreservesAll(); - } - - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - + AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST, + AssumptionCache *AC, UniformityInfo *UA) + : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {} + bool run(Function &F); bool visitInstruction(Instruction &) { return false; } // Check if the specified value is at least DWORD aligned. 
@@ -148,23 +134,7 @@ class LiveRegOptimizer { } // end anonymous namespace -bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) { - Mod = &M; - DL = &Mod->getDataLayout(); - return false; -} - -bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) { - if (skipFunction(F)) -return false; - - const TargetPassConfig &TPC = getAnalysis(); - const TargetMachine &TM = TPC.getTM(); - const GCNSubtarget &ST = TM.getSubtarget(F); - - AC = &getAnalysis().getAssumptionCache(F); - UA = &getAnalysis().getUniformityInfo(); - +bool AMDGPULateCodeGenPrepare::run(Function &F) { // "Optimize" the virtual regs that cross basic block boundaries. When // building the SelectionDAG, vectors of illegal types that cross basic blocks // will be scalarized and widened, with each scalar living in its @@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) { return true; } -INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE, +PreservedAnalyses +AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) { + const GCNSubtarget &ST = TM.getSubtarget(F); + + AssumptionCache
[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)
arsenm wrote: ### Merge activity * **Aug 11, 7:00 AM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/102645). https://github.com/llvm/llvm-project/pull/102645 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)
arsenm wrote: ### Merge activity * **Aug 11, 7:00 AM EDT**: @arsenm started a stack merge that includes this pull request via [Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/102805). https://github.com/llvm/llvm-project/pull/102805 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)
@@ -0,0 +1,334 @@ +//===- nsan_allocator.cpp -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// NumericalStabilitySanitizer allocator. +// +//===--===// + +#include "nsan_allocator.h" +#include "interception/interception.h" +#include "nsan.h" +#include "nsan_platform.h" +#include "nsan_thread.h" +#include "sanitizer_common/sanitizer_allocator.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" +#include "sanitizer_common/sanitizer_allocator_interface.h" +#include "sanitizer_common/sanitizer_allocator_report.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_errno.h" + +DECLARE_REAL(void *, memset, void *dest, int c, uptr n) + +using namespace __nsan; + +namespace { +struct Metadata { + uptr requested_size; +}; + +struct NsanMapUnmapCallback { + void OnMap(uptr p, uptr size) const {} + void OnMapSecondary(uptr p, uptr size, uptr user_begin, + uptr user_size) const {} + void OnUnmap(uptr p, uptr size) const {} +}; + +const uptr kMaxAllowedMallocSize = 1ULL << 40; + +// Allocator64 parameters. Deliberately using a short name. +struct AP64 { + static const uptr kSpaceBeg = Mapping::kHeapMemBeg; + static const uptr kSpaceSize = 0x400; // 4T. 
+ static const uptr kMetadataSize = sizeof(Metadata); + using SizeClassMap = DefaultSizeClassMap; + using MapUnmapCallback = NsanMapUnmapCallback; + static const uptr kFlags = 0; + using AddressSpaceView = LocalAddressSpaceView; +}; +} // namespace + +using PrimaryAllocator = SizeClassAllocator64; +using Allocator = CombinedAllocator; +using AllocatorCache = Allocator::AllocatorCache; + +static Allocator allocator; +static AllocatorCache fallback_allocator_cache; +static StaticSpinMutex fallback_mutex; + +static uptr max_malloc_size; + +void __nsan::NsanAllocatorInit() { + SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null); + allocator.Init(common_flags()->allocator_release_to_os_interval_ms); + if (common_flags()->max_allocation_size_mb) +max_malloc_size = Min(common_flags()->max_allocation_size_mb << 20, + kMaxAllowedMallocSize); + else +max_malloc_size = kMaxAllowedMallocSize; +} + +static AllocatorCache *GetAllocatorCache(NsanThreadLocalMallocStorage *ms) { + CHECK(ms); + CHECK_LE(sizeof(AllocatorCache), sizeof(ms->allocator_cache)); + return reinterpret_cast(ms->allocator_cache); +} + +void NsanThreadLocalMallocStorage::Init() { + allocator.InitCache(GetAllocatorCache(this)); +} + +void NsanThreadLocalMallocStorage::CommitBack() { + allocator.SwallowCache(GetAllocatorCache(this)); + allocator.DestroyCache(GetAllocatorCache(this)); +} + +static void *NsanAllocate(uptr size, uptr alignment, bool zero) { + if (UNLIKELY(size > max_malloc_size)) { +if (AllocatorMayReturnNull()) { + Report("WARNING: NumericalStabilitySanitizer failed to allocate 0x%zx " + "bytes\n", + size); + return nullptr; +} +BufferedStackTrace stack; +GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); +ReportAllocationSizeTooBig(size, max_malloc_size, &stack); + } + if (UNLIKELY(IsRssLimitExceeded())) { +if (AllocatorMayReturnNull()) + return nullptr; +BufferedStackTrace stack; +GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); +ReportRssLimitExceeded(&stack); + } + NsanThread *t = 
GetCurrentThread(); + void *allocated; + if (t) { +AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); +allocated = allocator.Allocate(cache, size, alignment); + } else { alexander-shaposhnikov wrote: I'm wondering - when does the fallback case happen (t == nullptr) ? https://github.com/llvm/llvm-project/pull/102764 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)
@@ -0,0 +1,334 @@ +//===- nsan_allocator.cpp -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// NumericalStabilitySanitizer allocator. +// +//===--===// + +#include "nsan_allocator.h" +#include "interception/interception.h" +#include "nsan.h" +#include "nsan_platform.h" +#include "nsan_thread.h" +#include "sanitizer_common/sanitizer_allocator.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" +#include "sanitizer_common/sanitizer_allocator_interface.h" +#include "sanitizer_common/sanitizer_allocator_report.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_errno.h" + +DECLARE_REAL(void *, memset, void *dest, int c, uptr n) + +using namespace __nsan; + +namespace { +struct Metadata { + uptr requested_size; +}; + +struct NsanMapUnmapCallback { + void OnMap(uptr p, uptr size) const {} + void OnMapSecondary(uptr p, uptr size, uptr user_begin, + uptr user_size) const {} + void OnUnmap(uptr p, uptr size) const {} +}; + +const uptr kMaxAllowedMallocSize = 1ULL << 40; + +// Allocator64 parameters. Deliberately using a short name. +struct AP64 { + static const uptr kSpaceBeg = Mapping::kHeapMemBeg; + static const uptr kSpaceSize = 0x400; // 4T. 
+ static const uptr kMetadataSize = sizeof(Metadata); + using SizeClassMap = DefaultSizeClassMap; + using MapUnmapCallback = NsanMapUnmapCallback; + static const uptr kFlags = 0; + using AddressSpaceView = LocalAddressSpaceView; +}; +} // namespace + +using PrimaryAllocator = SizeClassAllocator64; +using Allocator = CombinedAllocator; +using AllocatorCache = Allocator::AllocatorCache; + +static Allocator allocator; +static AllocatorCache fallback_allocator_cache; +static StaticSpinMutex fallback_mutex; + +static uptr max_malloc_size; + +void __nsan::NsanAllocatorInit() { + SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null); + allocator.Init(common_flags()->allocator_release_to_os_interval_ms); + if (common_flags()->max_allocation_size_mb) +max_malloc_size = Min(common_flags()->max_allocation_size_mb << 20, + kMaxAllowedMallocSize); + else +max_malloc_size = kMaxAllowedMallocSize; +} + +static AllocatorCache *GetAllocatorCache(NsanThreadLocalMallocStorage *ms) { + CHECK(ms); + CHECK_LE(sizeof(AllocatorCache), sizeof(ms->allocator_cache)); + return reinterpret_cast(ms->allocator_cache); +} + +void NsanThreadLocalMallocStorage::Init() { + allocator.InitCache(GetAllocatorCache(this)); +} + +void NsanThreadLocalMallocStorage::CommitBack() { + allocator.SwallowCache(GetAllocatorCache(this)); + allocator.DestroyCache(GetAllocatorCache(this)); +} + +static void *NsanAllocate(uptr size, uptr alignment, bool zero) { + if (UNLIKELY(size > max_malloc_size)) { +if (AllocatorMayReturnNull()) { + Report("WARNING: NumericalStabilitySanitizer failed to allocate 0x%zx " + "bytes\n", + size); + return nullptr; +} +BufferedStackTrace stack; +GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); +ReportAllocationSizeTooBig(size, max_malloc_size, &stack); + } + if (UNLIKELY(IsRssLimitExceeded())) { +if (AllocatorMayReturnNull()) + return nullptr; +BufferedStackTrace stack; +GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); +ReportRssLimitExceeded(&stack); + } + NsanThread *t = 
GetCurrentThread(); + void *allocated; + if (t) { +AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); +allocated = allocator.Allocate(cache, size, alignment); + } else { +SpinMutexLock l(&fallback_mutex); +AllocatorCache *cache = &fallback_allocator_cache; +allocated = allocator.Allocate(cache, size, alignment); + } + if (UNLIKELY(!allocated)) { +SetAllocatorOutOfMemory(); +if (AllocatorMayReturnNull()) + return nullptr; +BufferedStackTrace stack; +GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); +ReportOutOfMemory(size, &stack); + } + auto *meta = reinterpret_cast(allocator.GetMetaData(allocated)); + meta->requested_size = size; + if (zero && allocator.FromPrimary(allocated)) +REAL(memset)(allocated, 0, size); + __nsan_set_value_unknown(allocated, size); + RunMallocHooks(allocated, size); + return allocated; +} + +void __nsan::NsanDeallocate(void *p) { + DCHECK(p); + RunFreeHooks(p); + auto *meta = reinterpret_cast(allocator.GetMetaData(p)); + meta->requested_size = 0; + if (NsanThread *t = GetCurrentThread()) { +AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); +allocator.Deallocate(cache, p); + } else { +SpinMutexLock l(&fallback_mutex)
[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)
https://github.com/alexander-shaposhnikov edited https://github.com/llvm/llvm-project/pull/102764 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102806 >From 0bf4c6c04419447c691ec5f722c2af8af73994f9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 11 Aug 2024 12:57:27 +0400 Subject: [PATCH] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager --- llvm/lib/Target/AMDGPU/AMDGPU.h | 16 ++- .../AMDGPU/AMDGPULateCodeGenPrepare.cpp | 110 +++--- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 3 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 +- .../AMDGPU/amdgpu-late-codegenprepare.ll | 1 + 5 files changed, 87 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 34ef19637bf973..f5044f52f1648d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -56,7 +56,7 @@ FunctionPass *createSIPostRABundlerPass(); FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); FunctionPass *createAMDGPUCodeGenPreparePass(); -FunctionPass *createAMDGPULateCodeGenPreparePass(); +FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass(); FunctionPass *createAMDGPUMachineCFGStructurizerPass(); FunctionPass *createAMDGPURewriteOutArgumentsPass(); ModulePass * @@ -282,6 +282,16 @@ class AMDGPUCodeGenPreparePass PreservedAnalyses run(Function &, FunctionAnalysisManager &); }; +class AMDGPULateCodeGenPreparePass +: public PassInfoMixin { +private: + const GCNTargetMachine &TM; + +public: + AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {}; + PreservedAnalyses run(Function &, FunctionAnalysisManager &); +}; + class AMDGPULowerKernelArgumentsPass : public PassInfoMixin { private: @@ -352,8 +362,8 @@ extern char &AMDGPUCodeGenPrepareID; void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); extern char &AMDGPURemoveIncompatibleFunctionsID; -void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); -extern char 
&AMDGPULateCodeGenPrepareID; +void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &); +extern char &AMDGPULateCodeGenPrepareLegacyID; FunctionPass *createAMDGPURewriteUndefForPHILegacyPass(); void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp index 7bf5170794cd9e..36dfebacaed686 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp @@ -42,10 +42,10 @@ static cl::opt namespace { class AMDGPULateCodeGenPrepare -: public FunctionPass, - public InstVisitor { +: public InstVisitor { Module *Mod = nullptr; const DataLayout *DL = nullptr; + const GCNSubtarget &ST; AssumptionCache *AC = nullptr; UniformityInfo *UA = nullptr; @@ -53,24 +53,10 @@ class AMDGPULateCodeGenPrepare SmallVector DeadInsts; public: - static char ID; - - AMDGPULateCodeGenPrepare() : FunctionPass(ID) {} - - StringRef getPassName() const override { -return "AMDGPU IR late optimizations"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.addRequired(); -AU.addRequired(); -AU.addRequired(); -AU.setPreservesAll(); - } - - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - + AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST, + AssumptionCache *AC, UniformityInfo *UA) + : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {} + bool run(Function &F); bool visitInstruction(Instruction &) { return false; } // Check if the specified value is at least DWORD aligned. 
@@ -148,23 +134,7 @@ class LiveRegOptimizer { } // end anonymous namespace -bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) { - Mod = &M; - DL = &Mod->getDataLayout(); - return false; -} - -bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) { - if (skipFunction(F)) -return false; - - const TargetPassConfig &TPC = getAnalysis(); - const TargetMachine &TM = TPC.getTM(); - const GCNSubtarget &ST = TM.getSubtarget(F); - - AC = &getAnalysis().getAssumptionCache(F); - UA = &getAnalysis().getUniformityInfo(); - +bool AMDGPULateCodeGenPrepare::run(Function &F) { // "Optimize" the virtual regs that cross basic block boundaries. When // building the SelectionDAG, vectors of illegal types that cross basic blocks // will be scalarized and widened, with each scalar living in its @@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) { return true; } -INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE, +PreservedAnalyses +AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) { + const GCNSubtarget &ST = TM.getSubtarget(F); + + AssumptionCache &AC = FAM.getResult(F);
[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102812 Keep respecting the old cl::opt for now. >From fb38b82ebd02b5763bd0ddb9cbc4b9f318651871 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 11 Aug 2024 16:36:39 +0400 Subject: [PATCH] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version Keep respecting the old cl::opt for now. --- .../llvm/Transforms/Scalar/StructurizeCFG.h | 9 ++ llvm/lib/Passes/PassBuilder.cpp | 5 llvm/lib/Passes/PassRegistry.def | 7 - llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 28 ++- .../StructurizeCFG/AMDGPU/uniform-regions.ll | 2 ++ 5 files changed, 49 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h index 50d41acd529e76..f68067d9354583 100644 --- a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h +++ b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h @@ -13,6 +13,15 @@ namespace llvm { struct StructurizeCFGPass : PassInfoMixin { +private: + bool SkipUniformRegions; + +public: + StructurizeCFGPass(bool SkipUniformRegions = false); + + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; } // namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 7bc1c870ce5191..46f43f3de4705c 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1085,6 +1085,11 @@ Expected parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) { "SeparateConstOffsetFromGEP"); } +Expected parseStructurizeCFGPassOptions(StringRef Params) { + return PassBuilder::parseSinglePassOption(Params, "skip-uniform-regions", +"StructurizeCFG"); +} + Expected parseFunctionSimplificationPipelineOptions(StringRef Params) { std::optional L = parseOptLevel(Params); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 
95842d15a35bf6..0cec9fbd7cd05e 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -458,7 +458,6 @@ FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("slsr", StraightLineStrengthReducePass()) FUNCTION_PASS("stack-protector", StackProtectorPass(TM)) FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) -FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) @@ -586,6 +585,12 @@ FUNCTION_PASS_WITH_PARAMS( "sroa", "SROAPass", [](SROAOptions PreserveCFG) { return SROAPass(PreserveCFG); }, parseSROAOptions, "preserve-cfg;modify-cfg") +FUNCTION_PASS_WITH_PARAMS( + "structurizecfg", "StructurizeCFG", + [](bool SkipUniformRegions) { +return StructurizeCFGPass(SkipUniformRegions); + }, + parseStructurizeCFGPassOptions, "skip-uniform-regions") FUNCTION_PASS_WITH_PARAMS( "win-eh-prepare", "WinEHPreparePass", [](bool DemoteCatchSwitchPHIOnly) { diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 9c711ec183821f..a6ed58ac9d47f2 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -1212,20 +1212,46 @@ static void addRegionIntoQueue(Region &R, std::vector &Regions) { addRegionIntoQueue(*E, Regions); } +StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_) +: SkipUniformRegions(SkipUniformRegions_) { + if (ForceSkipUniformRegions.getNumOccurrences()) +SkipUniformRegions = ForceSkipUniformRegions.getValue(); +} + +void StructurizeCFGPass::printPipeline( +raw_ostream &OS, function_ref MapClassName2PassName) { + static_cast *>(this)->printPipeline( + OS, MapClassName2PassName); + if (SkipUniformRegions) +OS << ""; +} + PreservedAnalyses StructurizeCFGPass::run(Function &F, FunctionAnalysisManager &AM) { bool Changed = false; 
DominatorTree *DT = &AM.getResult(F); auto &RI = AM.getResult(F); + + UniformityInfo *UI = nullptr; + if (SkipUniformRegions) +UI = &AM.getResult(F); + std::vector Regions; addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); while (!Regions.empty()) { Region *R = Regions.back(); +Regions.pop_back(); + StructurizeCFG SCFG; SCFG.init(R); + +if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) { + Changed = true; // May have added metadata. + continue; +} + Changed |= SCFG.run(R, DT); -Regions.pop_back(); } if (!Changed) return PreservedAnalyses::all(); diff --git a/llvm/test/Transforms/Struct
[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102812?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102812** https://app.graphite.dev/github/pr/llvm/llvm-project/102812?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102806** https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. 
Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102812 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes Keep respecting the old cl::opt for now. --- Full diff: https://github.com/llvm/llvm-project/pull/102812.diff 5 Files Affected: - (modified) llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h (+9) - (modified) llvm/lib/Passes/PassBuilder.cpp (+5) - (modified) llvm/lib/Passes/PassRegistry.def (+6-1) - (modified) llvm/lib/Transforms/Scalar/StructurizeCFG.cpp (+27-1) - (modified) llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll (+2) ``diff diff --git a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h index 50d41acd529e76..f68067d9354583 100644 --- a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h +++ b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h @@ -13,6 +13,15 @@ namespace llvm { struct StructurizeCFGPass : PassInfoMixin { +private: + bool SkipUniformRegions; + +public: + StructurizeCFGPass(bool SkipUniformRegions = false); + + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; } // namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 7bc1c870ce5191..46f43f3de4705c 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1085,6 +1085,11 @@ Expected parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) { "SeparateConstOffsetFromGEP"); } +Expected parseStructurizeCFGPassOptions(StringRef Params) { + return PassBuilder::parseSinglePassOption(Params, "skip-uniform-regions", +"StructurizeCFG"); +} + Expected parseFunctionSimplificationPipelineOptions(StringRef Params) { std::optional L = parseOptLevel(Params); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 95842d15a35bf6..0cec9fbd7cd05e 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -458,7 
+458,6 @@ FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("slsr", StraightLineStrengthReducePass()) FUNCTION_PASS("stack-protector", StackProtectorPass(TM)) FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) -FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) @@ -586,6 +585,12 @@ FUNCTION_PASS_WITH_PARAMS( "sroa", "SROAPass", [](SROAOptions PreserveCFG) { return SROAPass(PreserveCFG); }, parseSROAOptions, "preserve-cfg;modify-cfg") +FUNCTION_PASS_WITH_PARAMS( + "structurizecfg", "StructurizeCFG", + [](bool SkipUniformRegions) { +return StructurizeCFGPass(SkipUniformRegions); + }, + parseStructurizeCFGPassOptions, "skip-uniform-regions") FUNCTION_PASS_WITH_PARAMS( "win-eh-prepare", "WinEHPreparePass", [](bool DemoteCatchSwitchPHIOnly) { diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 9c711ec183821f..a6ed58ac9d47f2 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -1212,20 +1212,46 @@ static void addRegionIntoQueue(Region &R, std::vector &Regions) { addRegionIntoQueue(*E, Regions); } +StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_) +: SkipUniformRegions(SkipUniformRegions_) { + if (ForceSkipUniformRegions.getNumOccurrences()) +SkipUniformRegions = ForceSkipUniformRegions.getValue(); +} + +void StructurizeCFGPass::printPipeline( +raw_ostream &OS, function_ref MapClassName2PassName) { + static_cast *>(this)->printPipeline( + OS, MapClassName2PassName); + if (SkipUniformRegions) +OS << ""; +} + PreservedAnalyses StructurizeCFGPass::run(Function &F, FunctionAnalysisManager &AM) { bool Changed = false; DominatorTree *DT = &AM.getResult(F); auto &RI = AM.getResult(F); + + UniformityInfo *UI = nullptr; + if (SkipUniformRegions) +UI = 
&AM.getResult(F); + std::vector Regions; addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); while (!Regions.empty()) { Region *R = Regions.back(); +Regions.pop_back(); + StructurizeCFG SCFG; SCFG.init(R); + +if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) { + Changed = true; // May have added metadata. + continue; +} + Changed |= SCFG.run(R, DT); -Regions.pop_back(); } if (!Changed) return PreservedAnalyses::all(); diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll b/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll index ae73eedd4f502b..34c73ab8fd74f3 100644 --- a/llvm/test/Transforms/Str
[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102812 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102806 >From 20d553816b135de48b78e2604384bcc4bb731eaa Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 11 Aug 2024 12:57:27 +0400 Subject: [PATCH] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager --- llvm/lib/Target/AMDGPU/AMDGPU.h | 16 ++- .../AMDGPU/AMDGPULateCodeGenPrepare.cpp | 110 +++--- llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 3 + .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 4 +- .../AMDGPU/amdgpu-late-codegenprepare.ll | 1 + 5 files changed, 87 insertions(+), 47 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 34ef19637bf973..f5044f52f1648d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -56,7 +56,7 @@ FunctionPass *createSIPostRABundlerPass(); FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *); ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *); FunctionPass *createAMDGPUCodeGenPreparePass(); -FunctionPass *createAMDGPULateCodeGenPreparePass(); +FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass(); FunctionPass *createAMDGPUMachineCFGStructurizerPass(); FunctionPass *createAMDGPURewriteOutArgumentsPass(); ModulePass * @@ -282,6 +282,16 @@ class AMDGPUCodeGenPreparePass PreservedAnalyses run(Function &, FunctionAnalysisManager &); }; +class AMDGPULateCodeGenPreparePass +: public PassInfoMixin { +private: + const GCNTargetMachine &TM; + +public: + AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {}; + PreservedAnalyses run(Function &, FunctionAnalysisManager &); +}; + class AMDGPULowerKernelArgumentsPass : public PassInfoMixin { private: @@ -352,8 +362,8 @@ extern char &AMDGPUCodeGenPrepareID; void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &); extern char &AMDGPURemoveIncompatibleFunctionsID; -void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); -extern char 
&AMDGPULateCodeGenPrepareID; +void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &); +extern char &AMDGPULateCodeGenPrepareLegacyID; FunctionPass *createAMDGPURewriteUndefForPHILegacyPass(); void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp index 7bf5170794cd9e..36dfebacaed686 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp @@ -42,10 +42,10 @@ static cl::opt namespace { class AMDGPULateCodeGenPrepare -: public FunctionPass, - public InstVisitor { +: public InstVisitor { Module *Mod = nullptr; const DataLayout *DL = nullptr; + const GCNSubtarget &ST; AssumptionCache *AC = nullptr; UniformityInfo *UA = nullptr; @@ -53,24 +53,10 @@ class AMDGPULateCodeGenPrepare SmallVector DeadInsts; public: - static char ID; - - AMDGPULateCodeGenPrepare() : FunctionPass(ID) {} - - StringRef getPassName() const override { -return "AMDGPU IR late optimizations"; - } - - void getAnalysisUsage(AnalysisUsage &AU) const override { -AU.addRequired(); -AU.addRequired(); -AU.addRequired(); -AU.setPreservesAll(); - } - - bool doInitialization(Module &M) override; - bool runOnFunction(Function &F) override; - + AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST, + AssumptionCache *AC, UniformityInfo *UA) + : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {} + bool run(Function &F); bool visitInstruction(Instruction &) { return false; } // Check if the specified value is at least DWORD aligned. 
@@ -148,23 +134,7 @@ class LiveRegOptimizer { } // end anonymous namespace -bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) { - Mod = &M; - DL = &Mod->getDataLayout(); - return false; -} - -bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) { - if (skipFunction(F)) -return false; - - const TargetPassConfig &TPC = getAnalysis(); - const TargetMachine &TM = TPC.getTM(); - const GCNSubtarget &ST = TM.getSubtarget(F); - - AC = &getAnalysis().getAssumptionCache(F); - UA = &getAnalysis().getUniformityInfo(); - +bool AMDGPULateCodeGenPrepare::run(Function &F) { // "Optimize" the virtual regs that cross basic block boundaries. When // building the SelectionDAG, vectors of illegal types that cross basic blocks // will be scalarized and widened, with each scalar living in its @@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst &LI) { return true; } -INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE, +PreservedAnalyses +AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) { + const GCNSubtarget &ST = TM.getSubtarget(F); + + AssumptionCache &AC = FAM.getResult(F);
[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/102812 >From 306343c33680bdce7d2e670f6893890ac810da8d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 11 Aug 2024 16:36:39 +0400 Subject: [PATCH] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version Keep respecting the old cl::opt for now. --- .../llvm/Transforms/Scalar/StructurizeCFG.h | 9 ++ llvm/lib/Passes/PassBuilder.cpp | 5 llvm/lib/Passes/PassRegistry.def | 7 - llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 28 ++- .../StructurizeCFG/AMDGPU/uniform-regions.ll | 2 ++ 5 files changed, 49 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h index 50d41acd529e76..f68067d9354583 100644 --- a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h +++ b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h @@ -13,6 +13,15 @@ namespace llvm { struct StructurizeCFGPass : PassInfoMixin { +private: + bool SkipUniformRegions; + +public: + StructurizeCFGPass(bool SkipUniformRegions = false); + + void printPipeline(raw_ostream &OS, + function_ref MapClassName2PassName); + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; } // namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 7bc1c870ce5191..46f43f3de4705c 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -1085,6 +1085,11 @@ Expected parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) { "SeparateConstOffsetFromGEP"); } +Expected parseStructurizeCFGPassOptions(StringRef Params) { + return PassBuilder::parseSinglePassOption(Params, "skip-uniform-regions", +"StructurizeCFG"); +} + Expected parseFunctionSimplificationPipelineOptions(StringRef Params) { std::optional L = parseOptLevel(Params); diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 95842d15a35bf6..0cec9fbd7cd05e 100644 --- 
a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -458,7 +458,6 @@ FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) FUNCTION_PASS("slsr", StraightLineStrengthReducePass()) FUNCTION_PASS("stack-protector", StackProtectorPass(TM)) FUNCTION_PASS("strip-gc-relocates", StripGCRelocates()) -FUNCTION_PASS("structurizecfg", StructurizeCFGPass()) FUNCTION_PASS("tailcallelim", TailCallElimPass()) FUNCTION_PASS("tlshoist", TLSVariableHoistPass()) FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) @@ -586,6 +585,12 @@ FUNCTION_PASS_WITH_PARAMS( "sroa", "SROAPass", [](SROAOptions PreserveCFG) { return SROAPass(PreserveCFG); }, parseSROAOptions, "preserve-cfg;modify-cfg") +FUNCTION_PASS_WITH_PARAMS( + "structurizecfg", "StructurizeCFG", + [](bool SkipUniformRegions) { +return StructurizeCFGPass(SkipUniformRegions); + }, + parseStructurizeCFGPassOptions, "skip-uniform-regions") FUNCTION_PASS_WITH_PARAMS( "win-eh-prepare", "WinEHPreparePass", [](bool DemoteCatchSwitchPHIOnly) { diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 9c711ec183821f..a6ed58ac9d47f2 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -1212,20 +1212,46 @@ static void addRegionIntoQueue(Region &R, std::vector &Regions) { addRegionIntoQueue(*E, Regions); } +StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_) +: SkipUniformRegions(SkipUniformRegions_) { + if (ForceSkipUniformRegions.getNumOccurrences()) +SkipUniformRegions = ForceSkipUniformRegions.getValue(); +} + +void StructurizeCFGPass::printPipeline( +raw_ostream &OS, function_ref MapClassName2PassName) { + static_cast *>(this)->printPipeline( + OS, MapClassName2PassName); + if (SkipUniformRegions) +OS << ""; +} + PreservedAnalyses StructurizeCFGPass::run(Function &F, FunctionAnalysisManager &AM) { bool Changed = false; DominatorTree *DT = &AM.getResult(F); auto &RI = 
AM.getResult(F); + + UniformityInfo *UI = nullptr; + if (SkipUniformRegions) +UI = &AM.getResult(F); + std::vector Regions; addRegionIntoQueue(*RI.getTopLevelRegion(), Regions); while (!Regions.empty()) { Region *R = Regions.back(); +Regions.pop_back(); + StructurizeCFG SCFG; SCFG.init(R); + +if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) { + Changed = true; // May have added metadata. + continue; +} + Changed |= SCFG.run(R, DT); -Regions.pop_back(); } if (!Changed) return PreservedAnalyses::all(); diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll b/llvm
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Fill out addPreISelPasses (PR #102814)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102814 This specific callback should now be at parity with the old pass manager version. There are still some missing IR passes before this point. Also I don't understand the need for the RequiresAnalysisPass at the end. SelectionDAG should just be using the uncached getResult? >From a410db38f893673d32ac34a62389efcc43a6687e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 11 Aug 2024 11:55:00 +0400 Subject: [PATCH] AMDGPU/NewPM: Fill out addPreISelPasses This specific callback should now be at parity with the old pass manager version. There are still some missing IR passes before this point. Also I don't understand the need for the RequiresAnalysisPass at the end. SelectionDAG should just be using the uncached getResult? --- .../AMDGPU/AMDGPUCodeGenPassBuilder.cpp | 55 ++- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 8 ++- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 1 + .../CodeGen/AMDGPU/bug-v4f64-subvector.ll | 2 +- 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index fb3d3259171aca..36f44a20d95532 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -9,9 +9,17 @@ #include "AMDGPUCodeGenPassBuilder.h" #include "AMDGPU.h" #include "AMDGPUISelDAGToDAG.h" +#include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUTargetMachine.h" +#include "AMDGPUUnifyDivergentExitNodes.h" #include "SIFixSGPRCopies.h" #include "llvm/Analysis/UniformityAnalysis.h" +#include "llvm/Transforms/Scalar/FlattenCFG.h" +#include "llvm/Transforms/Scalar/Sink.h" +#include "llvm/Transforms/Scalar/StructurizeCFG.h" +#include "llvm/Transforms/Utils/FixIrreducible.h" +#include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/Transforms/Utils/UnifyLoopExits.h" using namespace llvm; @@ -28,8 +36,51 @@ 
AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder( } void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const { - // TODO: Add passes pre instruction selection. - // Test only, convert to real IR passes in future. + const bool LateCFGStructurize = AMDGPUTargetMachine::EnableLateStructurizeCFG; + const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer; + const bool EnableStructurizerWorkarounds = + AMDGPUTargetMachine::EnableStructurizerWorkarounds; + + if (TM.getOptLevel() > CodeGenOptLevel::None) +addPass(FlattenCFGPass()); + + if (TM.getOptLevel() > CodeGenOptLevel::None) +addPass(SinkingPass()); + + addPass(AMDGPULateCodeGenPreparePass(TM)); + + // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit + // regions formed by them. + + addPass(AMDGPUUnifyDivergentExitNodesPass()); + + if (!LateCFGStructurize && !DisableStructurizer) { +if (EnableStructurizerWorkarounds) { + addPass(FixIrreduciblePass()); + addPass(UnifyLoopExitsPass()); +} + +addPass(StructurizeCFGPass(/*SkipUniformRegions=*/false)); + } + + addPass(AMDGPUAnnotateUniformValuesPass()); + + if (!LateCFGStructurize && !DisableStructurizer) { +addPass(SIAnnotateControlFlowPass(TM)); + +// TODO: Move this right after structurizeCFG to avoid extra divergence +// analysis. This depends on stopping SIAnnotateControlFlow from making +// control flow modifications. +addPass(AMDGPURewriteUndefForPHIPass()); + } + + addPass(LCSSAPass()); + + if (TM.getOptLevel() > CodeGenOptLevel::Less) +addPass(AMDGPUPerfHintAnalysisPass(TM)); + + // FIXME: Why isn't this queried as required from AMDGPUISelDAGToDAG, and why + // isn't this in addInstSelector? 
addPass(RequireAnalysisPass()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0523fee5bcf9f4..5929dadf93bcbe 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -338,10 +338,11 @@ static cl::opt EnableScalarIRPasses( cl::init(true), cl::Hidden); -static cl::opt EnableStructurizerWorkarounds( +static cl::opt EnableStructurizerWorkarounds( "amdgpu-enable-structurizer-workarounds", -cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), -cl::Hidden); +cl::desc("Enable workarounds for the StructurizeCFG pass"), +cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds), +cl::init(true), cl::Hidden); static cl::opt EnableLowerModuleLDS( "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), @@ -611,6 +612,7 @@ bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false; bool AMDGPUTargetMachine::EnableFunctionCalls = false; bool AMDGPUTargetMachine::EnableLowerModuleLDS = true; bool AMDGPUTargetMachine::DisableStructurizer = false; +bool AMDGPUTargetMachine::Enable
[llvm-branch-commits] [llvm] CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes (PR #102815)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102815 None >From d91c9248e843a4b0b2dd7c32a3f47e72a1362409 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 11 Aug 2024 18:11:04 +0400 Subject: [PATCH] CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes --- llvm/include/llvm/Passes/CodeGenPassBuilder.h | 4 1 file changed, 4 insertions(+) diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 3cc39b54ba758d..eb15beb835b535 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -27,6 +27,8 @@ #include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/CodeGen/DeadMachineInstructionElim.h" #include "llvm/CodeGen/DwarfEHPrepare.h" +#include "llvm/CodeGen/ExpandLargeDivRem.h" +#include "llvm/CodeGen/ExpandLargeFpConvert.h" #include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandReductions.h" #include "llvm/CodeGen/FinalizeISel.h" @@ -627,6 +629,8 @@ void CodeGenPassBuilder::addISelPasses( addPass(LowerEmuTLSPass()); addPass(PreISelIntrinsicLoweringPass(&TM)); + addPass(ExpandLargeDivRemPass(&TM)); + addPass(ExpandLargeFpConvertPass(&TM)); derived().addIRPasses(addPass); derived().addCodeGenPrepare(addPass); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes (PR #102815)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102815?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102815** https://app.graphite.dev/github/pr/llvm/llvm-project/102815?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102814** https://app.graphite.dev/github/pr/llvm/llvm-project/102814?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102812** https://app.graphite.dev/github/pr/llvm/llvm-project/102812?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102806** https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. 
https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102815 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Start implementing addCodeGenPrepare (PR #102816)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/102816 None >From cc51e15865010c73cf7bd3ab8632b965aa7a9dbf Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 11 Aug 2024 18:20:23 +0400 Subject: [PATCH] AMDGPU/NewPM: Start implementing addCodeGenPrepare --- llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp | 11 +++ llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h | 4 +++- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index 36f44a20d9553..252a70d44736d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -19,6 +19,7 @@ #include "llvm/Transforms/Scalar/StructurizeCFG.h" #include "llvm/Transforms/Utils/FixIrreducible.h" #include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/Transforms/Utils/UnifyLoopExits.h" using namespace llvm; @@ -35,6 +36,16 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder( ShadowStackGCLoweringPass>(); } +void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const { + Base::addCodeGenPrepare(addPass); + + // LowerSwitch pass may introduce unreachable blocks that can cause unexpected + // behavior for subsequent passes. Placing it here seems better that these + // blocks would get cleaned up by UnreachableBlockElim inserted next in the + // pass flow. 
+ addPass(LowerSwitchPass()); +} + void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const { const bool LateCFGStructurize = AMDGPUTargetMachine::EnableLateStructurizeCFG; const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h index e656e166b3eb2..efb296689bd64 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h @@ -19,10 +19,12 @@ class GCNTargetMachine; class AMDGPUCodeGenPassBuilder : public CodeGenPassBuilder { public: + using Base = CodeGenPassBuilder; + AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC); - + void addCodeGenPrepare(AddIRPass &) const; void addPreISel(AddIRPass &addPass) const; void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const; Error addInstSelector(AddMachinePass &) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 5929dadf93bcb..cad4585c5b301 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -67,6 +67,7 @@ #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InferAddressSpaces.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" #include ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Fill out addPreISelPasses (PR #102814)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102814?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102815** https://app.graphite.dev/github/pr/llvm/llvm-project/102815?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102814** https://app.graphite.dev/github/pr/llvm/llvm-project/102814?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102812** https://app.graphite.dev/github/pr/llvm/llvm-project/102812?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102806** https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. 
https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102814 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Start implementing addCodeGenPrepare (PR #102816)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is > open. Once all requirements are satisfied, merge this PR as a stack href="https://app.graphite.dev/github/pr/llvm/llvm-project/102816?utm_source=stack-comment-downstack-mergeability-warning"; > >on Graphite. > https://graphite.dev/docs/merge-pull-requests";>Learn more * **#102816** https://app.graphite.dev/github/pr/llvm/llvm-project/102816?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> 👈 * **#102815** https://app.graphite.dev/github/pr/llvm/llvm-project/102815?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102814** https://app.graphite.dev/github/pr/llvm/llvm-project/102814?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102812** https://app.graphite.dev/github/pr/llvm/llvm-project/102812?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102806** https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * **#102644** 
https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon"; target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="10px" height="10px"/> * `main` This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about stacking. Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" width="11px" height="11px"/> Graphite https://github.com/llvm/llvm-project/pull/102816 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Fill out addPreISelPasses (PR #102814)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102814 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes (PR #102815)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102815 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Fill out addPreISelPasses (PR #102814)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-transforms Author: Matt Arsenault (arsenm) Changes This specific callback should now be at parity with the old pass manager version. There are still some missing IR passes before this point. Also I don't understand the need for the RequiresAnalysisPass at the end. SelectionDAG should just be using the uncached getResult? --- Full diff: https://github.com/llvm/llvm-project/pull/102814.diff 4 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+53-2) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+5-3) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+1) - (modified) llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll (+1-1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index fb3d3259171ac..36f44a20d9553 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -9,9 +9,17 @@ #include "AMDGPUCodeGenPassBuilder.h" #include "AMDGPU.h" #include "AMDGPUISelDAGToDAG.h" +#include "AMDGPUPerfHintAnalysis.h" #include "AMDGPUTargetMachine.h" +#include "AMDGPUUnifyDivergentExitNodes.h" #include "SIFixSGPRCopies.h" #include "llvm/Analysis/UniformityAnalysis.h" +#include "llvm/Transforms/Scalar/FlattenCFG.h" +#include "llvm/Transforms/Scalar/Sink.h" +#include "llvm/Transforms/Scalar/StructurizeCFG.h" +#include "llvm/Transforms/Utils/FixIrreducible.h" +#include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/Transforms/Utils/UnifyLoopExits.h" using namespace llvm; @@ -28,8 +36,51 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder( } void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const { - // TODO: Add passes pre instruction selection. - // Test only, convert to real IR passes in future. 
+ const bool LateCFGStructurize = AMDGPUTargetMachine::EnableLateStructurizeCFG; + const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer; + const bool EnableStructurizerWorkarounds = + AMDGPUTargetMachine::EnableStructurizerWorkarounds; + + if (TM.getOptLevel() > CodeGenOptLevel::None) +addPass(FlattenCFGPass()); + + if (TM.getOptLevel() > CodeGenOptLevel::None) +addPass(SinkingPass()); + + addPass(AMDGPULateCodeGenPreparePass(TM)); + + // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit + // regions formed by them. + + addPass(AMDGPUUnifyDivergentExitNodesPass()); + + if (!LateCFGStructurize && !DisableStructurizer) { +if (EnableStructurizerWorkarounds) { + addPass(FixIrreduciblePass()); + addPass(UnifyLoopExitsPass()); +} + +addPass(StructurizeCFGPass(/*SkipUniformRegions=*/false)); + } + + addPass(AMDGPUAnnotateUniformValuesPass()); + + if (!LateCFGStructurize && !DisableStructurizer) { +addPass(SIAnnotateControlFlowPass(TM)); + +// TODO: Move this right after structurizeCFG to avoid extra divergence +// analysis. This depends on stopping SIAnnotateControlFlow from making +// control flow modifications. +addPass(AMDGPURewriteUndefForPHIPass()); + } + + addPass(LCSSAPass()); + + if (TM.getOptLevel() > CodeGenOptLevel::Less) +addPass(AMDGPUPerfHintAnalysisPass(TM)); + + // FIXME: Why isn't this queried as required from AMDGPUISelDAGToDAG, and why + // isn't this in addInstSelector? 
addPass(RequireAnalysisPass()); } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 0523fee5bcf9f..5929dadf93bcb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -338,10 +338,11 @@ static cl::opt EnableScalarIRPasses( cl::init(true), cl::Hidden); -static cl::opt EnableStructurizerWorkarounds( +static cl::opt EnableStructurizerWorkarounds( "amdgpu-enable-structurizer-workarounds", -cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true), -cl::Hidden); +cl::desc("Enable workarounds for the StructurizeCFG pass"), +cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds), +cl::init(true), cl::Hidden); static cl::opt EnableLowerModuleLDS( "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"), @@ -611,6 +612,7 @@ bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false; bool AMDGPUTargetMachine::EnableFunctionCalls = false; bool AMDGPUTargetMachine::EnableLowerModuleLDS = true; bool AMDGPUTargetMachine::DisableStructurizer = false; +bool AMDGPUTargetMachine::EnableStructurizerWorkarounds = true; AMDGPUTargetMachine::~AMDGPUTargetMachine() = default; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h index 6bb8788cc73b0..4d39ad2b41505 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUT
[llvm-branch-commits] [llvm] CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes (PR #102815)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102815.diff 1 Files Affected: - (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+4) ``diff diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h index 3cc39b54ba758d..eb15beb835b535 100644 --- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h +++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h @@ -27,6 +27,8 @@ #include "llvm/CodeGen/CodeGenPrepare.h" #include "llvm/CodeGen/DeadMachineInstructionElim.h" #include "llvm/CodeGen/DwarfEHPrepare.h" +#include "llvm/CodeGen/ExpandLargeDivRem.h" +#include "llvm/CodeGen/ExpandLargeFpConvert.h" #include "llvm/CodeGen/ExpandMemCmp.h" #include "llvm/CodeGen/ExpandReductions.h" #include "llvm/CodeGen/FinalizeISel.h" @@ -627,6 +629,8 @@ void CodeGenPassBuilder::addISelPasses( addPass(LowerEmuTLSPass()); addPass(PreISelIntrinsicLoweringPass(&TM)); + addPass(ExpandLargeDivRemPass(&TM)); + addPass(ExpandLargeFpConvertPass(&TM)); derived().addIRPasses(addPass); derived().addCodeGenPrepare(addPass); `` https://github.com/llvm/llvm-project/pull/102815 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Start implementing addCodeGenPrepare (PR #102816)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/102816 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU/NewPM: Start implementing addCodeGenPrepare (PR #102816)
llvmbot wrote: @llvm/pr-subscribers-backend-amdgpu Author: Matt Arsenault (arsenm) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/102816.diff 3 Files Affected: - (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+11) - (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h (+3-1) - (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+1) ``diff diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp index 36f44a20d9553..252a70d44736d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp @@ -19,6 +19,7 @@ #include "llvm/Transforms/Scalar/StructurizeCFG.h" #include "llvm/Transforms/Utils/FixIrreducible.h" #include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/Transforms/Utils/UnifyLoopExits.h" using namespace llvm; @@ -35,6 +36,16 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder( ShadowStackGCLoweringPass>(); } +void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const { + Base::addCodeGenPrepare(addPass); + + // LowerSwitch pass may introduce unreachable blocks that can cause unexpected + // behavior for subsequent passes. Placing it here seems better that these + // blocks would get cleaned up by UnreachableBlockElim inserted next in the + // pass flow. 
+ addPass(LowerSwitchPass()); +} + void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const { const bool LateCFGStructurize = AMDGPUTargetMachine::EnableLateStructurizeCFG; const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h index e656e166b3eb2..efb296689bd64 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h @@ -19,10 +19,12 @@ class GCNTargetMachine; class AMDGPUCodeGenPassBuilder : public CodeGenPassBuilder { public: + using Base = CodeGenPassBuilder; + AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM, const CGPassBuilderOption &Opts, PassInstrumentationCallbacks *PIC); - + void addCodeGenPrepare(AddIRPass &) const; void addPreISel(AddIRPass &addPass) const; void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const; Error addInstSelector(AddMachinePass &) const; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 5929dadf93bcb..cad4585c5b301 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -67,6 +67,7 @@ #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/InferAddressSpaces.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/LowerSwitch.h" #include "llvm/Transforms/Utils/SimplifyLibCalls.h" #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" #include `` https://github.com/llvm/llvm-project/pull/102816 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [GlobalISel] Don't remove from unfinalized GISelWorkList (PR #102158)
https://github.com/tobias-stadler updated https://github.com/llvm/llvm-project/pull/102158 >From 1f5757a4c3989755623d66c43575c858dcb13f75 Mon Sep 17 00:00:00 2001 From: Tobias Stadler Date: Tue, 6 Aug 2024 17:13:59 +0200 Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?= =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.6-bogner-wip [skip ci] --- llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp index 3310ce5455c978..5da9e86b207618 100644 --- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp @@ -153,7 +153,7 @@ bool Combiner::combineMachineInstrs() { // down RPOT. Changed = false; -RAIIDelegateInstaller DelInstall(MF, ObserverWrapper.get()); +RAIIMFObsDelInstaller DelInstall(MF, *ObserverWrapper); for (MachineBasicBlock *MBB : post_order(&MF)) { for (MachineInstr &CurMI : llvm::make_early_inc_range(llvm::reverse(*MBB))) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)
davemgreen wrote: > The patch here is pretty big in size, but it seems to only affects the > remarks, on the other hand it doesn't seem to really fix anything and in that > case I feel like RC3 might be the wrong time to merge this. Is there a huge > upside to take this this late in the process? Thanks - I wasn't sure what state the branch was in. As @jroelofs points out the issues this is attempting to help with can be pretty performance-sensitive and are hard to diagnose without assistance. The issue is that when and where spills happen can occur quite chaotically out of the register allocator, and so users need to be using the same compiler to diagnose the issues as they will use in practice. Having to provide patches and for users to build the compiler themselves is quite difficult compared to having this on the branch. All the code (meaningfully) changed in this patch needs to be enabled with both `-Rpass-analysis=sme` and a backend `-mllvm -aarch64-stack-hazard-remark-size=XYZ` (or `-mllvm -aarch64-stack-hazard-size=xyz`), so the chance of it breaking anything else should be very low. https://github.com/llvm/llvm-project/pull/102168 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libcxx] release/19.x: [NFC][libc++][test][AIX] UnXFAIL LIT test transform.pass.cpp (#102338) (PR #102466)
https://github.com/philnik777 approved this pull request. https://github.com/llvm/llvm-project/pull/102466 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)
https://github.com/jdoerfert approved this pull request. LG https://github.com/llvm/llvm-project/pull/102812 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)
https://github.com/alexander-shaposhnikov edited https://github.com/llvm/llvm-project/pull/102764 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/102764 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/102764 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)
https://github.com/MaskRay edited https://github.com/llvm/llvm-project/pull/102764 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)
@@ -0,0 +1,334 @@ +//===- nsan_allocator.cpp -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// NumericalStabilitySanitizer allocator. +// +//===--===// + +#include "nsan_allocator.h" +#include "interception/interception.h" +#include "nsan.h" +#include "nsan_platform.h" +#include "nsan_thread.h" +#include "sanitizer_common/sanitizer_allocator.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" +#include "sanitizer_common/sanitizer_allocator_interface.h" +#include "sanitizer_common/sanitizer_allocator_report.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_errno.h" + +DECLARE_REAL(void *, memset, void *dest, int c, uptr n) + +using namespace __nsan; + +namespace { +struct Metadata { + uptr requested_size; +}; + +struct NsanMapUnmapCallback { + void OnMap(uptr p, uptr size) const {} + void OnMapSecondary(uptr p, uptr size, uptr user_begin, + uptr user_size) const {} + void OnUnmap(uptr p, uptr size) const {} +}; + +const uptr kMaxAllowedMallocSize = 1ULL << 40; + +// Allocator64 parameters. Deliberately using a short name. +struct AP64 { + static const uptr kSpaceBeg = Mapping::kHeapMemBeg; + static const uptr kSpaceSize = 0x400; // 4T. 
+ static const uptr kMetadataSize = sizeof(Metadata); + using SizeClassMap = DefaultSizeClassMap; + using MapUnmapCallback = NsanMapUnmapCallback; + static const uptr kFlags = 0; + using AddressSpaceView = LocalAddressSpaceView; +}; +} // namespace + +using PrimaryAllocator = SizeClassAllocator64; +using Allocator = CombinedAllocator; +using AllocatorCache = Allocator::AllocatorCache; + +static Allocator allocator; +static AllocatorCache fallback_allocator_cache; +static StaticSpinMutex fallback_mutex; + +static uptr max_malloc_size; + +void __nsan::NsanAllocatorInit() { + SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null); + allocator.Init(common_flags()->allocator_release_to_os_interval_ms); + if (common_flags()->max_allocation_size_mb) +max_malloc_size = Min(common_flags()->max_allocation_size_mb << 20, + kMaxAllowedMallocSize); + else +max_malloc_size = kMaxAllowedMallocSize; +} + +static AllocatorCache *GetAllocatorCache(NsanThreadLocalMallocStorage *ms) { + CHECK(ms); + CHECK_LE(sizeof(AllocatorCache), sizeof(ms->allocator_cache)); + return reinterpret_cast(ms->allocator_cache); +} + +void NsanThreadLocalMallocStorage::Init() { + allocator.InitCache(GetAllocatorCache(this)); +} + +void NsanThreadLocalMallocStorage::CommitBack() { + allocator.SwallowCache(GetAllocatorCache(this)); + allocator.DestroyCache(GetAllocatorCache(this)); +} + +static void *NsanAllocate(uptr size, uptr alignment, bool zero) { + if (UNLIKELY(size > max_malloc_size)) { +if (AllocatorMayReturnNull()) { + Report("WARNING: NumericalStabilitySanitizer failed to allocate 0x%zx " + "bytes\n", + size); + return nullptr; +} +BufferedStackTrace stack; +GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); +ReportAllocationSizeTooBig(size, max_malloc_size, &stack); + } + if (UNLIKELY(IsRssLimitExceeded())) { +if (AllocatorMayReturnNull()) + return nullptr; +BufferedStackTrace stack; +GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); +ReportRssLimitExceeded(&stack); + } + NsanThread *t = 
GetCurrentThread(); + void *allocated; + if (t) { +AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); +allocated = allocator.Allocate(cache, size, alignment); + } else { +SpinMutexLock l(&fallback_mutex); +AllocatorCache *cache = &fallback_allocator_cache; +allocated = allocator.Allocate(cache, size, alignment); + } + if (UNLIKELY(!allocated)) { +SetAllocatorOutOfMemory(); +if (AllocatorMayReturnNull()) + return nullptr; +BufferedStackTrace stack; +GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); +ReportOutOfMemory(size, &stack); + } + auto *meta = reinterpret_cast(allocator.GetMetaData(allocated)); + meta->requested_size = size; + if (zero && allocator.FromPrimary(allocated)) +REAL(memset)(allocated, 0, size); + __nsan_set_value_unknown(allocated, size); + RunMallocHooks(allocated, size); + return allocated; +} + +void __nsan::NsanDeallocate(void *p) { + DCHECK(p); + RunFreeHooks(p); + auto *meta = reinterpret_cast(allocator.GetMetaData(p)); + meta->requested_size = 0; + if (NsanThread *t = GetCurrentThread()) { +AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); +allocator.Deallocate(cache, p); + } else { +SpinMutexLock l(&fallback_mutex)
[llvm-branch-commits] [compiler-rt] [nsan] Use sanitizer allocator (PR #102764)
https://github.com/MaskRay updated https://github.com/llvm/llvm-project/pull/102764 >From 6ec669e2206a29bce0c28213e82c2694f03bfad9 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 11 Aug 2024 13:27:34 -0700 Subject: [PATCH] remove GetCurrentThread check for NsanAllocate Created using spr 1.3.5-bogner --- compiler-rt/lib/nsan/nsan_allocator.cpp | 13 - 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/compiler-rt/lib/nsan/nsan_allocator.cpp b/compiler-rt/lib/nsan/nsan_allocator.cpp index 3fa58513e9b8c..3ba71d7e9f9e2 100644 --- a/compiler-rt/lib/nsan/nsan_allocator.cpp +++ b/compiler-rt/lib/nsan/nsan_allocator.cpp @@ -108,15 +108,8 @@ static void *NsanAllocate(uptr size, uptr alignment, bool zero) { ReportRssLimitExceeded(&stack); } NsanThread *t = GetCurrentThread(); - void *allocated; - if (t) { -AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); -allocated = allocator.Allocate(cache, size, alignment); - } else { -SpinMutexLock l(&fallback_mutex); -AllocatorCache *cache = &fallback_allocator_cache; -allocated = allocator.Allocate(cache, size, alignment); - } + void *allocated = allocator.Allocate(GetAllocatorCache(&t->malloc_storage()), + size, alignment); if (UNLIKELY(!allocated)) { SetAllocatorOutOfMemory(); if (AllocatorMayReturnNull()) @@ -146,6 +139,8 @@ void __nsan::NsanDeallocate(void *p) { AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); allocator.Deallocate(cache, p); } else { +// In a just created thread, glibc's _dl_deallocate_tls might reach here +// before nsan_current_thread is set. SpinMutexLock l(&fallback_mutex); AllocatorCache *cache = &fallback_allocator_cache; allocator.Deallocate(cache, p); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [compiler-rt] [nsan] Use sanitizer allocator (PR #102764)
@@ -0,0 +1,334 @@ +//===- nsan_allocator.cpp -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===--===// +// +// NumericalStabilitySanitizer allocator. +// +//===--===// + +#include "nsan_allocator.h" +#include "interception/interception.h" +#include "nsan.h" +#include "nsan_platform.h" +#include "nsan_thread.h" +#include "sanitizer_common/sanitizer_allocator.h" +#include "sanitizer_common/sanitizer_allocator_checks.h" +#include "sanitizer_common/sanitizer_allocator_interface.h" +#include "sanitizer_common/sanitizer_allocator_report.h" +#include "sanitizer_common/sanitizer_common.h" +#include "sanitizer_common/sanitizer_errno.h" + +DECLARE_REAL(void *, memset, void *dest, int c, uptr n) + +using namespace __nsan; + +namespace { +struct Metadata { + uptr requested_size; +}; + +struct NsanMapUnmapCallback { + void OnMap(uptr p, uptr size) const {} + void OnMapSecondary(uptr p, uptr size, uptr user_begin, + uptr user_size) const {} + void OnUnmap(uptr p, uptr size) const {} +}; + +const uptr kMaxAllowedMallocSize = 1ULL << 40; + +// Allocator64 parameters. Deliberately using a short name. +struct AP64 { + static const uptr kSpaceBeg = Mapping::kHeapMemBeg; + static const uptr kSpaceSize = 0x400; // 4T. 
+ static const uptr kMetadataSize = sizeof(Metadata); + using SizeClassMap = DefaultSizeClassMap; + using MapUnmapCallback = NsanMapUnmapCallback; + static const uptr kFlags = 0; + using AddressSpaceView = LocalAddressSpaceView; +}; +} // namespace + +using PrimaryAllocator = SizeClassAllocator64; +using Allocator = CombinedAllocator; +using AllocatorCache = Allocator::AllocatorCache; + +static Allocator allocator; +static AllocatorCache fallback_allocator_cache; +static StaticSpinMutex fallback_mutex; + +static uptr max_malloc_size; + +void __nsan::NsanAllocatorInit() { + SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null); + allocator.Init(common_flags()->allocator_release_to_os_interval_ms); + if (common_flags()->max_allocation_size_mb) +max_malloc_size = Min(common_flags()->max_allocation_size_mb << 20, + kMaxAllowedMallocSize); + else +max_malloc_size = kMaxAllowedMallocSize; +} + +static AllocatorCache *GetAllocatorCache(NsanThreadLocalMallocStorage *ms) { + CHECK(ms); + CHECK_LE(sizeof(AllocatorCache), sizeof(ms->allocator_cache)); + return reinterpret_cast(ms->allocator_cache); +} + +void NsanThreadLocalMallocStorage::Init() { + allocator.InitCache(GetAllocatorCache(this)); +} + +void NsanThreadLocalMallocStorage::CommitBack() { + allocator.SwallowCache(GetAllocatorCache(this)); + allocator.DestroyCache(GetAllocatorCache(this)); +} + +static void *NsanAllocate(uptr size, uptr alignment, bool zero) { + if (UNLIKELY(size > max_malloc_size)) { +if (AllocatorMayReturnNull()) { + Report("WARNING: NumericalStabilitySanitizer failed to allocate 0x%zx " + "bytes\n", + size); + return nullptr; +} +BufferedStackTrace stack; +GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); +ReportAllocationSizeTooBig(size, max_malloc_size, &stack); + } + if (UNLIKELY(IsRssLimitExceeded())) { +if (AllocatorMayReturnNull()) + return nullptr; +BufferedStackTrace stack; +GET_FATAL_STACK_TRACE_IF_EMPTY(&stack); +ReportRssLimitExceeded(&stack); + } + NsanThread *t = 
GetCurrentThread(); + void *allocated; + if (t) { +AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage()); +allocated = allocator.Allocate(cache, size, alignment); + } else { MaskRay wrote: I added a comment here. Say, the main thread creates a new thread. The main thread allocates TLS blocks (`nsan_current_thread == main_thread`). In glibc, when nptl/allocatestack.c create or resize the guard area, it might call `_dl_deallocate_tls` on the TLS blocks. This happens at very early stage of pthread_create and the TLS `nsan_current_thread` is nullptr. https://github.com/llvm/llvm-project/pull/102764 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [libc] 61f3f31 - Revert "libc: Remove `extern "C"` from main declarations (#102825)"
Author: Schrodinger ZHU Yifan Date: 2024-08-11T13:40:32-07:00 New Revision: 61f3f31095e721e920967f91acf26c3249afc956 URL: https://github.com/llvm/llvm-project/commit/61f3f31095e721e920967f91acf26c3249afc956 DIFF: https://github.com/llvm/llvm-project/commit/61f3f31095e721e920967f91acf26c3249afc956.diff LOG: Revert "libc: Remove `extern "C"` from main declarations (#102825)" This reverts commit 1b71c471c7d0216fa7fc5c0b45b5926d1fabfaf4. Added: Modified: libc/benchmarks/gpu/LibcGpuBenchmarkMain.cpp libc/startup/gpu/amdgpu/start.cpp libc/startup/gpu/nvptx/start.cpp libc/startup/linux/do_start.cpp libc/test/IntegrationTest/test.h libc/test/UnitTest/LibcTestMain.cpp libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp Removed: diff --git a/libc/benchmarks/gpu/LibcGpuBenchmarkMain.cpp b/libc/benchmarks/gpu/LibcGpuBenchmarkMain.cpp index c4cc1a1731ce34..97366e55194a90 100644 --- a/libc/benchmarks/gpu/LibcGpuBenchmarkMain.cpp +++ b/libc/benchmarks/gpu/LibcGpuBenchmarkMain.cpp @@ -1,6 +1,6 @@ #include "LibcGpuBenchmark.h" -int main(int argc, char **argv, char **envp) { +extern "C" int main(int argc, char **argv, char **envp) { LIBC_NAMESPACE::benchmarks::Benchmark::run_benchmarks(); return 0; } diff --git a/libc/startup/gpu/amdgpu/start.cpp b/libc/startup/gpu/amdgpu/start.cpp index e10e4cd9c2cd74..5aaa7e938d2792 100644 --- a/libc/startup/gpu/amdgpu/start.cpp +++ b/libc/startup/gpu/amdgpu/start.cpp @@ -13,7 +13,7 @@ #include "src/stdlib/atexit.h" #include "src/stdlib/exit.h" -int main(int argc, char **argv, char **envp); +extern "C" int main(int argc, char **argv, char **envp); namespace LIBC_NAMESPACE_DECL { diff --git a/libc/startup/gpu/nvptx/start.cpp b/libc/startup/gpu/nvptx/start.cpp index 561301638c3ca8..ef1e63e5161a61 100644 --- a/libc/startup/gpu/nvptx/start.cpp +++ b/libc/startup/gpu/nvptx/start.cpp @@ -13,7 +13,7 @@ #include "src/stdlib/atexit.h" #include "src/stdlib/exit.h" -int main(int argc, char **argv, char **envp); +extern "C" int main(int argc, char **argv, 
char **envp); namespace LIBC_NAMESPACE_DECL { diff --git a/libc/startup/linux/do_start.cpp b/libc/startup/linux/do_start.cpp index 7009895f0036c2..72060b4adb2148 100644 --- a/libc/startup/linux/do_start.cpp +++ b/libc/startup/linux/do_start.cpp @@ -20,7 +20,7 @@ #include #include -int main(int argc, char **argv, char **envp); +extern "C" int main(int argc, char **argv, char **envp); extern "C" { // These arrays are present in the .init_array and .fini_array sections. diff --git a/libc/test/IntegrationTest/test.h b/libc/test/IntegrationTest/test.h index f7068ed628a3d5..5be66d9edff02a 100644 --- a/libc/test/IntegrationTest/test.h +++ b/libc/test/IntegrationTest/test.h @@ -83,6 +83,6 @@ // tests, then we should not need to explicitly declare/define the main // function in individual integration tests. We will not need this macro // then. -#define TEST_MAIN int main +#define TEST_MAIN extern "C" int main #endif // LLVM_LIBC_UTILS_INTEGRATION_TEST_TEST_H diff --git a/libc/test/UnitTest/LibcTestMain.cpp b/libc/test/UnitTest/LibcTestMain.cpp index eb1125b5dcaf1f..94536e97164686 100644 --- a/libc/test/UnitTest/LibcTestMain.cpp +++ b/libc/test/UnitTest/LibcTestMain.cpp @@ -43,7 +43,7 @@ TestOptions parseOptions(int argc, char **argv) { } // anonymous namespace -int main(int argc, char **argv, char **envp) { +extern "C" int main(int argc, char **argv, char **envp) { LIBC_NAMESPACE::testing::argc = argc; LIBC_NAMESPACE::testing::argv = argv; LIBC_NAMESPACE::testing::envp = envp; diff --git a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp index 4cac072104ca9a..551b97caf81fd6 100644 --- a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp +++ b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp @@ -47,7 +47,7 @@ bool TestGeneratorMain(llvm::raw_ostream &OS, llvm::RecordKeeper &records) { OS << '\n'; - OS << "int main() {\n"; + OS << "extern \"C\" int main() {\n"; for (const auto &entrypoint : 
EntrypointNamesOption) { if (entrypoint == "errno") continue; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AArch64] Fold more load.x into load.i with large offset (PR #102837)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/102837 The list of load.x is refer to canFoldIntoAddrMode on D152828. Also support LDRSroX missed in canFoldIntoAddrMode ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [AArch64] Fold more load.x into load.i with large offset (PR #102837)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: Vitaly Buka (vitalybuka) Changes The list of load.x is refer to canFoldIntoAddrMode on D152828. Also support LDRSroX missed in canFoldIntoAddrMode --- Full diff: https://github.com/llvm/llvm-project/pull/102837.diff 3 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+13) - (modified) llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (+49-2) - (modified) llvm/test/CodeGen/AArch64/arm64-addrmode.ll (+34-51) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index e6d49da6fdef0b..548f50c97f2917 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4345,7 +4345,20 @@ AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) { switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected opcode"); + case AArch64::LDRBroX: case AArch64::LDRBBroX: + case AArch64::LDRSBXroX: + case AArch64::LDRSBWroX: + case AArch64::LDRHroX: + case AArch64::LDRHHroX: + case AArch64::LDRSHXroX: + case AArch64::LDRSHWroX: + case AArch64::LDRWroX: + case AArch64::LDRSroX: + case AArch64::LDRSWroX: + case AArch64::LDRDroX: + case AArch64::LDRXroX: + case AArch64::LDRQroX: return MI.getOperand(4); } } diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index 8de3f8db84ae2b..de1727aa6ec70f 100644 --- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -509,12 +509,38 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { } static unsigned getBaseAddressOpcode(unsigned Opc) { - // TODO: Add more index address loads/stores. + // TODO: Add more index address stores. 
switch (Opc) { default: llvm_unreachable("Opcode has no base address equivalent!"); + case AArch64::LDRBroX: +return AArch64::LDRBui; case AArch64::LDRBBroX: return AArch64::LDRBBui; + case AArch64::LDRSBXroX: +return AArch64::LDRSBXui; + case AArch64::LDRSBWroX: +return AArch64::LDRSBWui; + case AArch64::LDRHroX: +return AArch64::LDRHui; + case AArch64::LDRHHroX: +return AArch64::LDRHHui; + case AArch64::LDRSHXroX: +return AArch64::LDRSHXui; + case AArch64::LDRSHWroX: +return AArch64::LDRSHWui; + case AArch64::LDRWroX: +return AArch64::LDRWui; + case AArch64::LDRSroX: +return AArch64::LDRSui; + case AArch64::LDRSWroX: +return AArch64::LDRSWui; + case AArch64::LDRDroX: +return AArch64::LDRDui; + case AArch64::LDRXroX: +return AArch64::LDRXui; + case AArch64::LDRQroX: +return AArch64::LDRQui; } } @@ -766,10 +792,31 @@ static bool isMergeableIndexLdSt(MachineInstr &MI, int &Scale) { default: return false; // Scaled instructions. - // TODO: Add more index address loads/stores. + // TODO: Add more index address stores. 
+ case AArch64::LDRBroX: case AArch64::LDRBBroX: + case AArch64::LDRSBXroX: + case AArch64::LDRSBWroX: Scale = 1; return true; + case AArch64::LDRHroX: + case AArch64::LDRHHroX: + case AArch64::LDRSHXroX: + case AArch64::LDRSHWroX: +Scale = 2; +return true; + case AArch64::LDRWroX: + case AArch64::LDRSroX: + case AArch64::LDRSWroX: +Scale = 4; +return true; + case AArch64::LDRDroX: + case AArch64::LDRXroX: +Scale = 8; +return true; + case AArch64::LDRQroX: +Scale = 16; +return true; } } diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll index 2181eaaee7db68..bfef61abd8c129 100644 --- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll @@ -239,9 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a) { define i32 @LdOffset_i8_sext32(ptr %a) { ; CHECK-LABEL: LdOffset_i8_sext32: ; CHECK: // %bb.0: -; CHECK-NEXT:mov w8, #56952 // =0xde78 -; CHECK-NEXT:movk w8, #15, lsl #16 -; CHECK-NEXT:ldrsb w0, [x0, x8] +; CHECK-NEXT:add x8, x0, #253, lsl #12 // =1036288 +; CHECK-NEXT:ldrsb w0, [x8, #3704] ; CHECK-NEXT:ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 @@ -266,9 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a) { define i64 @LdOffset_i8_sext64(ptr %a) { ; CHECK-LABEL: LdOffset_i8_sext64: ; CHECK: // %bb.0: -; CHECK-NEXT:mov w8, #56952 // =0xde78 -; CHECK-NEXT:movk w8, #15, lsl #16 -; CHECK-NEXT:ldrsb x0, [x0, x8] +; CHECK-NEXT:add x8, x0, #253, lsl #12 // =1036288 +; CHECK-NEXT:ldrsb x0, [x8, #3704] ; CHECK-NEXT:ret %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992 %val = load i8, ptr %arrayidx, align 1 @@ -280,9 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a) { define i16 @LdOffset_i16(ptr %a) { ; CHECK-LABEL: LdOffset_i16: ; CHECK: // %bb.0: -; CH
[llvm-branch-commits] [clang] release/19.x: [C++20] [Modules] Don't diagnose duplicated implicit decl in multiple named modules (#102423) (PR #102425)
ChuanqiXu9 wrote: I landed this directly as the owner of serialization. I feel this change is not risky as it adds more conditions to generate a diagnostic message we didn't do in 18.x and before. So nothing will be worse. https://github.com/llvm/llvm-project/pull/102425 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits