[llvm-branch-commits] [llvm] 084d025 - Address comments

2024-08-11 Thread Alexis Engelke via llvm-branch-commits

Author: Alexis Engelke
Date: 2024-08-11T07:39:53Z
New Revision: 084d02577eb68dd2b6260b9b1d12a61631e8d799

URL: 
https://github.com/llvm/llvm-project/commit/084d02577eb68dd2b6260b9b1d12a61631e8d799
DIFF: 
https://github.com/llvm/llvm-project/commit/084d02577eb68dd2b6260b9b1d12a61631e8d799.diff

LOG: Address comments

Added: 
llvm/test/Transforms/SLPVectorizer/X86/const-in-different-functions.ll

Modified: 


Removed: 
llvm/test/Transforms/SLPVectorizer/const-in-different-functions.ll



diff --git a/llvm/test/Transforms/SLPVectorizer/const-in-different-functions.ll b/llvm/test/Transforms/SLPVectorizer/X86/const-in-different-functions.ll
similarity index 75%
rename from llvm/test/Transforms/SLPVectorizer/const-in-different-functions.ll
rename to llvm/test/Transforms/SLPVectorizer/X86/const-in-different-functions.ll
index 29a8f15733c450..2e473f4f2c213c 100644
--- a/llvm/test/Transforms/SLPVectorizer/const-in-different-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/const-in-different-functions.ll
@@ -4,17 +4,19 @@
 ; Test that SLP vectorize doesn't crash if a stored constant is used in 
multiple
 ; functions.
 
+@p = external global [64 x float]
+
 define void @_Z1hPfl() {
 ; CHECK-LABEL: define void @_Z1hPfl() {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:[[TMP0:%.*]] = getelementptr i8, ptr null, i64 28
+; CHECK-NEXT:[[TMP0:%.*]] = getelementptr i8, ptr @p, i64 28
 ; CHECK-NEXT:store <2 x float> , 
ptr [[TMP0]], align 4
 ; CHECK-NEXT:ret void
 ;
 entry:
-  %0 = getelementptr i8, ptr null, i64 28
+  %0 = getelementptr i8, ptr @p, i64 28
   store float 0.00e+00, ptr %0, align 4
-  %1 = getelementptr i8, ptr null, i64 32
+  %1 = getelementptr i8, ptr @p, i64 32
   store float 1.00e+00, ptr %1, align 16
   ret void
 }
@@ -27,8 +29,8 @@ define void @_Z1mv(i64 %arrayidx4.i.2.idx) {
 ; CHECK:   [[FOR_COND1_PREHEADER_LR_PH_I:.*:]]
 ; CHECK-NEXT:br label %[[FOR_COND1_PREHEADER_I:.*]]
 ; CHECK:   [[FOR_COND1_PREHEADER_I]]:
-; CHECK-NEXT:store float 1.00e+00, ptr null, align 4
-; CHECK-NEXT:[[ARRAYIDX4_I_2:%.*]] = getelementptr i8, ptr null, i64 
[[ARRAYIDX4_I_2_IDX]]
+; CHECK-NEXT:store float 1.00e+00, ptr @p, align 4
+; CHECK-NEXT:[[ARRAYIDX4_I_2:%.*]] = getelementptr i8, ptr @p, i64 
[[ARRAYIDX4_I_2_IDX]]
 ; CHECK-NEXT:store float 0.00e+00, ptr [[ARRAYIDX4_I_2]], align 4
 ; CHECK-NEXT:br label %[[FOR_COND1_PREHEADER_I]]
 ;
@@ -39,8 +41,8 @@ for.cond1.preheader.lr.ph.i:  ; No 
predecessors!
   br label %for.cond1.preheader.i
 
 for.cond1.preheader.i:; preds = 
%for.cond1.preheader.i, %for.cond1.preheader.lr.ph.i
-  store float 1.00e+00, ptr null, align 4
-  %arrayidx4.i.2 = getelementptr i8, ptr null, i64 %arrayidx4.i.2.idx
+  store float 1.00e+00, ptr @p, align 4
+  %arrayidx4.i.2 = getelementptr i8, ptr @p, i64 %arrayidx4.i.2.idx
   store float 0.00e+00, ptr %arrayidx4.i.2, align 4
   br label %for.cond1.preheader.i
 }



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits


@@ -413,18 +439,57 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
   return Changed;
 }
 
-bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
-  auto FI = FIM.find(F);
-  if (FI == FIM.end())
-return false;
+bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM,
+ LazyCallGraph &CG) {
 
-  return AMDGPUPerfHint::isMemBound(FI->second);
+  SmallVector Worklist;
+  CG.buildRefSCCs();
+  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
+for (LazyCallGraph::SCC &SCC : RC) {
+  if (SCC.size() != 1)
+continue;
+  Function &F = SCC.begin()->getFunction();
+  if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage())

arsenm wrote:

Actually this broke the test 

https://github.com/llvm/llvm-project/pull/102645
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/102645

>From 0fa3fead38acc0dc3bca5d48c00be1090d4a16ad Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 9 Aug 2024 17:27:53 +0400
Subject: [PATCH] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager

This was much more difficult than I anticipated. The pass is
not in a good state, with poor test coverage. The legacy PM
does seem to be relying on maintaining the map state between
different SCCs, which seems bad. The pass is going out of its
way to avoid putting the attributes it introduces onto non-callee
functions. If it just added them, we could use them directly
instead of relying on the map, I would think.

The NewPM path uses a ModulePass; I'm not sure if we should be
using CGSCC here but there seems to be some missing infrastructure
to support backend defined ones.
---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |   4 +-
 llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp |   2 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |   3 +
 .../Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp  | 111 ++
 .../Target/AMDGPU/AMDGPUPerfHintAnalysis.h|  62 ++
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   3 +-
 llvm/test/CodeGen/AMDGPU/perfhint.ll  |   1 +
 7 files changed, 136 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 195e2a19214e80..5b8d37a8ae7944 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -209,8 +209,8 @@ extern char &SIPreAllocateWWMRegsID;
 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
 extern char &AMDGPUImageIntrinsicOptimizerID;
 
-void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
-extern char &AMDGPUPerfHintAnalysisID;
+void initializeAMDGPUPerfHintAnalysisLegacyPass(PassRegistry &);
+extern char &AMDGPUPerfHintAnalysisLegacyID;
 
 void initializeGCNRegPressurePrinterPass(PassRegistry &);
 extern char &GCNRegPressurePrinterID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 8579774f522309..bbb4573655ab79 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -102,7 +102,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISelLegacy, 
"amdgpu-isel",
   "AMDGPU DAG->DAG Pattern Instruction Selection", false,
   false)
 INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
-INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
+INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysisLegacy)
 INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass)
 #ifdef EXPENSIVE_CHECKS
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index b6a6c33d85f83c..7188c8953254c0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -22,6 +22,9 @@ MODULE_PASS("amdgpu-lower-buffer-fat-pointers",
 AMDGPULowerBufferFatPointersPass(*this))
 MODULE_PASS("amdgpu-lower-ctor-dtor", AMDGPUCtorDtorLoweringPass())
 MODULE_PASS("amdgpu-lower-module-lds", AMDGPULowerModuleLDSPass(*this))
+MODULE_PASS("amdgpu-perf-hint",
+AMDGPUPerfHintAnalysisPass(
+  *static_cast(this)))
 MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
 MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
 #undef MODULE_PASS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
index 1213d5e0b41db1..040e931b82af2f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPerfHintAnalysis.cpp
@@ -12,12 +12,15 @@
 ///
 
//===--===//
 
-#include "AMDGPU.h"
 #include "AMDGPUPerfHintAnalysis.h"
+#include "AMDGPU.h"
+#include "AMDGPUTargetMachine.h"
 #include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
@@ -54,12 +57,6 @@ static cl::opt
 STATISTIC(NumMemBound, "Number of functions marked as memory bound");
 STATISTIC(NumLimitWave, "Number of functions marked as needing limit wave");
 
-char llvm::AMDGPUPerfHintAnalysis::ID = 0;
-char &llvm::AMDGPUPerfHintAnalysisID = AMDGPUPerfHintAnalysis::ID;
-
-INITIALIZE_PASS(AMDGPUPerfHintAnalysis, DEBUG_TYPE,
-"Analysis if a function is memory bound", true, true)
-
 namespace {
 
 struct AMDGPUPerfHint {
@@ -67,7 +64,7 @@ struct AMDGPUPerfHint {
 
 public:
   AMDGPUPerfHint(AMDGPUPerfHintAnalysis::FuncInfoMap &FIM_,

[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits


@@ -413,18 +439,57 @@ bool AMDGPUPerfHintAnalysis::runOnSCC(CallGraphSCC &SCC) {
   return Changed;
 }
 
-bool AMDGPUPerfHintAnalysis::isMemoryBound(const Function *F) const {
-  auto FI = FIM.find(F);
-  if (FI == FIM.end())
-return false;
+bool AMDGPUPerfHintAnalysis::run(const GCNTargetMachine &TM,
+ LazyCallGraph &CG) {
 
-  return AMDGPUPerfHint::isMemBound(FI->second);
+  SmallVector Worklist;
+  CG.buildRefSCCs();
+  for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
+for (LazyCallGraph::SCC &SCC : RC) {
+  if (SCC.size() != 1)
+continue;
+  Function &F = SCC.begin()->getFunction();
+  if (!F.isDeclaration() && !F.doesNotRecurse() && F.hasInternalLinkage())

arsenm wrote:

Nevermind, I somehow posted the wrong version that has the reverse list which 
is wrong 

https://github.com/llvm/llvm-project/pull/102645
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/102805

R600 has a separate CodeGenPassBuilder anyway.

>From 0a6b6eee1b1c2926169ff337a16ee291d5f72001 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 11 Aug 2024 11:55:22 +0400
Subject: [PATCH] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder

R600 has a separate CodeGenPassBuilder anyway.
---
 .../Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp   |  2 +-
 .../lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h |  6 +++---
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp   | 16 
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h | 12 ++--
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
index cc4285f130fc82..0d829b6aeafe8f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
@@ -15,7 +15,7 @@
 using namespace llvm;
 
 AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
-AMDGPUTargetMachine &TM, const CGPassBuilderOption &Opts,
+GCNTargetMachine &TM, const CGPassBuilderOption &Opts,
 PassInstrumentationCallbacks *PIC)
 : CodeGenPassBuilder(TM, Opts, PIC) {
   Opt.RequiresCodeGenSCCOrder = true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
index 5f79e309703a33..e656e166b3eb2e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
@@ -14,12 +14,12 @@
 
 namespace llvm {
 
-class AMDGPUTargetMachine;
+class GCNTargetMachine;
 
 class AMDGPUCodeGenPassBuilder
-: public CodeGenPassBuilder 
{
+: public CodeGenPassBuilder {
 public:
-  AMDGPUCodeGenPassBuilder(AMDGPUTargetMachine &TM,
+  AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM,
const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 202466f18d1bd6..62cf9c6cd61140 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -660,14 +660,6 @@ parseAMDGPUAtomicOptimizerStrategy(StringRef Params) {
   return make_error("invalid parameter", 
inconvertibleErrorCode());
 }
 
-Error AMDGPUTargetMachine::buildCodeGenPipeline(
-ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
-CodeGenFileType FileType, const CGPassBuilderOption &Opts,
-PassInstrumentationCallbacks *PIC) {
-  AMDGPUCodeGenPassBuilder CGPB(*this, Opts, PIC);
-  return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);
-}
-
 void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
 
 #define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
@@ -900,6 +892,14 @@ GCNTargetMachine::getTargetTransformInfo(const Function 
&F) const {
   return TargetTransformInfo(GCNTTIImpl(this, F));
 }
 
+Error GCNTargetMachine::buildCodeGenPipeline(
+ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+CodeGenFileType FileType, const CGPassBuilderOption &Opts,
+PassInstrumentationCallbacks *PIC) {
+  AMDGPUCodeGenPassBuilder CGPB(*this, Opts, PIC);
+  return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);
+}
+
 
//===--===//
 // AMDGPU Pass Setup
 
//===--===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 0f74fbc22fa84f..6bb8788cc73b0c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -52,12 +52,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
 return TLOF.get();
   }
 
-  Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out,
- raw_pwrite_stream *DwoOut,
- CodeGenFileType FileType,
- const CGPassBuilderOption &Opts,
- PassInstrumentationCallbacks *PIC) override;
-
   void registerPassBuilderCallbacks(PassBuilder &PB) override;
   void registerDefaultAliasAnalyses(AAManager &) override;
 
@@ -103,6 +97,12 @@ class GCNTargetMachine final : public AMDGPUTargetMachine {
 return true;
   }
 
+  Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out,
+ raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType,
+ const CGPassBuilderOption &Opts,
+ PassInstrumentationCallbacks *PIC) override;
+
   void registerMachineRegisterInfoCallback(MachineFunction &MF) const override;
 
   MachineFunctionInfo *

_

[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈
* **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about 
stacking.


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/102805
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)

2024-08-11 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes

R600 has a separate CodeGenPassBuilder anyway.

---
Full diff: https://github.com/llvm/llvm-project/pull/102805.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+1-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h (+3-3) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+8-8) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+6-6) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
index cc4285f130fc8..0d829b6aeafe8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
@@ -15,7 +15,7 @@
 using namespace llvm;
 
 AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
-AMDGPUTargetMachine &TM, const CGPassBuilderOption &Opts,
+GCNTargetMachine &TM, const CGPassBuilderOption &Opts,
 PassInstrumentationCallbacks *PIC)
 : CodeGenPassBuilder(TM, Opts, PIC) {
   Opt.RequiresCodeGenSCCOrder = true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
index 5f79e309703a3..e656e166b3eb2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
@@ -14,12 +14,12 @@
 
 namespace llvm {
 
-class AMDGPUTargetMachine;
+class GCNTargetMachine;
 
 class AMDGPUCodeGenPassBuilder
-: public CodeGenPassBuilder 
{
+: public CodeGenPassBuilder {
 public:
-  AMDGPUCodeGenPassBuilder(AMDGPUTargetMachine &TM,
+  AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM,
const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC);
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 202466f18d1bd..62cf9c6cd6114 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -660,14 +660,6 @@ parseAMDGPUAtomicOptimizerStrategy(StringRef Params) {
   return make_error("invalid parameter", 
inconvertibleErrorCode());
 }
 
-Error AMDGPUTargetMachine::buildCodeGenPipeline(
-ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
-CodeGenFileType FileType, const CGPassBuilderOption &Opts,
-PassInstrumentationCallbacks *PIC) {
-  AMDGPUCodeGenPassBuilder CGPB(*this, Opts, PIC);
-  return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);
-}
-
 void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
 
 #define GET_PASS_REGISTRY "AMDGPUPassRegistry.def"
@@ -900,6 +892,14 @@ GCNTargetMachine::getTargetTransformInfo(const Function 
&F) const {
   return TargetTransformInfo(GCNTTIImpl(this, F));
 }
 
+Error GCNTargetMachine::buildCodeGenPipeline(
+ModulePassManager &MPM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+CodeGenFileType FileType, const CGPassBuilderOption &Opts,
+PassInstrumentationCallbacks *PIC) {
+  AMDGPUCodeGenPassBuilder CGPB(*this, Opts, PIC);
+  return CGPB.buildPipeline(MPM, Out, DwoOut, FileType);
+}
+
 
//===--===//
 // AMDGPU Pass Setup
 
//===--===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 0f74fbc22fa84..6bb8788cc73b0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -52,12 +52,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine {
 return TLOF.get();
   }
 
-  Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out,
- raw_pwrite_stream *DwoOut,
- CodeGenFileType FileType,
- const CGPassBuilderOption &Opts,
- PassInstrumentationCallbacks *PIC) override;
-
   void registerPassBuilderCallbacks(PassBuilder &PB) override;
   void registerDefaultAliasAnalyses(AAManager &) override;
 
@@ -103,6 +97,12 @@ class GCNTargetMachine final : public AMDGPUTargetMachine {
 return true;
   }
 
+  Error buildCodeGenPipeline(ModulePassManager &MPM, raw_pwrite_stream &Out,
+ raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType,
+ const CGPassBuilderOption &Opts,
+ PassInstrumentationCallbacks *PIC) override;
+
   void registerMachineRegisterInfoCallback(MachineFunction &MF) const override;
 
   MachineFunctionInfo *

``




https://github.com/llvm/llvm-project/pull/102805
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/102805
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)

2024-08-11 Thread via llvm-branch-commits

https://github.com/paperchalice approved this pull request.


https://github.com/llvm/llvm-project/pull/102805
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/102806

None

>From 56fc9f47bd12696b13a677ee92c83a85cbf09466 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 11 Aug 2024 12:57:27 +0400
Subject: [PATCH] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass
 manager

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  17 ++-
 .../AMDGPU/AMDGPULateCodeGenPrepare.cpp   | 110 +++---
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |   3 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   4 +-
 .../AMDGPU/amdgpu-late-codegenprepare.ll  |   1 +
 5 files changed, 88 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 5b8d37a8ae7944..2a6b5a10a5d464 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -18,6 +18,7 @@
 namespace llvm {
 
 class AMDGPUTargetMachine;
+class GCNTargetMachine;
 class TargetMachine;
 
 // GlobalISel passes
@@ -54,7 +55,7 @@ FunctionPass *createSIPostRABundlerPass();
 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
 FunctionPass *createAMDGPUCodeGenPreparePass();
-FunctionPass *createAMDGPULateCodeGenPreparePass();
+FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
 FunctionPass *createAMDGPURewriteOutArgumentsPass();
 ModulePass *
@@ -273,6 +274,16 @@ class AMDGPUCodeGenPreparePass
   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
 };
 
+class AMDGPULateCodeGenPreparePass
+: public PassInfoMixin {
+private:
+  const GCNTargetMachine &TM;
+
+public:
+  AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {};
+  PreservedAnalyses run(Function &, FunctionAnalysisManager &);
+};
+
 class AMDGPULowerKernelArgumentsPass
 : public PassInfoMixin {
 private:
@@ -329,8 +340,8 @@ extern char &AMDGPUCodeGenPrepareID;
 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
 extern char &AMDGPURemoveIncompatibleFunctionsID;
 
-void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
-extern char &AMDGPULateCodeGenPrepareID;
+void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &);
+extern char &AMDGPULateCodeGenPrepareLegacyID;
 
 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 7bf5170794cd9e..36dfebacaed686 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -42,10 +42,10 @@ static cl::opt
 namespace {
 
 class AMDGPULateCodeGenPrepare
-: public FunctionPass,
-  public InstVisitor {
+: public InstVisitor {
   Module *Mod = nullptr;
   const DataLayout *DL = nullptr;
+  const GCNSubtarget &ST;
 
   AssumptionCache *AC = nullptr;
   UniformityInfo *UA = nullptr;
@@ -53,24 +53,10 @@ class AMDGPULateCodeGenPrepare
   SmallVector DeadInsts;
 
 public:
-  static char ID;
-
-  AMDGPULateCodeGenPrepare() : FunctionPass(ID) {}
-
-  StringRef getPassName() const override {
-return "AMDGPU IR late optimizations";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-AU.addRequired();
-AU.addRequired();
-AU.addRequired();
-AU.setPreservesAll();
-  }
-
-  bool doInitialization(Module &M) override;
-  bool runOnFunction(Function &F) override;
-
+  AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST,
+   AssumptionCache *AC, UniformityInfo *UA)
+  : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
+  bool run(Function &F);
   bool visitInstruction(Instruction &) { return false; }
 
   // Check if the specified value is at least DWORD aligned.
@@ -148,23 +134,7 @@ class LiveRegOptimizer {
 
 } // end anonymous namespace
 
-bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) {
-  Mod = &M;
-  DL = &Mod->getDataLayout();
-  return false;
-}
-
-bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
-  if (skipFunction(F))
-return false;
-
-  const TargetPassConfig &TPC = getAnalysis();
-  const TargetMachine &TM = TPC.getTM();
-  const GCNSubtarget &ST = TM.getSubtarget(F);
-
-  AC = &getAnalysis().getAssumptionCache(F);
-  UA = &getAnalysis().getUniformityInfo();
-
+bool AMDGPULateCodeGenPrepare::run(Function &F) {
   // "Optimize" the virtual regs that cross basic block boundaries. When
   // building the SelectionDAG, vectors of illegal types that cross basic 
blocks
   // will be scalarized and widened, with each scalar living in its
@@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst 
&LI) {
   return true;
 }
 
-INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
+PreservedAnalyses
+AMDGPULateCodeGenPreparePass:

[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/102806
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#102806** https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈
* **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about 
stacking.


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/102806
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)

2024-08-11 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/102806.diff


5 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPU.h (+14-3) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp (+68-42) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def (+3) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+2-2) 
- (modified) llvm/test/CodeGen/AMDGPU/amdgpu-late-codegenprepare.ll (+1) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 5b8d37a8ae794..2a6b5a10a5d46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -18,6 +18,7 @@
 namespace llvm {
 
 class AMDGPUTargetMachine;
+class GCNTargetMachine;
 class TargetMachine;
 
 // GlobalISel passes
@@ -54,7 +55,7 @@ FunctionPass *createSIPostRABundlerPass();
 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
 FunctionPass *createAMDGPUCodeGenPreparePass();
-FunctionPass *createAMDGPULateCodeGenPreparePass();
+FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
 FunctionPass *createAMDGPURewriteOutArgumentsPass();
 ModulePass *
@@ -273,6 +274,16 @@ class AMDGPUCodeGenPreparePass
   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
 };
 
+class AMDGPULateCodeGenPreparePass
+: public PassInfoMixin {
+private:
+  const GCNTargetMachine &TM;
+
+public:
+  AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {};
+  PreservedAnalyses run(Function &, FunctionAnalysisManager &);
+};
+
 class AMDGPULowerKernelArgumentsPass
 : public PassInfoMixin {
 private:
@@ -329,8 +340,8 @@ extern char &AMDGPUCodeGenPrepareID;
 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
 extern char &AMDGPURemoveIncompatibleFunctionsID;
 
-void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
-extern char &AMDGPULateCodeGenPrepareID;
+void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &);
+extern char &AMDGPULateCodeGenPrepareLegacyID;
 
 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 7bf5170794cd9..36dfebacaed68 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -42,10 +42,10 @@ static cl::opt
 namespace {
 
 class AMDGPULateCodeGenPrepare
-    : public FunctionPass,
-      public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
+    : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
   Module *Mod = nullptr;
   const DataLayout *DL = nullptr;
+  const GCNSubtarget &ST;
 
   AssumptionCache *AC = nullptr;
   UniformityInfo *UA = nullptr;
@@ -53,24 +53,10 @@ class AMDGPULateCodeGenPrepare
   SmallVector<WeakTrackingVH, 8> DeadInsts;
 
 public:
-  static char ID;
-
-  AMDGPULateCodeGenPrepare() : FunctionPass(ID) {}
-
-  StringRef getPassName() const override {
-return "AMDGPU IR late optimizations";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequired<UniformityInfoWrapperPass>();
-    AU.addRequired<TargetPassConfig>();
-    AU.setPreservesAll();
-  }
-
-  bool doInitialization(Module &M) override;
-  bool runOnFunction(Function &F) override;
-
+  AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST,
+   AssumptionCache *AC, UniformityInfo *UA)
+  : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
+  bool run(Function &F);
   bool visitInstruction(Instruction &) { return false; }
 
   // Check if the specified value is at least DWORD aligned.
@@ -148,23 +134,7 @@ class LiveRegOptimizer {
 
 } // end anonymous namespace
 
-bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) {
-  Mod = &M;
-  DL = &Mod->getDataLayout();
-  return false;
-}
-
-bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
-  if (skipFunction(F))
-return false;
-
-  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
-  const TargetMachine &TM = TPC.getTM<TargetMachine>();
-  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-
-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
-
+bool AMDGPULateCodeGenPrepare::run(Function &F) {
   // "Optimize" the virtual regs that cross basic block boundaries. When
   // building the SelectionDAG, vectors of illegal types that cross basic 
blocks
   // will be scalarized and widened, with each scalar living in its
@@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst 
&LI) {
   return true;
 }
 
-INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
+PreservedAnalyses
+AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+
+  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);

[llvm-branch-commits] [llvm] NewPM/AMDGPU: Port AMDGPUPerfHintAnalysis to new pass manager (PR #102645)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Aug 11, 7:00 AM EDT**: @arsenm started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/102645).


https://github.com/llvm/llvm-project/pull/102645
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Use GCNTargetMachine in AMDGPUCodeGenPassBuilder (PR #102805)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Aug 11, 7:00 AM EDT**: @arsenm started a stack merge that includes this 
pull request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/102805).


https://github.com/llvm/llvm-project/pull/102805
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)

2024-08-11 Thread Alexander Shaposhnikov via llvm-branch-commits


@@ -0,0 +1,334 @@
+//===- nsan_allocator.cpp 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// NumericalStabilitySanitizer allocator.
+//
+//===--===//
+
+#include "nsan_allocator.h"
+#include "interception/interception.h"
+#include "nsan.h"
+#include "nsan_platform.h"
+#include "nsan_thread.h"
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_checks.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_allocator_report.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_errno.h"
+
+DECLARE_REAL(void *, memset, void *dest, int c, uptr n)
+
+using namespace __nsan;
+
+namespace {
+struct Metadata {
+  uptr requested_size;
+};
+
+struct NsanMapUnmapCallback {
+  void OnMap(uptr p, uptr size) const {}
+  void OnMapSecondary(uptr p, uptr size, uptr user_begin,
+  uptr user_size) const {}
+  void OnUnmap(uptr p, uptr size) const {}
+};
+
+const uptr kMaxAllowedMallocSize = 1ULL << 40;
+
+// Allocator64 parameters. Deliberately using a short name.
+struct AP64 {
+  static const uptr kSpaceBeg = Mapping::kHeapMemBeg;
+  static const uptr kSpaceSize = 0x400; // 4T.
+  static const uptr kMetadataSize = sizeof(Metadata);
+  using SizeClassMap = DefaultSizeClassMap;
+  using MapUnmapCallback = NsanMapUnmapCallback;
+  static const uptr kFlags = 0;
+  using AddressSpaceView = LocalAddressSpaceView;
+};
+} // namespace
+
+using PrimaryAllocator = SizeClassAllocator64<AP64>;
+using Allocator = CombinedAllocator<PrimaryAllocator>;
+using AllocatorCache = Allocator::AllocatorCache;
+
+static Allocator allocator;
+static AllocatorCache fallback_allocator_cache;
+static StaticSpinMutex fallback_mutex;
+
+static uptr max_malloc_size;
+
+void __nsan::NsanAllocatorInit() {
+  SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
+  allocator.Init(common_flags()->allocator_release_to_os_interval_ms);
+  if (common_flags()->max_allocation_size_mb)
+max_malloc_size = Min(common_flags()->max_allocation_size_mb << 20,
+  kMaxAllowedMallocSize);
+  else
+max_malloc_size = kMaxAllowedMallocSize;
+}
+
+static AllocatorCache *GetAllocatorCache(NsanThreadLocalMallocStorage *ms) {
+  CHECK(ms);
+  CHECK_LE(sizeof(AllocatorCache), sizeof(ms->allocator_cache));
+  return reinterpret_cast<AllocatorCache *>(ms->allocator_cache);
+}
+
+void NsanThreadLocalMallocStorage::Init() {
+  allocator.InitCache(GetAllocatorCache(this));
+}
+
+void NsanThreadLocalMallocStorage::CommitBack() {
+  allocator.SwallowCache(GetAllocatorCache(this));
+  allocator.DestroyCache(GetAllocatorCache(this));
+}
+
+static void *NsanAllocate(uptr size, uptr alignment, bool zero) {
+  if (UNLIKELY(size > max_malloc_size)) {
+if (AllocatorMayReturnNull()) {
+  Report("WARNING: NumericalStabilitySanitizer failed to allocate 0x%zx "
+ "bytes\n",
+ size);
+  return nullptr;
+}
+BufferedStackTrace stack;
+GET_FATAL_STACK_TRACE_IF_EMPTY(&stack);
+ReportAllocationSizeTooBig(size, max_malloc_size, &stack);
+  }
+  if (UNLIKELY(IsRssLimitExceeded())) {
+if (AllocatorMayReturnNull())
+  return nullptr;
+BufferedStackTrace stack;
+GET_FATAL_STACK_TRACE_IF_EMPTY(&stack);
+ReportRssLimitExceeded(&stack);
+  }
+  NsanThread *t = GetCurrentThread();
+  void *allocated;
+  if (t) {
+AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
+allocated = allocator.Allocate(cache, size, alignment);
+  } else {

alexander-shaposhnikov wrote:

I'm wondering - when does the fallback case happen  (t == nullptr) ?

https://github.com/llvm/llvm-project/pull/102764
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)

2024-08-11 Thread Alexander Shaposhnikov via llvm-branch-commits


@@ -0,0 +1,334 @@
+//===- nsan_allocator.cpp 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// NumericalStabilitySanitizer allocator.
+//
+//===--===//
+
+#include "nsan_allocator.h"
+#include "interception/interception.h"
+#include "nsan.h"
+#include "nsan_platform.h"
+#include "nsan_thread.h"
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_checks.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_allocator_report.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_errno.h"
+
+DECLARE_REAL(void *, memset, void *dest, int c, uptr n)
+
+using namespace __nsan;
+
+namespace {
+struct Metadata {
+  uptr requested_size;
+};
+
+struct NsanMapUnmapCallback {
+  void OnMap(uptr p, uptr size) const {}
+  void OnMapSecondary(uptr p, uptr size, uptr user_begin,
+  uptr user_size) const {}
+  void OnUnmap(uptr p, uptr size) const {}
+};
+
+const uptr kMaxAllowedMallocSize = 1ULL << 40;
+
+// Allocator64 parameters. Deliberately using a short name.
+struct AP64 {
+  static const uptr kSpaceBeg = Mapping::kHeapMemBeg;
+  static const uptr kSpaceSize = 0x400; // 4T.
+  static const uptr kMetadataSize = sizeof(Metadata);
+  using SizeClassMap = DefaultSizeClassMap;
+  using MapUnmapCallback = NsanMapUnmapCallback;
+  static const uptr kFlags = 0;
+  using AddressSpaceView = LocalAddressSpaceView;
+};
+} // namespace
+
+using PrimaryAllocator = SizeClassAllocator64<AP64>;
+using Allocator = CombinedAllocator<PrimaryAllocator>;
+using AllocatorCache = Allocator::AllocatorCache;
+
+static Allocator allocator;
+static AllocatorCache fallback_allocator_cache;
+static StaticSpinMutex fallback_mutex;
+
+static uptr max_malloc_size;
+
+void __nsan::NsanAllocatorInit() {
+  SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
+  allocator.Init(common_flags()->allocator_release_to_os_interval_ms);
+  if (common_flags()->max_allocation_size_mb)
+max_malloc_size = Min(common_flags()->max_allocation_size_mb << 20,
+  kMaxAllowedMallocSize);
+  else
+max_malloc_size = kMaxAllowedMallocSize;
+}
+
+static AllocatorCache *GetAllocatorCache(NsanThreadLocalMallocStorage *ms) {
+  CHECK(ms);
+  CHECK_LE(sizeof(AllocatorCache), sizeof(ms->allocator_cache));
+  return reinterpret_cast<AllocatorCache *>(ms->allocator_cache);
+}
+
+void NsanThreadLocalMallocStorage::Init() {
+  allocator.InitCache(GetAllocatorCache(this));
+}
+
+void NsanThreadLocalMallocStorage::CommitBack() {
+  allocator.SwallowCache(GetAllocatorCache(this));
+  allocator.DestroyCache(GetAllocatorCache(this));
+}
+
+static void *NsanAllocate(uptr size, uptr alignment, bool zero) {
+  if (UNLIKELY(size > max_malloc_size)) {
+if (AllocatorMayReturnNull()) {
+  Report("WARNING: NumericalStabilitySanitizer failed to allocate 0x%zx "
+ "bytes\n",
+ size);
+  return nullptr;
+}
+BufferedStackTrace stack;
+GET_FATAL_STACK_TRACE_IF_EMPTY(&stack);
+ReportAllocationSizeTooBig(size, max_malloc_size, &stack);
+  }
+  if (UNLIKELY(IsRssLimitExceeded())) {
+if (AllocatorMayReturnNull())
+  return nullptr;
+BufferedStackTrace stack;
+GET_FATAL_STACK_TRACE_IF_EMPTY(&stack);
+ReportRssLimitExceeded(&stack);
+  }
+  NsanThread *t = GetCurrentThread();
+  void *allocated;
+  if (t) {
+AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
+allocated = allocator.Allocate(cache, size, alignment);
+  } else {
+SpinMutexLock l(&fallback_mutex);
+AllocatorCache *cache = &fallback_allocator_cache;
+allocated = allocator.Allocate(cache, size, alignment);
+  }
+  if (UNLIKELY(!allocated)) {
+SetAllocatorOutOfMemory();
+if (AllocatorMayReturnNull())
+  return nullptr;
+BufferedStackTrace stack;
+GET_FATAL_STACK_TRACE_IF_EMPTY(&stack);
+ReportOutOfMemory(size, &stack);
+  }
+  auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated));
+  meta->requested_size = size;
+  if (zero && allocator.FromPrimary(allocated))
+REAL(memset)(allocated, 0, size);
+  __nsan_set_value_unknown(allocated, size);
+  RunMallocHooks(allocated, size);
+  return allocated;
+}
+
+void __nsan::NsanDeallocate(void *p) {
+  DCHECK(p);
+  RunFreeHooks(p);
+  auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(p));
+  meta->requested_size = 0;
+  if (NsanThread *t = GetCurrentThread()) {
+AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
+allocator.Deallocate(cache, p);
+  } else {
+SpinMutexLock l(&fallback_mutex)

[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)

2024-08-11 Thread Alexander Shaposhnikov via llvm-branch-commits

https://github.com/alexander-shaposhnikov edited 
https://github.com/llvm/llvm-project/pull/102764
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/102806

>From 0bf4c6c04419447c691ec5f722c2af8af73994f9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 11 Aug 2024 12:57:27 +0400
Subject: [PATCH] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass
 manager

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  16 ++-
 .../AMDGPU/AMDGPULateCodeGenPrepare.cpp   | 110 +++---
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |   3 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   4 +-
 .../AMDGPU/amdgpu-late-codegenprepare.ll  |   1 +
 5 files changed, 87 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 34ef19637bf973..f5044f52f1648d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -56,7 +56,7 @@ FunctionPass *createSIPostRABundlerPass();
 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
 FunctionPass *createAMDGPUCodeGenPreparePass();
-FunctionPass *createAMDGPULateCodeGenPreparePass();
+FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
 FunctionPass *createAMDGPURewriteOutArgumentsPass();
 ModulePass *
@@ -282,6 +282,16 @@ class AMDGPUCodeGenPreparePass
   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
 };
 
+class AMDGPULateCodeGenPreparePass
+    : public PassInfoMixin<AMDGPULateCodeGenPreparePass> {
+private:
+  const GCNTargetMachine &TM;
+
+public:
+  AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {};
+  PreservedAnalyses run(Function &, FunctionAnalysisManager &);
+};
+
 class AMDGPULowerKernelArgumentsPass
     : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
 private:
@@ -352,8 +362,8 @@ extern char &AMDGPUCodeGenPrepareID;
 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
 extern char &AMDGPURemoveIncompatibleFunctionsID;
 
-void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
-extern char &AMDGPULateCodeGenPrepareID;
+void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &);
+extern char &AMDGPULateCodeGenPrepareLegacyID;
 
 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 7bf5170794cd9e..36dfebacaed686 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -42,10 +42,10 @@ static cl::opt
 namespace {
 
 class AMDGPULateCodeGenPrepare
-    : public FunctionPass,
-      public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
+    : public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
   Module *Mod = nullptr;
   const DataLayout *DL = nullptr;
+  const GCNSubtarget &ST;
 
   AssumptionCache *AC = nullptr;
   UniformityInfo *UA = nullptr;
@@ -53,24 +53,10 @@ class AMDGPULateCodeGenPrepare
   SmallVector<WeakTrackingVH, 8> DeadInsts;
 
 public:
-  static char ID;
-
-  AMDGPULateCodeGenPrepare() : FunctionPass(ID) {}
-
-  StringRef getPassName() const override {
-return "AMDGPU IR late optimizations";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequired<UniformityInfoWrapperPass>();
-    AU.addRequired<TargetPassConfig>();
-    AU.setPreservesAll();
-  }
-
-  bool doInitialization(Module &M) override;
-  bool runOnFunction(Function &F) override;
-
+  AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST,
+   AssumptionCache *AC, UniformityInfo *UA)
+  : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
+  bool run(Function &F);
   bool visitInstruction(Instruction &) { return false; }
 
   // Check if the specified value is at least DWORD aligned.
@@ -148,23 +134,7 @@ class LiveRegOptimizer {
 
 } // end anonymous namespace
 
-bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) {
-  Mod = &M;
-  DL = &Mod->getDataLayout();
-  return false;
-}
-
-bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
-  if (skipFunction(F))
-return false;
-
-  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
-  const TargetMachine &TM = TPC.getTM<TargetMachine>();
-  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-
-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
-
+bool AMDGPULateCodeGenPrepare::run(Function &F) {
   // "Optimize" the virtual regs that cross basic block boundaries. When
   // building the SelectionDAG, vectors of illegal types that cross basic 
blocks
   // will be scalarized and widened, with each scalar living in its
@@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst 
&LI) {
   return true;
 }
 
-INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
+PreservedAnalyses
+AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+
+  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);

[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/102812

Keep respecting the old cl::opt for now.

>From fb38b82ebd02b5763bd0ddb9cbc4b9f318651871 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 11 Aug 2024 16:36:39 +0400
Subject: [PATCH] StructurizeCFG: Add SkipUniformRegions pass parameter to new
 PM version

Keep respecting the old cl::opt for now.
---
 .../llvm/Transforms/Scalar/StructurizeCFG.h   |  9 ++
 llvm/lib/Passes/PassBuilder.cpp   |  5 
 llvm/lib/Passes/PassRegistry.def  |  7 -
 llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 28 ++-
 .../StructurizeCFG/AMDGPU/uniform-regions.ll  |  2 ++
 5 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h 
b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
index 50d41acd529e76..f68067d9354583 100644
--- a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
+++ b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
@@ -13,6 +13,15 @@
 
 namespace llvm {
 struct StructurizeCFGPass : PassInfoMixin<StructurizeCFGPass> {
+private:
+  bool SkipUniformRegions;
+
+public:
+  StructurizeCFGPass(bool SkipUniformRegions = false);
+
+  void printPipeline(raw_ostream &OS,
+                     function_ref<StringRef(StringRef)> MapClassName2PassName);
+
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 } // namespace llvm
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 7bc1c870ce5191..46f43f3de4705c 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1085,6 +1085,11 @@ Expected 
parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) {
 "SeparateConstOffsetFromGEP");
 }
 
+Expected<bool> parseStructurizeCFGPassOptions(StringRef Params) {
+  return PassBuilder::parseSinglePassOption(Params, "skip-uniform-regions",
+"StructurizeCFG");
+}
+
 Expected
 parseFunctionSimplificationPipelineOptions(StringRef Params) {
   std::optional<OptimizationLevel> L = parseOptLevel(Params);
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 95842d15a35bf6..0cec9fbd7cd05e 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -458,7 +458,6 @@ FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
 FUNCTION_PASS("slsr", StraightLineStrengthReducePass())
 FUNCTION_PASS("stack-protector", StackProtectorPass(TM))
 FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
-FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
 FUNCTION_PASS("tailcallelim", TailCallElimPass())
 FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
 FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
@@ -586,6 +585,12 @@ FUNCTION_PASS_WITH_PARAMS(
 "sroa", "SROAPass",
 [](SROAOptions PreserveCFG) { return SROAPass(PreserveCFG); },
 parseSROAOptions, "preserve-cfg;modify-cfg")
+FUNCTION_PASS_WITH_PARAMS(
+  "structurizecfg", "StructurizeCFG",
+  [](bool SkipUniformRegions) {
+return StructurizeCFGPass(SkipUniformRegions);
+  },
+  parseStructurizeCFGPassOptions, "skip-uniform-regions")
 FUNCTION_PASS_WITH_PARAMS(
 "win-eh-prepare", "WinEHPreparePass",
 [](bool DemoteCatchSwitchPHIOnly) {
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp 
b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 9c711ec183821f..a6ed58ac9d47f2 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -1212,20 +1212,46 @@ static void addRegionIntoQueue(Region &R, 
std::vector &Regions) {
 addRegionIntoQueue(*E, Regions);
 }
 
+StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_)
+: SkipUniformRegions(SkipUniformRegions_) {
+  if (ForceSkipUniformRegions.getNumOccurrences())
+SkipUniformRegions = ForceSkipUniformRegions.getValue();
+}
+
+void StructurizeCFGPass::printPipeline(
+    raw_ostream &OS,
+    function_ref<StringRef(StringRef)> MapClassName2PassName) {
+  static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline(
+      OS, MapClassName2PassName);
+  if (SkipUniformRegions)
+    OS << "<skip-uniform-regions>";
+}
+
 PreservedAnalyses StructurizeCFGPass::run(Function &F,
   FunctionAnalysisManager &AM) {
 
   bool Changed = false;
   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
   auto &RI = AM.getResult<RegionInfoAnalysis>(F);
+
+  UniformityInfo *UI = nullptr;
+  if (SkipUniformRegions)
+    UI = &AM.getResult<UniformityInfoAnalysis>(F);
+
   std::vector Regions;
   addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
   while (!Regions.empty()) {
 Region *R = Regions.back();
+Regions.pop_back();
+
 StructurizeCFG SCFG;
 SCFG.init(R);
+
+if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
+  Changed = true; // May have added metadata.
+  continue;
+}
+
 Changed |= SCFG.run(R, DT);
-Regions.pop_back();
   }
   if (!Changed)
 return PreservedAnalyses::all();
diff --git a/llvm/test/Transforms/Struct

[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/102812?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#102812** https://app.graphite.dev/github/pr/llvm/llvm-project/102812?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈
* **#102806** https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about 
stacking.


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/102812
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)

2024-08-11 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes

Keep respecting the old cl::opt for now.

---
Full diff: https://github.com/llvm/llvm-project/pull/102812.diff


5 Files Affected:

- (modified) llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h (+9) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+5) 
- (modified) llvm/lib/Passes/PassRegistry.def (+6-1) 
- (modified) llvm/lib/Transforms/Scalar/StructurizeCFG.cpp (+27-1) 
- (modified) llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll (+2) 


``diff
diff --git a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h 
b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
index 50d41acd529e76..f68067d9354583 100644
--- a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
+++ b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
@@ -13,6 +13,15 @@
 
 namespace llvm {
 struct StructurizeCFGPass : PassInfoMixin<StructurizeCFGPass> {
+private:
+  bool SkipUniformRegions;
+
+public:
+  StructurizeCFGPass(bool SkipUniformRegions = false);
+
+  void printPipeline(raw_ostream &OS,
+                     function_ref<StringRef(StringRef)> MapClassName2PassName);
+
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 } // namespace llvm
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 7bc1c870ce5191..46f43f3de4705c 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1085,6 +1085,11 @@ Expected 
parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) {
 "SeparateConstOffsetFromGEP");
 }
 
+Expected<bool> parseStructurizeCFGPassOptions(StringRef Params) {
+  return PassBuilder::parseSinglePassOption(Params, "skip-uniform-regions",
+"StructurizeCFG");
+}
+
 Expected
 parseFunctionSimplificationPipelineOptions(StringRef Params) {
   std::optional<OptimizationLevel> L = parseOptLevel(Params);
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 95842d15a35bf6..0cec9fbd7cd05e 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -458,7 +458,6 @@ FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
 FUNCTION_PASS("slsr", StraightLineStrengthReducePass())
 FUNCTION_PASS("stack-protector", StackProtectorPass(TM))
 FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
-FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
 FUNCTION_PASS("tailcallelim", TailCallElimPass())
 FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
 FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
@@ -586,6 +585,12 @@ FUNCTION_PASS_WITH_PARAMS(
 "sroa", "SROAPass",
 [](SROAOptions PreserveCFG) { return SROAPass(PreserveCFG); },
 parseSROAOptions, "preserve-cfg;modify-cfg")
+FUNCTION_PASS_WITH_PARAMS(
+  "structurizecfg", "StructurizeCFG",
+  [](bool SkipUniformRegions) {
+return StructurizeCFGPass(SkipUniformRegions);
+  },
+  parseStructurizeCFGPassOptions, "skip-uniform-regions")
 FUNCTION_PASS_WITH_PARAMS(
 "win-eh-prepare", "WinEHPreparePass",
 [](bool DemoteCatchSwitchPHIOnly) {
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp 
b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 9c711ec183821f..a6ed58ac9d47f2 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -1212,20 +1212,46 @@ static void addRegionIntoQueue(Region &R, 
std::vector &Regions) {
 addRegionIntoQueue(*E, Regions);
 }
 
+StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_)
+: SkipUniformRegions(SkipUniformRegions_) {
+  if (ForceSkipUniformRegions.getNumOccurrences())
+SkipUniformRegions = ForceSkipUniformRegions.getValue();
+}
+
+void StructurizeCFGPass::printPipeline(
+    raw_ostream &OS,
+    function_ref<StringRef(StringRef)> MapClassName2PassName) {
+  static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline(
+      OS, MapClassName2PassName);
+  if (SkipUniformRegions)
+    OS << "<skip-uniform-regions>";
+}
+
 PreservedAnalyses StructurizeCFGPass::run(Function &F,
   FunctionAnalysisManager &AM) {
 
   bool Changed = false;
   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
   auto &RI = AM.getResult<RegionInfoAnalysis>(F);
+
+  UniformityInfo *UI = nullptr;
+  if (SkipUniformRegions)
+    UI = &AM.getResult<UniformityInfoAnalysis>(F);
+
   std::vector Regions;
   addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
   while (!Regions.empty()) {
 Region *R = Regions.back();
+Regions.pop_back();
+
 StructurizeCFG SCFG;
 SCFG.init(R);
+
+if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
+  Changed = true; // May have added metadata.
+  continue;
+}
+
 Changed |= SCFG.run(R, DT);
-Regions.pop_back();
   }
   if (!Changed)
 return PreservedAnalyses::all();
diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll 
b/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll
index ae73eedd4f502b..34c73ab8fd74f3 100644
--- a/llvm/test/Transforms/Str

[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/102812
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass manager (PR #102806)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/102806

>From 20d553816b135de48b78e2604384bcc4bb731eaa Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 11 Aug 2024 12:57:27 +0400
Subject: [PATCH] AMDGPU/NewPM: Port AMDGPULateCodeGenPrepare to new pass
 manager

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  16 ++-
 .../AMDGPU/AMDGPULateCodeGenPrepare.cpp   | 110 +++---
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |   3 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |   4 +-
 .../AMDGPU/amdgpu-late-codegenprepare.ll  |   1 +
 5 files changed, 87 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 34ef19637bf973..f5044f52f1648d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -56,7 +56,7 @@ FunctionPass *createSIPostRABundlerPass();
 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
 FunctionPass *createAMDGPUCodeGenPreparePass();
-FunctionPass *createAMDGPULateCodeGenPreparePass();
+FunctionPass *createAMDGPULateCodeGenPrepareLegacyPass();
 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
 FunctionPass *createAMDGPURewriteOutArgumentsPass();
 ModulePass *
@@ -282,6 +282,16 @@ class AMDGPUCodeGenPreparePass
   PreservedAnalyses run(Function &, FunctionAnalysisManager &);
 };
 
+class AMDGPULateCodeGenPreparePass
+    : public PassInfoMixin<AMDGPULateCodeGenPreparePass> {
+private:
+  const GCNTargetMachine &TM;
+
+public:
+  AMDGPULateCodeGenPreparePass(const GCNTargetMachine &TM) : TM(TM) {};
+  PreservedAnalyses run(Function &, FunctionAnalysisManager &);
+};
+
 class AMDGPULowerKernelArgumentsPass
     : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
 private:
@@ -352,8 +362,8 @@ extern char &AMDGPUCodeGenPrepareID;
 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
 extern char &AMDGPURemoveIncompatibleFunctionsID;
 
-void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
-extern char &AMDGPULateCodeGenPrepareID;
+void initializeAMDGPULateCodeGenPrepareLegacyPass(PassRegistry &);
+extern char &AMDGPULateCodeGenPrepareLegacyID;
 
 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
index 7bf5170794cd9e..36dfebacaed686 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULateCodeGenPrepare.cpp
@@ -42,10 +42,10 @@ static cl::opt
 namespace {
 
 class AMDGPULateCodeGenPrepare
-: public FunctionPass,
-  public InstVisitor<AMDGPULateCodeGenPrepare> {
+: public InstVisitor<AMDGPULateCodeGenPrepare> {
   Module *Mod = nullptr;
   const DataLayout *DL = nullptr;
+  const GCNSubtarget &ST;
 
   AssumptionCache *AC = nullptr;
   UniformityInfo *UA = nullptr;
@@ -53,24 +53,10 @@ class AMDGPULateCodeGenPrepare
   SmallVector DeadInsts;
 
 public:
-  static char ID;
-
-  AMDGPULateCodeGenPrepare() : FunctionPass(ID) {}
-
-  StringRef getPassName() const override {
-return "AMDGPU IR late optimizations";
-  }
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-AU.addRequired<AssumptionCacheTracker>();
-AU.addRequired<UniformityInfoWrapperPass>();
-AU.addRequired<TargetPassConfig>();
-AU.setPreservesAll();
-  }
-
-  bool doInitialization(Module &M) override;
-  bool runOnFunction(Function &F) override;
-
+  AMDGPULateCodeGenPrepare(Module &M, const GCNSubtarget &ST,
+   AssumptionCache *AC, UniformityInfo *UA)
+  : Mod(&M), DL(&M.getDataLayout()), ST(ST), AC(AC), UA(UA) {}
+  bool run(Function &F);
   bool visitInstruction(Instruction &) { return false; }
 
   // Check if the specified value is at least DWORD aligned.
@@ -148,23 +134,7 @@ class LiveRegOptimizer {
 
 } // end anonymous namespace
 
-bool AMDGPULateCodeGenPrepare::doInitialization(Module &M) {
-  Mod = &M;
-  DL = &Mod->getDataLayout();
-  return false;
-}
-
-bool AMDGPULateCodeGenPrepare::runOnFunction(Function &F) {
-  if (skipFunction(F))
-return false;
-
-  const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
-  const TargetMachine &TM = TPC.getTM<TargetMachine>();
-  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
-
-  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-  UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
-
+bool AMDGPULateCodeGenPrepare::run(Function &F) {
   // "Optimize" the virtual regs that cross basic block boundaries. When
   // building the SelectionDAG, vectors of illegal types that cross basic 
blocks
   // will be scalarized and widened, with each scalar living in its
@@ -505,16 +475,72 @@ bool AMDGPULateCodeGenPrepare::visitLoadInst(LoadInst 
&LI) {
   return true;
 }
 
-INITIALIZE_PASS_BEGIN(AMDGPULateCodeGenPrepare, DEBUG_TYPE,
+PreservedAnalyses
+AMDGPULateCodeGenPreparePass::run(Function &F, FunctionAnalysisManager &FAM) {
+  const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+
+  AssumptionCache &AC = FAM.getResult<AssumptionAnalysis>(F);

[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/102812

>From 306343c33680bdce7d2e670f6893890ac810da8d Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 11 Aug 2024 16:36:39 +0400
Subject: [PATCH] StructurizeCFG: Add SkipUniformRegions pass parameter to new
 PM version

Keep respecting the old cl::opt for now.
---
 .../llvm/Transforms/Scalar/StructurizeCFG.h   |  9 ++
 llvm/lib/Passes/PassBuilder.cpp   |  5 
 llvm/lib/Passes/PassRegistry.def  |  7 -
 llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 28 ++-
 .../StructurizeCFG/AMDGPU/uniform-regions.ll  |  2 ++
 5 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h 
b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
index 50d41acd529e76..f68067d9354583 100644
--- a/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
+++ b/llvm/include/llvm/Transforms/Scalar/StructurizeCFG.h
@@ -13,6 +13,15 @@
 
 namespace llvm {
 struct StructurizeCFGPass : PassInfoMixin<StructurizeCFGPass> {
+private:
+  bool SkipUniformRegions;
+
+public:
+  StructurizeCFGPass(bool SkipUniformRegions = false);
+
+  void printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName);
+
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
 };
 } // namespace llvm
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 7bc1c870ce5191..46f43f3de4705c 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1085,6 +1085,11 @@ Expected 
parseSeparateConstOffsetFromGEPPassOptions(StringRef Params) {
 "SeparateConstOffsetFromGEP");
 }
 
+Expected<bool> parseStructurizeCFGPassOptions(StringRef Params) {
+  return PassBuilder::parseSinglePassOption(Params, "skip-uniform-regions",
+"StructurizeCFG");
+}
+
 Expected
 parseFunctionSimplificationPipelineOptions(StringRef Params) {
   std::optional L = parseOptLevel(Params);
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 95842d15a35bf6..0cec9fbd7cd05e 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -458,7 +458,6 @@ FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
 FUNCTION_PASS("slsr", StraightLineStrengthReducePass())
 FUNCTION_PASS("stack-protector", StackProtectorPass(TM))
 FUNCTION_PASS("strip-gc-relocates", StripGCRelocates())
-FUNCTION_PASS("structurizecfg", StructurizeCFGPass())
 FUNCTION_PASS("tailcallelim", TailCallElimPass())
 FUNCTION_PASS("tlshoist", TLSVariableHoistPass())
 FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
@@ -586,6 +585,12 @@ FUNCTION_PASS_WITH_PARAMS(
 "sroa", "SROAPass",
 [](SROAOptions PreserveCFG) { return SROAPass(PreserveCFG); },
 parseSROAOptions, "preserve-cfg;modify-cfg")
+FUNCTION_PASS_WITH_PARAMS(
+  "structurizecfg", "StructurizeCFG",
+  [](bool SkipUniformRegions) {
+return StructurizeCFGPass(SkipUniformRegions);
+  },
+  parseStructurizeCFGPassOptions, "skip-uniform-regions")
 FUNCTION_PASS_WITH_PARAMS(
 "win-eh-prepare", "WinEHPreparePass",
 [](bool DemoteCatchSwitchPHIOnly) {
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp 
b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 9c711ec183821f..a6ed58ac9d47f2 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -1212,20 +1212,46 @@ static void addRegionIntoQueue(Region &R, 
std::vector &Regions) {
 addRegionIntoQueue(*E, Regions);
 }
 
+StructurizeCFGPass::StructurizeCFGPass(bool SkipUniformRegions_)
+: SkipUniformRegions(SkipUniformRegions_) {
+  if (ForceSkipUniformRegions.getNumOccurrences())
+SkipUniformRegions = ForceSkipUniformRegions.getValue();
+}
+
+void StructurizeCFGPass::printPipeline(
+raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) 
{
+  static_cast<PassInfoMixin<StructurizeCFGPass> *>(this)->printPipeline(
+  OS, MapClassName2PassName);
+  if (SkipUniformRegions)
+OS << "<skip-uniform-regions>";
+}
+
 PreservedAnalyses StructurizeCFGPass::run(Function &F,
   FunctionAnalysisManager &AM) {
 
   bool Changed = false;
   DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
   auto &RI = AM.getResult<RegionInfoAnalysis>(F);
+
+  UniformityInfo *UI = nullptr;
+  if (SkipUniformRegions)
+UI = &AM.getResult<UniformityAnalysis>(F);
+
   std::vector Regions;
   addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
   while (!Regions.empty()) {
 Region *R = Regions.back();
+Regions.pop_back();
+
 StructurizeCFG SCFG;
 SCFG.init(R);
+
+if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
+  Changed = true; // May have added metadata.
+  continue;
+}
+
 Changed |= SCFG.run(R, DT);
-Regions.pop_back();
   }
   if (!Changed)
 return PreservedAnalyses::all();
diff --git a/llvm/test/Transforms/StructurizeCFG/AMDGPU/uniform-regions.ll 
b/llvm

[llvm-branch-commits] [llvm] AMDGPU/NewPM: Fill out addPreISelPasses (PR #102814)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/102814

This specific callback should now be at parity with the old
pass manager version. There are still some missing IR passes
before this point.

Also I don't understand the need for the RequiresAnalysisPass at the
end. SelectionDAG should just be using the uncached getResult?

>From a410db38f893673d32ac34a62389efcc43a6687e Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 11 Aug 2024 11:55:00 +0400
Subject: [PATCH] AMDGPU/NewPM: Fill out addPreISelPasses

This specific callback should now be at parity with the old
pass manager version. There are still some missing IR passes
before this point.

Also I don't understand the need for the RequiresAnalysisPass at the
end. SelectionDAG should just be using the uncached getResult?
---
 .../AMDGPU/AMDGPUCodeGenPassBuilder.cpp   | 55 ++-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  8 ++-
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h  |  1 +
 .../CodeGen/AMDGPU/bug-v4f64-subvector.ll |  2 +-
 4 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
index fb3d3259171aca..36f44a20d95532 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
@@ -9,9 +9,17 @@
 #include "AMDGPUCodeGenPassBuilder.h"
 #include "AMDGPU.h"
 #include "AMDGPUISelDAGToDAG.h"
+#include "AMDGPUPerfHintAnalysis.h"
 #include "AMDGPUTargetMachine.h"
+#include "AMDGPUUnifyDivergentExitNodes.h"
 #include "SIFixSGPRCopies.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/Transforms/Scalar/FlattenCFG.h"
+#include "llvm/Transforms/Scalar/Sink.h"
+#include "llvm/Transforms/Scalar/StructurizeCFG.h"
+#include "llvm/Transforms/Utils/FixIrreducible.h"
+#include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/Transforms/Utils/UnifyLoopExits.h"
 
 using namespace llvm;
 
@@ -28,8 +36,51 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
 }
 
 void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
-  // TODO: Add passes pre instruction selection.
-  // Test only, convert to real IR passes in future.
+  const bool LateCFGStructurize = 
AMDGPUTargetMachine::EnableLateStructurizeCFG;
+  const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer;
+  const bool EnableStructurizerWorkarounds =
+  AMDGPUTargetMachine::EnableStructurizerWorkarounds;
+
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+addPass(FlattenCFGPass());
+
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+addPass(SinkingPass());
+
+  addPass(AMDGPULateCodeGenPreparePass(TM));
+
+  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
+  // regions formed by them.
+
+  addPass(AMDGPUUnifyDivergentExitNodesPass());
+
+  if (!LateCFGStructurize && !DisableStructurizer) {
+if (EnableStructurizerWorkarounds) {
+  addPass(FixIrreduciblePass());
+  addPass(UnifyLoopExitsPass());
+}
+
+addPass(StructurizeCFGPass(/*SkipUniformRegions=*/false));
+  }
+
+  addPass(AMDGPUAnnotateUniformValuesPass());
+
+  if (!LateCFGStructurize && !DisableStructurizer) {
+addPass(SIAnnotateControlFlowPass(TM));
+
+// TODO: Move this right after structurizeCFG to avoid extra divergence
+// analysis. This depends on stopping SIAnnotateControlFlow from making
+// control flow modifications.
+addPass(AMDGPURewriteUndefForPHIPass());
+  }
+
+  addPass(LCSSAPass());
+
+  if (TM.getOptLevel() > CodeGenOptLevel::Less)
+addPass(AMDGPUPerfHintAnalysisPass(TM));
+
+  // FIXME: Why isn't this queried as required from AMDGPUISelDAGToDAG, and why
+  // isn't this in addInstSelector?
   addPass(RequireAnalysisPass<UniformityInfoAnalysis, Function>());
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0523fee5bcf9f4..5929dadf93bcbe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -338,10 +338,11 @@ static cl::opt EnableScalarIRPasses(
   cl::init(true),
   cl::Hidden);
 
-static cl::opt<bool> EnableStructurizerWorkarounds(
+static cl::opt<bool, true> EnableStructurizerWorkarounds(
 "amdgpu-enable-structurizer-workarounds",
-cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
-cl::Hidden);
+cl::desc("Enable workarounds for the StructurizeCFG pass"),
+cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds),
+cl::init(true), cl::Hidden);
 
 static cl::opt EnableLowerModuleLDS(
 "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
@@ -611,6 +612,7 @@ bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
 bool AMDGPUTargetMachine::EnableFunctionCalls = false;
 bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
 bool AMDGPUTargetMachine::DisableStructurizer = false;
+bool AMDGPUTargetMachine::Enable

[llvm-branch-commits] [llvm] CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes (PR #102815)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/102815

None

>From d91c9248e843a4b0b2dd7c32a3f47e72a1362409 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 11 Aug 2024 18:11:04 +0400
Subject: [PATCH] CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes

---
 llvm/include/llvm/Passes/CodeGenPassBuilder.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 3cc39b54ba758d..eb15beb835b535 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -27,6 +27,8 @@
 #include "llvm/CodeGen/CodeGenPrepare.h"
 #include "llvm/CodeGen/DeadMachineInstructionElim.h"
 #include "llvm/CodeGen/DwarfEHPrepare.h"
+#include "llvm/CodeGen/ExpandLargeDivRem.h"
+#include "llvm/CodeGen/ExpandLargeFpConvert.h"
 #include "llvm/CodeGen/ExpandMemCmp.h"
 #include "llvm/CodeGen/ExpandReductions.h"
 #include "llvm/CodeGen/FinalizeISel.h"
@@ -627,6 +629,8 @@ void CodeGenPassBuilder::addISelPasses(
 addPass(LowerEmuTLSPass());
 
   addPass(PreISelIntrinsicLoweringPass(&TM));
+  addPass(ExpandLargeDivRemPass(&TM));
+  addPass(ExpandLargeFpConvertPass(&TM));
 
   derived().addIRPasses(addPass);
   derived().addCodeGenPrepare(addPass);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes (PR #102815)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/102815?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#102815** https://app.graphite.dev/github/pr/llvm/llvm-project/102815?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈
* **#102814** https://app.graphite.dev/github/pr/llvm/llvm-project/102814?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102812** https://app.graphite.dev/github/pr/llvm/llvm-project/102812?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102806** https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about 
stacking.


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/102815
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Start implementing addCodeGenPrepare (PR #102816)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/102816

None

>From cc51e15865010c73cf7bd3ab8632b965aa7a9dbf Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Sun, 11 Aug 2024 18:20:23 +0400
Subject: [PATCH] AMDGPU/NewPM: Start implementing addCodeGenPrepare

---
 llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp | 11 +++
 llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h   |  4 +++-
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp  |  1 +
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
index 36f44a20d9553..252a70d44736d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
 #include "llvm/Transforms/Utils/FixIrreducible.h"
 #include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/Transforms/Utils/LowerSwitch.h"
 #include "llvm/Transforms/Utils/UnifyLoopExits.h"
 
 using namespace llvm;
@@ -35,6 +36,16 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
   ShadowStackGCLoweringPass>();
 }
 
+void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
+  Base::addCodeGenPrepare(addPass);
+
+  // LowerSwitch pass may introduce unreachable blocks that can cause 
unexpected
+  // behavior for subsequent passes. Placing it here seems better that these
+  // blocks would get cleaned up by UnreachableBlockElim inserted next in the
+  // pass flow.
+  addPass(LowerSwitchPass());
+}
+
 void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
   const bool LateCFGStructurize = 
AMDGPUTargetMachine::EnableLateStructurizeCFG;
   const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
index e656e166b3eb2..efb296689bd64 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
@@ -19,10 +19,12 @@ class GCNTargetMachine;
 class AMDGPUCodeGenPassBuilder
 : public CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine> {
 public:
+  using Base = CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine>;
+
   AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM,
const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC);
-
+  void addCodeGenPrepare(AddIRPass &) const;
   void addPreISel(AddIRPass &addPass) const;
   void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
   Error addInstSelector(AddMachinePass &) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5929dadf93bcb..cad4585c5b301 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -67,6 +67,7 @@
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/InferAddressSpaces.h"
 #include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/LowerSwitch.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
 #include 

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Fill out addPreISelPasses (PR #102814)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/102814?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#102815** https://app.graphite.dev/github/pr/llvm/llvm-project/102815?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102814** https://app.graphite.dev/github/pr/llvm/llvm-project/102814?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈
* **#102812** https://app.graphite.dev/github/pr/llvm/llvm-project/102812?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102806** https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about 
stacking.


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/102814
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Start implementing addCodeGenPrepare (PR #102816)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is 
> open. Once all requirements are satisfied, merge this PR as a stack  href="https://app.graphite.dev/github/pr/llvm/llvm-project/102816?utm_source=stack-comment-downstack-mergeability-warning";
>  >on Graphite.
> https://graphite.dev/docs/merge-pull-requests";>Learn more

* **#102816** https://app.graphite.dev/github/pr/llvm/llvm-project/102816?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/> 👈
* **#102815** https://app.graphite.dev/github/pr/llvm/llvm-project/102815?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102814** https://app.graphite.dev/github/pr/llvm/llvm-project/102814?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102812** https://app.graphite.dev/github/pr/llvm/llvm-project/102812?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102806** https://app.graphite.dev/github/pr/llvm/llvm-project/102806?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102805** https://app.graphite.dev/github/pr/llvm/llvm-project/102805?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102645** https://app.graphite.dev/github/pr/llvm/llvm-project/102645?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* **#102644** https://app.graphite.dev/github/pr/llvm/llvm-project/102644?utm_source=stack-comment-icon";
 target="_blank">https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="10px" height="10px"/>
* `main`

This stack of pull requests is managed by Graphite. https://stacking.dev/?utm_source=stack-comment";>Learn more about 
stacking.


 Join @arsenm and the rest of your teammates on https://graphite.dev?utm-source=stack-comment";>https://static.graphite.dev/graphite-32x32-black.png"; alt="Graphite" 
width="11px" height="11px"/> Graphite
  

https://github.com/llvm/llvm-project/pull/102816
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Fill out addPreISelPasses (PR #102814)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/102814
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes (PR #102815)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/102815
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Fill out addPreISelPasses (PR #102814)

2024-08-11 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-transforms

Author: Matt Arsenault (arsenm)


Changes

This specific callback should now be at parity with the old
pass manager version. There are still some missing IR passes
before this point.

Also I don't understand the need for the RequiresAnalysisPass at the
end. SelectionDAG should just be using the uncached getResult?

---
Full diff: https://github.com/llvm/llvm-project/pull/102814.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+53-2) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+5-3) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/bug-v4f64-subvector.ll (+1-1) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
index fb3d3259171ac..36f44a20d9553 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
@@ -9,9 +9,17 @@
 #include "AMDGPUCodeGenPassBuilder.h"
 #include "AMDGPU.h"
 #include "AMDGPUISelDAGToDAG.h"
+#include "AMDGPUPerfHintAnalysis.h"
 #include "AMDGPUTargetMachine.h"
+#include "AMDGPUUnifyDivergentExitNodes.h"
 #include "SIFixSGPRCopies.h"
 #include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/Transforms/Scalar/FlattenCFG.h"
+#include "llvm/Transforms/Scalar/Sink.h"
+#include "llvm/Transforms/Scalar/StructurizeCFG.h"
+#include "llvm/Transforms/Utils/FixIrreducible.h"
+#include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/Transforms/Utils/UnifyLoopExits.h"
 
 using namespace llvm;
 
@@ -28,8 +36,51 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
 }
 
 void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
-  // TODO: Add passes pre instruction selection.
-  // Test only, convert to real IR passes in future.
+  const bool LateCFGStructurize = 
AMDGPUTargetMachine::EnableLateStructurizeCFG;
+  const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer;
+  const bool EnableStructurizerWorkarounds =
+  AMDGPUTargetMachine::EnableStructurizerWorkarounds;
+
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+addPass(FlattenCFGPass());
+
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
+addPass(SinkingPass());
+
+  addPass(AMDGPULateCodeGenPreparePass(TM));
+
+  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
+  // regions formed by them.
+
+  addPass(AMDGPUUnifyDivergentExitNodesPass());
+
+  if (!LateCFGStructurize && !DisableStructurizer) {
+if (EnableStructurizerWorkarounds) {
+  addPass(FixIrreduciblePass());
+  addPass(UnifyLoopExitsPass());
+}
+
+addPass(StructurizeCFGPass(/*SkipUniformRegions=*/false));
+  }
+
+  addPass(AMDGPUAnnotateUniformValuesPass());
+
+  if (!LateCFGStructurize && !DisableStructurizer) {
+addPass(SIAnnotateControlFlowPass(TM));
+
+// TODO: Move this right after structurizeCFG to avoid extra divergence
+// analysis. This depends on stopping SIAnnotateControlFlow from making
+// control flow modifications.
+addPass(AMDGPURewriteUndefForPHIPass());
+  }
+
+  addPass(LCSSAPass());
+
+  if (TM.getOptLevel() > CodeGenOptLevel::Less)
+addPass(AMDGPUPerfHintAnalysisPass(TM));
+
+  // FIXME: Why isn't this queried as required from AMDGPUISelDAGToDAG, and why
+  // isn't this in addInstSelector?
   addPass(RequireAnalysisPass<UniformityInfoAnalysis, Function>());
 }
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 0523fee5bcf9f..5929dadf93bcb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -338,10 +338,11 @@ static cl::opt EnableScalarIRPasses(
   cl::init(true),
   cl::Hidden);
 
-static cl::opt<bool> EnableStructurizerWorkarounds(
+static cl::opt<bool, true> EnableStructurizerWorkarounds(
 "amdgpu-enable-structurizer-workarounds",
-cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
-cl::Hidden);
+cl::desc("Enable workarounds for the StructurizeCFG pass"),
+cl::location(AMDGPUTargetMachine::EnableStructurizerWorkarounds),
+cl::init(true), cl::Hidden);
 
 static cl::opt EnableLowerModuleLDS(
 "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
@@ -611,6 +612,7 @@ bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
 bool AMDGPUTargetMachine::EnableFunctionCalls = false;
 bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
 bool AMDGPUTargetMachine::DisableStructurizer = false;
+bool AMDGPUTargetMachine::EnableStructurizerWorkarounds = true;
 
 AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index 6bb8788cc73b0..4d39ad2b41505 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUT

[llvm-branch-commits] [llvm] CodeGen/NewPM: Add ExpandLarge* passes to isel IR passes (PR #102815)

2024-08-11 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/102815.diff


1 Files Affected:

- (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+4) 


``diff
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 3cc39b54ba758d..eb15beb835b535 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -27,6 +27,8 @@
 #include "llvm/CodeGen/CodeGenPrepare.h"
 #include "llvm/CodeGen/DeadMachineInstructionElim.h"
 #include "llvm/CodeGen/DwarfEHPrepare.h"
+#include "llvm/CodeGen/ExpandLargeDivRem.h"
+#include "llvm/CodeGen/ExpandLargeFpConvert.h"
 #include "llvm/CodeGen/ExpandMemCmp.h"
 #include "llvm/CodeGen/ExpandReductions.h"
 #include "llvm/CodeGen/FinalizeISel.h"
@@ -627,6 +629,8 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addISelPasses(
 addPass(LowerEmuTLSPass());
 
   addPass(PreISelIntrinsicLoweringPass(&TM));
+  addPass(ExpandLargeDivRemPass(&TM));
+  addPass(ExpandLargeFpConvertPass(&TM));
 
   derived().addIRPasses(addPass);
   derived().addCodeGenPrepare(addPass);

``




https://github.com/llvm/llvm-project/pull/102815
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Start implementing addCodeGenPrepare (PR #102816)

2024-08-11 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/102816
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/NewPM: Start implementing addCodeGenPrepare (PR #102816)

2024-08-11 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/102816.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp (+11) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h (+3-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+1) 


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
index 36f44a20d9553..252a70d44736d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Transforms/Scalar/StructurizeCFG.h"
 #include "llvm/Transforms/Utils/FixIrreducible.h"
 #include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/Transforms/Utils/LowerSwitch.h"
 #include "llvm/Transforms/Utils/UnifyLoopExits.h"
 
 using namespace llvm;
@@ -35,6 +36,16 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
   ShadowStackGCLoweringPass>();
 }
 
+void AMDGPUCodeGenPassBuilder::addCodeGenPrepare(AddIRPass &addPass) const {
+  Base::addCodeGenPrepare(addPass);
+
+  // LowerSwitch pass may introduce unreachable blocks that can cause 
unexpected
+  // behavior for subsequent passes. Placing it here seems better that these
+  // blocks would get cleaned up by UnreachableBlockElim inserted next in the
+  // pass flow.
+  addPass(LowerSwitchPass());
+}
+
 void AMDGPUCodeGenPassBuilder::addPreISel(AddIRPass &addPass) const {
   const bool LateCFGStructurize = 
AMDGPUTargetMachine::EnableLateStructurizeCFG;
   const bool DisableStructurizer = AMDGPUTargetMachine::DisableStructurizer;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h 
b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
index e656e166b3eb2..efb296689bd64 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPassBuilder.h
@@ -19,10 +19,12 @@ class GCNTargetMachine;
 class AMDGPUCodeGenPassBuilder
: public CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine> {
 public:
+  using Base = CodeGenPassBuilder<AMDGPUCodeGenPassBuilder, GCNTargetMachine>;
+
   AMDGPUCodeGenPassBuilder(GCNTargetMachine &TM,
const CGPassBuilderOption &Opts,
PassInstrumentationCallbacks *PIC);
-
+  void addCodeGenPrepare(AddIRPass &) const;
   void addPreISel(AddIRPass &addPass) const;
   void addAsmPrinter(AddMachinePass &, CreateMCStreamer) const;
   Error addInstSelector(AddMachinePass &) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 5929dadf93bcb..cad4585c5b301 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -67,6 +67,7 @@
 #include "llvm/Transforms/Scalar/GVN.h"
 #include "llvm/Transforms/Scalar/InferAddressSpaces.h"
 #include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/LowerSwitch.h"
 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
 #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
 #include 

``




https://github.com/llvm/llvm-project/pull/102816
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [GlobalISel] Don't remove from unfinalized GISelWorkList (PR #102158)

2024-08-11 Thread Tobias Stadler via llvm-branch-commits

https://github.com/tobias-stadler updated 
https://github.com/llvm/llvm-project/pull/102158

>From 1f5757a4c3989755623d66c43575c858dcb13f75 Mon Sep 17 00:00:00 2001
From: Tobias Stadler 
Date: Tue, 6 Aug 2024 17:13:59 +0200
Subject: [PATCH] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20change?=
 =?UTF-8?q?s=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.6-bogner-wip

[skip ci]
---
 llvm/lib/CodeGen/GlobalISel/Combiner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp 
b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
index 3310ce5455c978..5da9e86b207618 100644
--- a/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -153,7 +153,7 @@ bool Combiner::combineMachineInstrs() {
 // down RPOT.
 Changed = false;
 
-RAIIDelegateInstaller DelInstall(MF, ObserverWrapper.get());
+RAIIMFObsDelInstaller DelInstall(MF, *ObserverWrapper);
 for (MachineBasicBlock *MBB : post_order(&MF)) {
   for (MachineInstr &CurMI :
llvm::make_early_inc_range(llvm::reverse(*MBB))) {

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)

2024-08-11 Thread David Green via llvm-branch-commits

davemgreen wrote:

> The patch here is pretty big in size, but it seems to only affects the 
> remarks, on the other hand it doesn't seem to really fix anything and in that 
> case I feel like RC3 might be the wrong time to merge this. Is there a huge 
> upside to take this this late in the process?

Thanks - I wasn't sure what state the branch was in. As @jroelofs points out 
the issues this is attempting to help with can pretty performance-sensitive and 
a hard to diagnose without assistance. The issue is that when and where spills 
happen can occur quite chaotically out of the register allocator, and so users 
need to be using the same compiler to diagnose the issues as they will use in 
practice. Having to provide patches and for users to build the compiler 
themselves is quite difficult compared to having this on the branch.

All the code (meaningfully) changed in this patch needs to be enabled with both 
`-Rpass-analysis=sme` and a backend `-mllvm 
-aarch64-stack-hazard-remark-size=XYZ` (or `-mllvm 
-aarch64-stack-hazard-size=xyz`), so the chance of it breaking anything else 
should be very low.

https://github.com/llvm/llvm-project/pull/102168
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/19.x: [NFC][libc++][test][AIX] UnXFAIL LIT test transform.pass.cpp (#102338) (PR #102466)

2024-08-11 Thread Nikolas Klauser via llvm-branch-commits

https://github.com/philnik777 approved this pull request.


https://github.com/llvm/llvm-project/pull/102466
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] StructurizeCFG: Add SkipUniformRegions pass parameter to new PM version (PR #102812)

2024-08-11 Thread Johannes Doerfert via llvm-branch-commits

https://github.com/jdoerfert approved this pull request.

LG

https://github.com/llvm/llvm-project/pull/102812
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)

2024-08-11 Thread Alexander Shaposhnikov via llvm-branch-commits

https://github.com/alexander-shaposhnikov edited 
https://github.com/llvm/llvm-project/pull/102764
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)

2024-08-11 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/102764


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)

2024-08-11 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/102764


___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)

2024-08-11 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay edited 
https://github.com/llvm/llvm-project/pull/102764
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [nsan] Use sanitizer allocator (PR #102764)

2024-08-11 Thread Fangrui Song via llvm-branch-commits


@@ -0,0 +1,334 @@
+//===- nsan_allocator.cpp 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// NumericalStabilitySanitizer allocator.
+//
+//===--===//
+
+#include "nsan_allocator.h"
+#include "interception/interception.h"
+#include "nsan.h"
+#include "nsan_platform.h"
+#include "nsan_thread.h"
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_checks.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_allocator_report.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_errno.h"
+
+DECLARE_REAL(void *, memset, void *dest, int c, uptr n)
+
+using namespace __nsan;
+
+namespace {
+struct Metadata {
+  uptr requested_size;
+};
+
+struct NsanMapUnmapCallback {
+  void OnMap(uptr p, uptr size) const {}
+  void OnMapSecondary(uptr p, uptr size, uptr user_begin,
+  uptr user_size) const {}
+  void OnUnmap(uptr p, uptr size) const {}
+};
+
+const uptr kMaxAllowedMallocSize = 1ULL << 40;
+
+// Allocator64 parameters. Deliberately using a short name.
+struct AP64 {
+  static const uptr kSpaceBeg = Mapping::kHeapMemBeg;
+  static const uptr kSpaceSize = 0x400; // 4T.
+  static const uptr kMetadataSize = sizeof(Metadata);
+  using SizeClassMap = DefaultSizeClassMap;
+  using MapUnmapCallback = NsanMapUnmapCallback;
+  static const uptr kFlags = 0;
+  using AddressSpaceView = LocalAddressSpaceView;
+};
+} // namespace
+
+using PrimaryAllocator = SizeClassAllocator64<AP64>;
+using Allocator = CombinedAllocator<PrimaryAllocator>;
+using AllocatorCache = Allocator::AllocatorCache;
+
+static Allocator allocator;
+static AllocatorCache fallback_allocator_cache;
+static StaticSpinMutex fallback_mutex;
+
+static uptr max_malloc_size;
+
+void __nsan::NsanAllocatorInit() {
+  SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
+  allocator.Init(common_flags()->allocator_release_to_os_interval_ms);
+  if (common_flags()->max_allocation_size_mb)
+max_malloc_size = Min(common_flags()->max_allocation_size_mb << 20,
+  kMaxAllowedMallocSize);
+  else
+max_malloc_size = kMaxAllowedMallocSize;
+}
+
+static AllocatorCache *GetAllocatorCache(NsanThreadLocalMallocStorage *ms) {
+  CHECK(ms);
+  CHECK_LE(sizeof(AllocatorCache), sizeof(ms->allocator_cache));
+  return reinterpret_cast<AllocatorCache *>(ms->allocator_cache);
+}
+
+void NsanThreadLocalMallocStorage::Init() {
+  allocator.InitCache(GetAllocatorCache(this));
+}
+
+void NsanThreadLocalMallocStorage::CommitBack() {
+  allocator.SwallowCache(GetAllocatorCache(this));
+  allocator.DestroyCache(GetAllocatorCache(this));
+}
+
+static void *NsanAllocate(uptr size, uptr alignment, bool zero) {
+  if (UNLIKELY(size > max_malloc_size)) {
+if (AllocatorMayReturnNull()) {
+  Report("WARNING: NumericalStabilitySanitizer failed to allocate 0x%zx "
+ "bytes\n",
+ size);
+  return nullptr;
+}
+BufferedStackTrace stack;
+GET_FATAL_STACK_TRACE_IF_EMPTY(&stack);
+ReportAllocationSizeTooBig(size, max_malloc_size, &stack);
+  }
+  if (UNLIKELY(IsRssLimitExceeded())) {
+if (AllocatorMayReturnNull())
+  return nullptr;
+BufferedStackTrace stack;
+GET_FATAL_STACK_TRACE_IF_EMPTY(&stack);
+ReportRssLimitExceeded(&stack);
+  }
+  NsanThread *t = GetCurrentThread();
+  void *allocated;
+  if (t) {
+AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
+allocated = allocator.Allocate(cache, size, alignment);
+  } else {
+SpinMutexLock l(&fallback_mutex);
+AllocatorCache *cache = &fallback_allocator_cache;
+allocated = allocator.Allocate(cache, size, alignment);
+  }
+  if (UNLIKELY(!allocated)) {
+SetAllocatorOutOfMemory();
+if (AllocatorMayReturnNull())
+  return nullptr;
+BufferedStackTrace stack;
+GET_FATAL_STACK_TRACE_IF_EMPTY(&stack);
+ReportOutOfMemory(size, &stack);
+  }
+  auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(allocated));
+  meta->requested_size = size;
+  if (zero && allocator.FromPrimary(allocated))
+REAL(memset)(allocated, 0, size);
+  __nsan_set_value_unknown(allocated, size);
+  RunMallocHooks(allocated, size);
+  return allocated;
+}
+
+void __nsan::NsanDeallocate(void *p) {
+  DCHECK(p);
+  RunFreeHooks(p);
+  auto *meta = reinterpret_cast<Metadata *>(allocator.GetMetaData(p));
+  meta->requested_size = 0;
+  if (NsanThread *t = GetCurrentThread()) {
+AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
+allocator.Deallocate(cache, p);
+  } else {
+SpinMutexLock l(&fallback_mutex)

[llvm-branch-commits] [compiler-rt] [nsan] Use sanitizer allocator (PR #102764)

2024-08-11 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay updated 
https://github.com/llvm/llvm-project/pull/102764

>From 6ec669e2206a29bce0c28213e82c2694f03bfad9 Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Sun, 11 Aug 2024 13:27:34 -0700
Subject: [PATCH] remove GetCurrentThread check for NsanAllocate

Created using spr 1.3.5-bogner
---
 compiler-rt/lib/nsan/nsan_allocator.cpp | 13 -
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/compiler-rt/lib/nsan/nsan_allocator.cpp 
b/compiler-rt/lib/nsan/nsan_allocator.cpp
index 3fa58513e9b8c..3ba71d7e9f9e2 100644
--- a/compiler-rt/lib/nsan/nsan_allocator.cpp
+++ b/compiler-rt/lib/nsan/nsan_allocator.cpp
@@ -108,15 +108,8 @@ static void *NsanAllocate(uptr size, uptr alignment, bool 
zero) {
 ReportRssLimitExceeded(&stack);
   }
   NsanThread *t = GetCurrentThread();
-  void *allocated;
-  if (t) {
-AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
-allocated = allocator.Allocate(cache, size, alignment);
-  } else {
-SpinMutexLock l(&fallback_mutex);
-AllocatorCache *cache = &fallback_allocator_cache;
-allocated = allocator.Allocate(cache, size, alignment);
-  }
+  void *allocated = allocator.Allocate(GetAllocatorCache(&t->malloc_storage()),
+   size, alignment);
   if (UNLIKELY(!allocated)) {
 SetAllocatorOutOfMemory();
 if (AllocatorMayReturnNull())
@@ -146,6 +139,8 @@ void __nsan::NsanDeallocate(void *p) {
 AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
 allocator.Deallocate(cache, p);
   } else {
+// In a just created thread, glibc's _dl_deallocate_tls might reach here
+// before nsan_current_thread is set.
 SpinMutexLock l(&fallback_mutex);
 AllocatorCache *cache = &fallback_allocator_cache;
 allocator.Deallocate(cache, p);

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [compiler-rt] [nsan] Use sanitizer allocator (PR #102764)

2024-08-11 Thread Fangrui Song via llvm-branch-commits


@@ -0,0 +1,334 @@
+//===- nsan_allocator.cpp 
-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// NumericalStabilitySanitizer allocator.
+//
+//===--===//
+
+#include "nsan_allocator.h"
+#include "interception/interception.h"
+#include "nsan.h"
+#include "nsan_platform.h"
+#include "nsan_thread.h"
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_checks.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_allocator_report.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_errno.h"
+
+DECLARE_REAL(void *, memset, void *dest, int c, uptr n)
+
+using namespace __nsan;
+
+namespace {
+struct Metadata {
+  uptr requested_size;
+};
+
+struct NsanMapUnmapCallback {
+  void OnMap(uptr p, uptr size) const {}
+  void OnMapSecondary(uptr p, uptr size, uptr user_begin,
+  uptr user_size) const {}
+  void OnUnmap(uptr p, uptr size) const {}
+};
+
+const uptr kMaxAllowedMallocSize = 1ULL << 40;
+
+// Allocator64 parameters. Deliberately using a short name.
+struct AP64 {
+  static const uptr kSpaceBeg = Mapping::kHeapMemBeg;
+  static const uptr kSpaceSize = 0x400; // 4T.
+  static const uptr kMetadataSize = sizeof(Metadata);
+  using SizeClassMap = DefaultSizeClassMap;
+  using MapUnmapCallback = NsanMapUnmapCallback;
+  static const uptr kFlags = 0;
+  using AddressSpaceView = LocalAddressSpaceView;
+};
+} // namespace
+
+using PrimaryAllocator = SizeClassAllocator64<AP64>;
+using Allocator = CombinedAllocator<PrimaryAllocator>;
+using AllocatorCache = Allocator::AllocatorCache;
+
+static Allocator allocator;
+static AllocatorCache fallback_allocator_cache;
+static StaticSpinMutex fallback_mutex;
+
+static uptr max_malloc_size;
+
+void __nsan::NsanAllocatorInit() {
+  SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
+  allocator.Init(common_flags()->allocator_release_to_os_interval_ms);
+  if (common_flags()->max_allocation_size_mb)
+max_malloc_size = Min(common_flags()->max_allocation_size_mb << 20,
+  kMaxAllowedMallocSize);
+  else
+max_malloc_size = kMaxAllowedMallocSize;
+}
+
+static AllocatorCache *GetAllocatorCache(NsanThreadLocalMallocStorage *ms) {
+  CHECK(ms);
+  CHECK_LE(sizeof(AllocatorCache), sizeof(ms->allocator_cache));
+  return reinterpret_cast<AllocatorCache *>(ms->allocator_cache);
+}
+
+void NsanThreadLocalMallocStorage::Init() {
+  allocator.InitCache(GetAllocatorCache(this));
+}
+
+void NsanThreadLocalMallocStorage::CommitBack() {
+  allocator.SwallowCache(GetAllocatorCache(this));
+  allocator.DestroyCache(GetAllocatorCache(this));
+}
+
+static void *NsanAllocate(uptr size, uptr alignment, bool zero) {
+  if (UNLIKELY(size > max_malloc_size)) {
+if (AllocatorMayReturnNull()) {
+  Report("WARNING: NumericalStabilitySanitizer failed to allocate 0x%zx "
+ "bytes\n",
+ size);
+  return nullptr;
+}
+BufferedStackTrace stack;
+GET_FATAL_STACK_TRACE_IF_EMPTY(&stack);
+ReportAllocationSizeTooBig(size, max_malloc_size, &stack);
+  }
+  if (UNLIKELY(IsRssLimitExceeded())) {
+if (AllocatorMayReturnNull())
+  return nullptr;
+BufferedStackTrace stack;
+GET_FATAL_STACK_TRACE_IF_EMPTY(&stack);
+ReportRssLimitExceeded(&stack);
+  }
+  NsanThread *t = GetCurrentThread();
+  void *allocated;
+  if (t) {
+AllocatorCache *cache = GetAllocatorCache(&t->malloc_storage());
+allocated = allocator.Allocate(cache, size, alignment);
+  } else {

MaskRay wrote:

I added a comment here. Say, the main thread creates a new thread. The main 
thread allocates TLS blocks (`nsan_current_thread == main_thread`).

In glibc, when nptl/allocatestack.c create or resize the guard area, it might 
call `_dl_deallocate_tls` on the TLS blocks. This happens at very early stage 
of pthread_create and the TLS `nsan_current_thread` is nullptr.

https://github.com/llvm/llvm-project/pull/102764
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libc] 61f3f31 - Revert "libc: Remove `extern "C"` from main declarations (#102825)"

2024-08-11 Thread via llvm-branch-commits

Author: Schrodinger ZHU Yifan
Date: 2024-08-11T13:40:32-07:00
New Revision: 61f3f31095e721e920967f91acf26c3249afc956

URL: 
https://github.com/llvm/llvm-project/commit/61f3f31095e721e920967f91acf26c3249afc956
DIFF: 
https://github.com/llvm/llvm-project/commit/61f3f31095e721e920967f91acf26c3249afc956.diff

LOG: Revert "libc: Remove `extern "C"` from main declarations (#102825)"

This reverts commit 1b71c471c7d0216fa7fc5c0b45b5926d1fabfaf4.

Added: 


Modified: 
libc/benchmarks/gpu/LibcGpuBenchmarkMain.cpp
libc/startup/gpu/amdgpu/start.cpp
libc/startup/gpu/nvptx/start.cpp
libc/startup/linux/do_start.cpp
libc/test/IntegrationTest/test.h
libc/test/UnitTest/LibcTestMain.cpp
libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp

Removed: 




diff  --git a/libc/benchmarks/gpu/LibcGpuBenchmarkMain.cpp 
b/libc/benchmarks/gpu/LibcGpuBenchmarkMain.cpp
index c4cc1a1731ce34..97366e55194a90 100644
--- a/libc/benchmarks/gpu/LibcGpuBenchmarkMain.cpp
+++ b/libc/benchmarks/gpu/LibcGpuBenchmarkMain.cpp
@@ -1,6 +1,6 @@
 #include "LibcGpuBenchmark.h"
 
-int main(int argc, char **argv, char **envp) {
+extern "C" int main(int argc, char **argv, char **envp) {
   LIBC_NAMESPACE::benchmarks::Benchmark::run_benchmarks();
   return 0;
 }

diff  --git a/libc/startup/gpu/amdgpu/start.cpp 
b/libc/startup/gpu/amdgpu/start.cpp
index e10e4cd9c2cd74..5aaa7e938d2792 100644
--- a/libc/startup/gpu/amdgpu/start.cpp
+++ b/libc/startup/gpu/amdgpu/start.cpp
@@ -13,7 +13,7 @@
 #include "src/stdlib/atexit.h"
 #include "src/stdlib/exit.h"
 
-int main(int argc, char **argv, char **envp);
+extern "C" int main(int argc, char **argv, char **envp);
 
 namespace LIBC_NAMESPACE_DECL {
 

diff  --git a/libc/startup/gpu/nvptx/start.cpp 
b/libc/startup/gpu/nvptx/start.cpp
index 561301638c3ca8..ef1e63e5161a61 100644
--- a/libc/startup/gpu/nvptx/start.cpp
+++ b/libc/startup/gpu/nvptx/start.cpp
@@ -13,7 +13,7 @@
 #include "src/stdlib/atexit.h"
 #include "src/stdlib/exit.h"
 
-int main(int argc, char **argv, char **envp);
+extern "C" int main(int argc, char **argv, char **envp);
 
 namespace LIBC_NAMESPACE_DECL {
 

diff  --git a/libc/startup/linux/do_start.cpp b/libc/startup/linux/do_start.cpp
index 7009895f0036c2..72060b4adb2148 100644
--- a/libc/startup/linux/do_start.cpp
+++ b/libc/startup/linux/do_start.cpp
@@ -20,7 +20,7 @@
 #include 
 #include 
 
-int main(int argc, char **argv, char **envp);
+extern "C" int main(int argc, char **argv, char **envp);
 
 extern "C" {
 // These arrays are present in the .init_array and .fini_array sections.

diff  --git a/libc/test/IntegrationTest/test.h 
b/libc/test/IntegrationTest/test.h
index f7068ed628a3d5..5be66d9edff02a 100644
--- a/libc/test/IntegrationTest/test.h
+++ b/libc/test/IntegrationTest/test.h
@@ -83,6 +83,6 @@
 // tests, then we should not need to explicitly declare/define the main
 // function in individual integration tests. We will not need this macro
 // then.
-#define TEST_MAIN int main
+#define TEST_MAIN extern "C" int main
 
 #endif // LLVM_LIBC_UTILS_INTEGRATION_TEST_TEST_H

diff  --git a/libc/test/UnitTest/LibcTestMain.cpp 
b/libc/test/UnitTest/LibcTestMain.cpp
index eb1125b5dcaf1f..94536e97164686 100644
--- a/libc/test/UnitTest/LibcTestMain.cpp
+++ b/libc/test/UnitTest/LibcTestMain.cpp
@@ -43,7 +43,7 @@ TestOptions parseOptions(int argc, char **argv) {
 
 } // anonymous namespace
 
-int main(int argc, char **argv, char **envp) {
+extern "C" int main(int argc, char **argv, char **envp) {
   LIBC_NAMESPACE::testing::argc = argc;
   LIBC_NAMESPACE::testing::argv = argv;
   LIBC_NAMESPACE::testing::envp = envp;

diff  --git a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp 
b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp
index 4cac072104ca9a..551b97caf81fd6 100644
--- a/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp
+++ b/libc/utils/HdrGen/PrototypeTestGen/PrototypeTestGen.cpp
@@ -47,7 +47,7 @@ bool TestGeneratorMain(llvm::raw_ostream &OS, 
llvm::RecordKeeper &records) {
 
   OS << '\n';
 
-  OS << "int main() {\n";
+  OS << "extern \"C\" int main() {\n";
   for (const auto &entrypoint : EntrypointNamesOption) {
 if (entrypoint == "errno")
   continue;



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [AArch64] Fold more load.x into load.i with large offset (PR #102837)

2024-08-11 Thread Vitaly Buka via llvm-branch-commits

https://github.com/vitalybuka created 
https://github.com/llvm/llvm-project/pull/102837

The list of load.x is refer to canFoldIntoAddrMode on D152828.
Also support LDRSroX missed in canFoldIntoAddrMode



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [AArch64] Fold more load.x into load.i with large offset (PR #102837)

2024-08-11 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: Vitaly Buka (vitalybuka)


Changes

The list of load.x is refer to canFoldIntoAddrMode on D152828.
Also support LDRSroX missed in canFoldIntoAddrMode


---
Full diff: https://github.com/llvm/llvm-project/pull/102837.diff


3 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.cpp (+13) 
- (modified) llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (+49-2) 
- (modified) llvm/test/CodeGen/AArch64/arm64-addrmode.ll (+34-51) 


``diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp 
b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index e6d49da6fdef0b..548f50c97f2917 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4345,7 +4345,20 @@ AArch64InstrInfo::getLdStAmountOp(const MachineInstr 
&MI) {
   switch (MI.getOpcode()) {
   default:
 llvm_unreachable("Unexpected opcode");
+  case AArch64::LDRBroX:
   case AArch64::LDRBBroX:
+  case AArch64::LDRSBXroX:
+  case AArch64::LDRSBWroX:
+  case AArch64::LDRHroX:
+  case AArch64::LDRHHroX:
+  case AArch64::LDRSHXroX:
+  case AArch64::LDRSHWroX:
+  case AArch64::LDRWroX:
+  case AArch64::LDRSroX:
+  case AArch64::LDRSWroX:
+  case AArch64::LDRDroX:
+  case AArch64::LDRXroX:
+  case AArch64::LDRQroX:
 return MI.getOperand(4);
   }
 }
diff --git a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp 
b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index 8de3f8db84ae2b..de1727aa6ec70f 100644
--- a/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -509,12 +509,38 @@ static unsigned getPreIndexedOpcode(unsigned Opc) {
 }
 
 static unsigned getBaseAddressOpcode(unsigned Opc) {
-  // TODO: Add more index address loads/stores.
+  // TODO: Add more index address stores.
   switch (Opc) {
   default:
 llvm_unreachable("Opcode has no base address equivalent!");
+  case AArch64::LDRBroX:
+return AArch64::LDRBui;
   case AArch64::LDRBBroX:
 return AArch64::LDRBBui;
+  case AArch64::LDRSBXroX:
+return AArch64::LDRSBXui;
+  case AArch64::LDRSBWroX:
+return AArch64::LDRSBWui;
+  case AArch64::LDRHroX:
+return AArch64::LDRHui;
+  case AArch64::LDRHHroX:
+return AArch64::LDRHHui;
+  case AArch64::LDRSHXroX:
+return AArch64::LDRSHXui;
+  case AArch64::LDRSHWroX:
+return AArch64::LDRSHWui;
+  case AArch64::LDRWroX:
+return AArch64::LDRWui;
+  case AArch64::LDRSroX:
+return AArch64::LDRSui;
+  case AArch64::LDRSWroX:
+return AArch64::LDRSWui;
+  case AArch64::LDRDroX:
+return AArch64::LDRDui;
+  case AArch64::LDRXroX:
+return AArch64::LDRXui;
+  case AArch64::LDRQroX:
+return AArch64::LDRQui;
   }
 }
 
@@ -766,10 +792,31 @@ static bool isMergeableIndexLdSt(MachineInstr &MI, int 
&Scale) {
   default:
 return false;
   // Scaled instructions.
-  // TODO: Add more index address loads/stores.
+  // TODO: Add more index address stores.
+  case AArch64::LDRBroX:
   case AArch64::LDRBBroX:
+  case AArch64::LDRSBXroX:
+  case AArch64::LDRSBWroX:
 Scale = 1;
 return true;
+  case AArch64::LDRHroX:
+  case AArch64::LDRHHroX:
+  case AArch64::LDRSHXroX:
+  case AArch64::LDRSHWroX:
+Scale = 2;
+return true;
+  case AArch64::LDRWroX:
+  case AArch64::LDRSroX:
+  case AArch64::LDRSWroX:
+Scale = 4;
+return true;
+  case AArch64::LDRDroX:
+  case AArch64::LDRXroX:
+Scale = 8;
+return true;
+  case AArch64::LDRQroX:
+Scale = 16;
+return true;
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll 
b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
index 2181eaaee7db68..bfef61abd8c129 100644
--- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll
@@ -239,9 +239,8 @@ define i32 @LdOffset_i8_zext32(ptr %a)  {
 define i32 @LdOffset_i8_sext32(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_sext32:
 ; CHECK:   // %bb.0:
-; CHECK-NEXT:mov w8, #56952 // =0xde78
-; CHECK-NEXT:movk w8, #15, lsl #16
-; CHECK-NEXT:ldrsb w0, [x0, x8]
+; CHECK-NEXT:add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:ldrsb w0, [x8, #3704]
 ; CHECK-NEXT:ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -266,9 +265,8 @@ define i64 @LdOffset_i8_zext64(ptr %a)  {
 define i64 @LdOffset_i8_sext64(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i8_sext64:
 ; CHECK:   // %bb.0:
-; CHECK-NEXT:mov w8, #56952 // =0xde78
-; CHECK-NEXT:movk w8, #15, lsl #16
-; CHECK-NEXT:ldrsb x0, [x0, x8]
+; CHECK-NEXT:add x8, x0, #253, lsl #12 // =1036288
+; CHECK-NEXT:ldrsb x0, [x8, #3704]
 ; CHECK-NEXT:ret
   %arrayidx = getelementptr inbounds i8, ptr %a, i64 1039992
   %val = load i8, ptr %arrayidx, align 1
@@ -280,9 +278,8 @@ define i64 @LdOffset_i8_sext64(ptr %a)  {
 define i16 @LdOffset_i16(ptr %a)  {
 ; CHECK-LABEL: LdOffset_i16:
 ; CHECK:   // %bb.0:
-; CH

[llvm-branch-commits] [clang] release/19.x: [C++20] [Modules] Don't diagnose duplicated implicit decl in multiple named modules (#102423) (PR #102425)

2024-08-11 Thread Chuanqi Xu via llvm-branch-commits

ChuanqiXu9 wrote:

I landed this directly as the owner of serialization. I feel this change is not 
riskful as it adds more conditions to generate a diagnose message we didn't do 
in 18.x and before. So nothing will be worse.

https://github.com/llvm/llvm-project/pull/102425
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits