[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIInsertWaitcnts to NPM (PR #130061)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130061

>From 6faf91245f016f428516d571e5bc8c85f983eb53 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 04:41:08 +0000
Subject: [PATCH] [AMDGPU][NPM] Port SIInsertWaitcnts to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  9 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  4 +-
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp   | 91 +--
 llvm/test/CodeGen/AMDGPU/call-waw-waitcnt.mir |  1 +
 .../CodeGen/AMDGPU/insert-waitcnts-hang.mir   |  1 +
 .../AMDGPU/vccz-corrupt-bug-workaround.mir|  2 +
 7 files changed, 76 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 368830384f99c..a72cf21d8907a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -370,6 +370,13 @@ class SIMemoryLegalizerPass : public 
PassInfoMixin<SIMemoryLegalizerPass> {
   static bool isRequired() { return true; }
 };
 
+class SIInsertWaitcntsPass : public PassInfoMixin<SIInsertWaitcntsPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -446,7 +453,7 @@ extern char &AMDGPUInsertDelayAluID;
 void initializeSIInsertHardClausesPass(PassRegistry &);
 extern char &SIInsertHardClausesID;
 
-void initializeSIInsertWaitcntsPass(PassRegistry&);
+void initializeSIInsertWaitcntsLegacyPass(PassRegistry &);
 extern char &SIInsertWaitcntsID;
 
 void initializeSIFormMemoryClausesLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index de959f8a2aa62..c4641cba60e53 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -109,6 +109,7 @@ MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", 
SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
 MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
+MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
@@ -131,7 +132,6 @@ 
DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", GCNRewritePartial
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 
 DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", 
SIInsertHardClausesPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index dbe212ad0a216..c3cc1dc6e495b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -535,7 +535,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSIAnnotateControlFlowLegacyPass(*PR);
   initializeAMDGPUInsertDelayAluLegacyPass(*PR);
   initializeSIInsertHardClausesPass(*PR);
-  initializeSIInsertWaitcntsPass(*PR);
+  initializeSIInsertWaitcntsLegacyPass(*PR);
   initializeSIModeRegisterLegacyPass(*PR);
   initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
@@ -2153,7 +2153,7 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   }
 
   addPass(SIMemoryLegalizerPass());
-  // TODO: addPass(SIInsertWaitcntsPass());
+  addPass(SIInsertWaitcntsPass());
 
   // TODO: addPass(SIModeRegisterPass());
 
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp 
b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ee263f58bcaf2..8951a4144bd68 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -33,6 +33,7 @@
 #include "llvm/ADT/Sequence.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/Support/DebugCounter.h"
 #include "llvm/TargetParser/TargetParser.h"
@@ -594,7 +595,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
   AMDGPU::Waitcnt getAllZeroWaitcnt(bool IncludeVSCnt) const override;
 };
 
-class SIInsertWaitcnts : public MachineFunctionPass {
+class SIInsertWaitcnts {
 private:
   const GCNSubtarg

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIInsertHardClauses to NPM (PR #130062)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130062

>From 39768ea2dd18e37366c4034c677c83c1887fce0b Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 04:52:38 +0000
Subject: [PATCH] [AMDGPU][NPM] Port SIInsertHardClauses to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  8 ++-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  2 +-
 .../lib/Target/AMDGPU/SIInsertHardClauses.cpp | 50 +--
 .../CodeGen/AMDGPU/hard-clauses-img-gfx10.mir |  1 +
 .../CodeGen/AMDGPU/hard-clauses-img-gfx11.mir |  1 +
 6 files changed, 46 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index a72cf21d8907a..61df9191cbbb9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -377,6 +377,12 @@ class SIInsertWaitcntsPass : public 
PassInfoMixin<SIInsertWaitcntsPass> {
   static bool isRequired() { return true; }
 };
 
+class SIInsertHardClausesPass : public PassInfoMixin<SIInsertHardClausesPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -450,7 +456,7 @@ extern char &SIModeRegisterID;
 void initializeAMDGPUInsertDelayAluLegacyPass(PassRegistry &);
 extern char &AMDGPUInsertDelayAluID;
 
-void initializeSIInsertHardClausesPass(PassRegistry &);
+void initializeSIInsertHardClausesLegacyPass(PassRegistry &);
 extern char &SIInsertHardClausesID;
 
 void initializeSIInsertWaitcntsLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index c4641cba60e53..3eabe087a8a33 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -109,6 +109,7 @@ MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", 
SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
 MACHINE_FUNCTION_PASS("si-form-memory-clauses", SIFormMemoryClausesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
+MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass())
 MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
@@ -131,7 +132,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizations
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 
-DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", 
SIInsertHardClausesPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c3cc1dc6e495b..6c24fe5f1441a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -534,7 +534,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeAMDGPUUnifyMetadataPass(*PR);
   initializeSIAnnotateControlFlowLegacyPass(*PR);
   initializeAMDGPUInsertDelayAluLegacyPass(*PR);
-  initializeSIInsertHardClausesPass(*PR);
+  initializeSIInsertHardClausesLegacyPass(*PR);
   initializeSIInsertWaitcntsLegacyPass(*PR);
   initializeSIModeRegisterLegacyPass(*PR);
   initializeSIWholeQuadModeLegacyPass(*PR);
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp 
b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index dcc60765cc203..71b937f23cc3c 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -36,6 +36,7 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 
 using namespace llvm;
 
@@ -89,18 +90,10 @@ enum HardClauseType {
   HARDCLAUSE_ILLEGAL,
 };
 
-class SIInsertHardClauses : public MachineFunctionPass {
+class SIInsertHardClauses {
 public:
-  static char ID;
   const GCNSubtarget *ST = nullptr;
 
-  SIInsertHardClauses() : MachineFunctionPass(ID) {}
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-AU.setPreservesCFG();
-MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
   HardClauseType getHardClauseType(const MachineInstr &MI) {
 if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
   if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
@@ -189,9 +182,7 @@ class SIInsertHardClauses : public Machin

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM (PR #130064)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130064

>From 147e7aff2a06c2b55dac8b9c9d1cd0c366325264 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 06:07:23 +0000
Subject: [PATCH] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  9 +++-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../Target/AMDGPU/AMDGPUSetWavePriority.cpp   | 44 ++-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  5 +--
 llvm/test/CodeGen/AMDGPU/set-wave-priority.ll |  5 +++
 5 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 6f11a200bac64..b885c352b061e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -391,6 +391,13 @@ class SILateBranchLoweringPass
   static bool isRequired() { return true; }
 };
 
+class AMDGPUSetWavePriorityPass
+: public PassInfoMixin<AMDGPUSetWavePriorityPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -503,7 +510,7 @@ void initializeGCNPreRAOptimizationsLegacyPass(PassRegistry 
&);
 extern char &GCNPreRAOptimizationsID;
 
 FunctionPass *createAMDGPUSetWavePriorityPass();
-void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
+void initializeAMDGPUSetWavePriorityLegacyPass(PassRegistry &);
 
 void initializeGCNRewritePartialRegUsesLegacyPass(llvm::PassRegistry &);
 extern char &GCNRewritePartialRegUsesID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 318aad5590cda..4956897d22fde 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -100,6 +100,7 @@ MACHINE_FUNCTION_PASS("amdgpu-insert-delay-alu", 
AMDGPUInsertDelayAluPass())
 MACHINE_FUNCTION_PASS("amdgpu-isel", AMDGPUISelDAGToDAGPass(*this))
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", 
GCNPreRALongBranchRegPass())
 MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
+MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
@@ -131,7 +132,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
index c16d33f1453c0..29aecda82bc4b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSetWavePriority.cpp
@@ -19,6 +19,7 @@
 #include "SIInstrInfo.h"
 #include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 
 using namespace llvm;
 
@@ -40,15 +41,11 @@ struct MBBInfo {
 
 using MBBInfoSet = DenseMap<const MachineBasicBlock *, MBBInfo>;
 
-class AMDGPUSetWavePriority : public MachineFunctionPass {
+class AMDGPUSetWavePriority {
 public:
   static char ID;
 
-  AMDGPUSetWavePriority() : MachineFunctionPass(ID) {}
-
-  StringRef getPassName() const override { return "Set wave priority"; }
-
-  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool run(MachineFunction &MF);
 
 private:
   MachineInstr *BuildSetprioMI(MachineBasicBlock &MBB,
@@ -58,15 +55,30 @@ class AMDGPUSetWavePriority : public MachineFunctionPass {
   const SIInstrInfo *TII;
 };
 
+class AMDGPUSetWavePriorityLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+
+  AMDGPUSetWavePriorityLegacy() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return "Set wave priority"; }
+
+  bool runOnMachineFunction(MachineFunction &MF) override {
+if (skipFunction(MF.getFunction()))
+  return false;
+return AMDGPUSetWavePriority().run(MF);
+  }
+};
+
 } // End anonymous namespace.
 
-INITIALIZE_PASS(AMDGPUSetWavePriority, DEBUG_TYPE, "Set wave priority", false,
-false)
+INITIALIZE_PASS(AMDGPUSetWavePriorityLegacy, DEBUG_TYPE, "Set wave priority",
+false, false)
 
-char AMDGPUSetWavePriority::ID = 0;
+char AMDGPUSetWavePriorityLegacy::ID = 0;
 
 FunctionPass *llvm::createAMDGPUSetWavePriorityPass() {
-  return new AMD

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM (PR #130065)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130065

>From 18645979e311d48be0926edac75cc92ed255fe5e Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 06:20:13 +0000
Subject: [PATCH] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  9 +-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  8 ++---
 llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp  | 31 ++-
 .../AMDGPU/insert-handle-flat-vmem-ds.mir |  1 +
 ...ort-exec-branches-special-instructions.mir |  1 +
 .../CodeGen/AMDGPU/set-gpr-idx-peephole.mir   |  1 +
 7 files changed, 39 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index b885c352b061e..e97a85ef3b00b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -210,7 +210,7 @@ extern char &SIWholeQuadModeID;
 void initializeSILowerControlFlowLegacyPass(PassRegistry &);
 extern char &SILowerControlFlowLegacyID;
 
-void initializeSIPreEmitPeepholePass(PassRegistry &);
+void initializeSIPreEmitPeepholeLegacyPass(PassRegistry &);
 extern char &SIPreEmitPeepholeID;
 
 void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
@@ -391,6 +391,13 @@ class SILateBranchLoweringPass
   static bool isRequired() { return true; }
 };
 
+class SIPreEmitPeepholePass : public PassInfoMixin<SIPreEmitPeepholePass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 class AMDGPUSetWavePriorityPass
 : public PassInfoMixin<AMDGPUSetWavePriorityPass> {
 public:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 4956897d22fde..f14499d0d3146 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -125,6 +125,7 @@ MACHINE_FUNCTION_PASS("si-optimize-exec-masking-pre-ra", 
SIOptimizeExecMaskingPr
 MACHINE_FUNCTION_PASS("si-peephole-sdwa", SIPeepholeSDWAPass())
 MACHINE_FUNCTION_PASS("si-post-ra-bundler", SIPostRABundlerPass())
 MACHINE_FUNCTION_PASS("si-pre-allocate-wwm-regs", SIPreAllocateWWMRegsPass())
+MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 MACHINE_FUNCTION_PASS("si-shrink-instructions", SIShrinkInstructionsPass())
 MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #undef MACHINE_FUNCTION_PASS
@@ -133,7 +134,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
 
-DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
 // already exists.
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", 
AMDGPUPreloadKernArgPrologPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 857af30b348cb..4b4acbf82a470 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -539,7 +539,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSIModeRegisterLegacyPass(*PR);
   initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
-  initializeSIPreEmitPeepholePass(*PR);
+  initializeSIPreEmitPeepholeLegacyPass(*PR);
   initializeSILateBranchLoweringLegacyPass(*PR);
   initializeSIMemoryLegalizerLegacyPass(*PR);
   initializeSIOptimizeExecMaskingLegacyPass(*PR);
@@ -2166,9 +2166,9 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
 addPass(AMDGPUSetWavePriorityPass());
 
-  if (TM.getOptLevel() > CodeGenOptLevel::None) {
-// TODO: addPass(SIPreEmitPeepholePass());
-  }
+  if (TM.getOptLevel() > CodeGenOptLevel::None) 
+addPass(SIPreEmitPeepholePass());
+  
 
   // The hazard recognizer that runs as part of the post-ra scheduler does not
   // guarantee to be able handle all hazards correctly. This is because if 
there
diff --git a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp 
b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
index 2bb70c138a50c..9db2118f2997b 100644
--- a/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
+++ b/llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
 
 namespace {
 
-class SIPreEmitPeephole : public MachineFunctionPass {
+class SIPreEmitPeephole {
 private:
   const SIInstrInfo *TII = nullptr;
   const SIRegisterInfo *TRI = nullptr;
@@ -40,24 +40,31 @@ class SIPreEmitPeephole : public MachineFunctionPass {
  const MachineBasicBlock &To) const;
   bool removeExeczBranch

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PostRAHazardRecognizer to NPM (PR #130066)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130066

>From 1d23b9ce3735a442acdf3d1d2bae44cf81a9712c Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 06:42:54 +0000
Subject: [PATCH] [CodeGen][NPM] Port PostRAHazardRecognizer to NPM

---
 .../llvm/CodeGen/PostRAHazardRecognizer.h | 26 +++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  1 +
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/PostRAHazardRecognizer.cpp   | 46 +--
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 +-
 .../AMDGPU/break-smem-soft-clauses.mir|  2 +
 llvm/test/CodeGen/AMDGPU/dst-sel-hazard.mir   |  2 +
 .../hazard-flat-instruction-valu-check.mir|  1 +
 10 files changed, 68 insertions(+), 18 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/PostRAHazardRecognizer.h

diff --git a/llvm/include/llvm/CodeGen/PostRAHazardRecognizer.h 
b/llvm/include/llvm/CodeGen/PostRAHazardRecognizer.h
new file mode 100644
index 0000000000000..3e0c04ac5e403
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/PostRAHazardRecognizer.h
@@ -0,0 +1,26 @@
+//===- llvm/CodeGen/PostRAHazardRecognizer.h --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_POSTRAHAZARDRECOGNIZER_H
+#define LLVM_CODEGEN_POSTRAHAZARDRECOGNIZER_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class PostRAHazardRecognizerPass
+: public PassInfoMixin<PostRAHazardRecognizerPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_POSTRAHAZARDRECOGNIZER_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index f1c16e3b1cb40..a3fd97ee99f3b 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -237,7 +237,7 @@ void initializePostDomViewerWrapperPassPass(PassRegistry &);
 void initializePostDominatorTreeWrapperPassPass(PassRegistry &);
 void initializePostInlineEntryExitInstrumenterPass(PassRegistry &);
 void initializePostMachineSchedulerLegacyPass(PassRegistry &);
-void initializePostRAHazardRecognizerPass(PassRegistry &);
+void initializePostRAHazardRecognizerLegacyPass(PassRegistry &);
 void initializePostRAMachineSinkingPass(PassRegistry &);
 void initializePostRASchedulerLegacyPass(PassRegistry &);
 void initializePreISelIntrinsicLoweringLegacyPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index bedbc3e88a7ce..285ad9601c6ff 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -155,6 +155,7 @@ MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass())
 MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass())
 MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass())
 MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass())
+MACHINE_FUNCTION_PASS("post-RA-hazard-rec", PostRAHazardRecognizerPass())
 MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass(TM))
 MACHINE_FUNCTION_PASS("postmisched", PostMachineSchedulerPass(TM))
 MACHINE_FUNCTION_PASS("post-ra-pseudos", ExpandPostRAPseudosPass())
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 375176ed4b1ce..69b4d8bac94cf 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -106,7 +106,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializePatchableFunctionLegacyPass(Registry);
   initializePeepholeOptimizerLegacyPass(Registry);
   initializePostMachineSchedulerLegacyPass(Registry);
-  initializePostRAHazardRecognizerPass(Registry);
+  initializePostRAHazardRecognizerLegacyPass(Registry);
   initializePostRAMachineSinkingPass(Registry);
   initializePostRASchedulerLegacyPass(Registry);
   initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
diff --git a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp 
b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
index 97b1532300b17..3ead2087fc1d9 100644
--- a/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/llvm/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -26,6 +26,7 @@
 //
 
//===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/PostRAHazardRecognizer.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -40,30 +41,45 @@ using namespace llvm;
 STA

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SILateBranchLowering to NPM (PR #130063)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130063

>From 33a5201fec71751cec72bf63fd80b873961ac247 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 05:26:49 +0000
Subject: [PATCH] [AMDGPU][NPM] Port SILateBranchLowering to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   | 10 -
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  5 ++-
 .../Target/AMDGPU/SILateBranchLowering.cpp| 40 ++-
 llvm/test/CodeGen/AMDGPU/early-term.mir   |  2 +
 llvm/test/CodeGen/AMDGPU/readlane_exec0.mir   |  1 +
 6 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 61df9191cbbb9..6f11a200bac64 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -213,7 +213,7 @@ extern char &SILowerControlFlowLegacyID;
 void initializeSIPreEmitPeepholePass(PassRegistry &);
 extern char &SIPreEmitPeepholeID;
 
-void initializeSILateBranchLoweringPass(PassRegistry &);
+void initializeSILateBranchLoweringLegacyPass(PassRegistry &);
 extern char &SILateBranchLoweringPassID;
 
 void initializeSIOptimizeExecMaskingLegacyPass(PassRegistry &);
@@ -383,6 +383,14 @@ class SIInsertHardClausesPass : public 
PassInfoMixin<SIInsertHardClausesPass> {
 MachineFunctionAnalysisManager &MFAM);
 };
 
+class SILateBranchLoweringPass
+: public PassInfoMixin<SILateBranchLoweringPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 3eabe087a8a33..318aad5590cda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -111,6 +111,7 @@ MACHINE_FUNCTION_PASS("si-form-memory-clauses", 
SIFormMemoryClausesPass())
 MACHINE_FUNCTION_PASS("si-i1-copies", SILowerI1CopiesPass())
 MACHINE_FUNCTION_PASS("si-insert-hard-clauses", SIInsertHardClausesPass())
 MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
+MACHINE_FUNCTION_PASS("si-late-branch-lowering", SILateBranchLoweringPass())
 MACHINE_FUNCTION_PASS("si-load-store-opt", SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
@@ -132,7 +133,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizations
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPass())
 
-DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
 // already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 6c24fe5f1441a..b9d62cc9e4b63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -540,7 +540,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSIWholeQuadModeLegacyPass(*PR);
   initializeSILowerControlFlowLegacyPass(*PR);
   initializeSIPreEmitPeepholePass(*PR);
-  initializeSILateBranchLoweringPass(*PR);
+  initializeSILateBranchLoweringLegacyPass(*PR);
   initializeSIMemoryLegalizerLegacyPass(*PR);
   initializeSIOptimizeExecMaskingLegacyPass(*PR);
   initializeSIPreAllocateWWMRegsLegacyPass(*PR);
@@ -2161,7 +2161,8 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
 // TODO: addPass(SIInsertHardClausesPass());
   }
 
-  // addPass(SILateBranchLoweringPass());
+  addPass(SILateBranchLoweringPass());
+
   if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less)) {
 // TODO: addPass(AMDGPUSetWavePriorityPass());
   }
diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp 
b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index d02173f57ee37..0f5b6bd9374b0 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -16,6 +16,7 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 
 using namespace llvm;
 
@@ -23,7 +24,7 @@ using namespace llvm;
 
 namespace {
 
-class SILateBranchLowering : public MachineFunctionPass {
+class SILateBranchLowering {
 private:
   const SIRegisterInfo *TRI = nullptr;
   const SIInstrInfo *TII = nullptr;
@@ -33,14 +34,23 @@ class SI

[llvm-branch-commits] [llvm] [AMDGPU] Add SubtargetFeature for dynamic VGPR mode (PR #130030)

2025-03-07 Thread Diana Picus via llvm-branch-commits

rovka wrote:

> It seems to me this should be a separate attribute, it's not really part of 
> the target

I don't really disagree with that. I think I made it a feature because it's 
kind of a hardware mode (i.e. it takes over a CU, and waves that use dynamic 
VGPRs can't be mixed with waves that don't). If you feel strongly about this, I 
can make it an attribute. We already have places downstream that use it as a 
target feature, so I would like to have the subtarget feature as well while we 
migrate away from it. Should I add an attribute too in this PR?

https://github.com/llvm/llvm-project/pull/130030
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineSanitizerBinaryMetadata to NPM (PR #130069)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130069

>From 5c5dde0748e0ea175f51c462eae329bea3a9188e Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 10:20:36 +0000
Subject: [PATCH] [CodeGen][NPM] Port MachineSanitizerBinaryMetadata to NPM

---
 .../llvm/CodeGen/SanitizerBinaryMetadata.h| 26 +
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  3 +-
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp  | 37 ++-
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/test/tools/llc/new-pm/pipeline.ll|  2 +-
 8 files changed, 61 insertions(+), 14 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/SanitizerBinaryMetadata.h

diff --git a/llvm/include/llvm/CodeGen/SanitizerBinaryMetadata.h 
b/llvm/include/llvm/CodeGen/SanitizerBinaryMetadata.h
new file mode 100644
index 0000000000000..6cf2e11aa911e
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/SanitizerBinaryMetadata.h
@@ -0,0 +1,26 @@
+//===- llvm/CodeGen/SanitizerBinaryMetadata.h ---*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_SANITIZERBINARYMETADATA_H
+#define LLVM_CODEGEN_SANITIZERBINARYMETADATA_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class MachineSanitizerBinaryMetadataPass
+: public PassInfoMixin<MachineSanitizerBinaryMetadataPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_SANITIZERBINARYMETADATA_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 3fd3cbb28bc3e..c7bc4320cf8f0 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -206,7 +206,7 @@ void initializeMachineOutlinerPass(PassRegistry &);
 void initializeMachinePipelinerPass(PassRegistry &);
 void initializeMachinePostDominatorTreeWrapperPassPass(PassRegistry &);
 void initializeMachineRegionInfoPassPass(PassRegistry &);
-void initializeMachineSanitizerBinaryMetadataPass(PassRegistry &);
+void initializeMachineSanitizerBinaryMetadataLegacyPass(PassRegistry &);
 void initializeMachineSchedulerLegacyPass(PassRegistry &);
 void initializeMachineSinkingLegacyPass(PassRegistry &);
 void initializeMachineTraceMetricsWrapperPassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index a86dc8d632a4e..74cdc7d66810b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -75,6 +75,7 @@
 #include "llvm/CodeGen/RenameIndependentSubregs.h"
 #include "llvm/CodeGen/ReplaceWithVeclib.h"
 #include "llvm/CodeGen/SafeStack.h"
+#include "llvm/CodeGen/SanitizerBinaryMetadata.h"
 #include "llvm/CodeGen/SelectOptimize.h"
 #include "llvm/CodeGen/ShadowStackGCLowering.h"
 #include "llvm/CodeGen/SjLjEHPrepare.h"
@@ -1002,7 +1003,7 @@ Error CodeGenPassBuilder::addMachinePasses(
   addPass(RemoveLoadsIntoFakeUsesPass());
   addPass(StackMapLivenessPass());
   addPass(LiveDebugValuesPass());
-  addPass(MachineSanitizerBinaryMetadata());
+  addPass(MachineSanitizerBinaryMetadataPass());
 
   if (TM.Options.EnableMachineOutliner &&
   getOptLevel() != CodeGenOptLevel::None &&
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index cab8108ed30f6..8fa21751392f3 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -149,6 +149,7 @@ MACHINE_FUNCTION_PASS("localstackalloc", 
LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass())
 MACHINE_FUNCTION_PASS("machine-cse", MachineCSEPass())
 MACHINE_FUNCTION_PASS("machine-latecleanup", MachineLateInstrsCleanupPass())
+MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinaryMetadataPass())
 MACHINE_FUNCTION_PASS("machine-scheduler", MachineSchedulerPass(TM))
 MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass())
 MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass())
@@ -279,7 +280,6 @@ DUMMY_MACHINE_FUNCTION_PASS("lrshrink", LiveRangeShrinkPass)
 DUMMY_MACHINE_FUNCTION_PASS("machine-combiner", MachineCombinerPass)
 DUMMY_MACHINE_FUNCTION_PASS("static-data-splitter", StaticDataSplitter)
 DUMMY_MACHINE_FUNCTION_PASS("machine-function-splitter", 
MachineFunctionSplitterPass)
-DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", MachineSanitizerBinary

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Cleanup AMDGPUPassRegistry.def (PR #130071)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130071

>From e281c45b9e5853115605cd0c9fbb232b0dacdc3b Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 10:56:28 +
Subject: [PATCH] [AMDGPU][NPM] Cleanup AMDGPUPassRegistry.def

---
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def | 8 +---
 llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp | 2 +-
 llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp| 1 +
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index f14499d0d3146..ad2f3fc29077c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -102,6 +102,7 @@ MACHINE_FUNCTION_PASS("amdgpu-pre-ra-long-branch-reg", 
GCNPreRALongBranchRegPass
 MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
 MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", AMDGPUSetWavePriorityPass())
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
+MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", 
AMDGPUPreloadKernArgPrologPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
 MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
@@ -131,13 +132,6 @@ MACHINE_FUNCTION_PASS("si-wqm", SIWholeQuadModePass())
 #undef MACHINE_FUNCTION_PASS
 
 #define DUMMY_MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUsesPass())
-
-// TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
-// already exists.
-DUMMY_MACHINE_FUNCTION_PASS("amdgpu-preload-kern-arg-prolog", 
AMDGPUPreloadKernArgPrologPass())
-
 // Global ISel passes
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-prelegalizer-combiner", 
AMDGPUPreLegalizerCombinerPass())
 DUMMY_MACHINE_FUNCTION_PASS("amdgpu-postlegalizer-combiner", 
AMDGPUPostLegalizerCombinerPass())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp
index b3a2139dfd24e..40094518dce0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreloadKernArgProlog.cpp
@@ -207,5 +207,5 @@ AMDGPUPreloadKernArgPrologPass::run(MachineFunction &MF,
   if (!AMDGPUPreloadKernArgProlog(MF).run())
 return PreservedAnalyses::all();
 
-  return PreservedAnalyses::none();
+  return getMachineFunctionPassPreservedAnalyses();
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index f380ddd03957f..a71766f2fd012 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -24,6 +24,7 @@
 #include "AMDGPUMacroFusion.h"
 #include "AMDGPUOpenCLEnqueuedBlockLowering.h"
 #include "AMDGPUPerfHintAnalysis.h"
+#include "AMDGPUPreloadKernArgProlog.h"
 #include "AMDGPURemoveIncompatibleFunctions.h"
 #include "AMDGPUSplitModule.h"
 #include "AMDGPUTargetObjectFile.h"

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port BranchRelaxation to NPM (PR #130067)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130067

>From b4d0fbae6828037b73903aef6a122458c8ce48fa Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 06:56:04 +
Subject: [PATCH] [CodeGen][NPM] Port BranchRelaxation to NPM

This completes the PreEmitPasses
---
 llvm/include/llvm/CodeGen/BranchRelaxation.h  | 25 +++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  1 +
 llvm/lib/CodeGen/BranchRelaxation.cpp | 31 ++-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  3 +-
 .../AArch64/branch-relax-block-size.mir   |  1 +
 .../AArch64/branch-relax-cross-section.mir|  2 ++
 .../AMDGPU/branch-relax-no-terminators.mir|  1 +
 10 files changed, 59 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/BranchRelaxation.h

diff --git a/llvm/include/llvm/CodeGen/BranchRelaxation.h 
b/llvm/include/llvm/CodeGen/BranchRelaxation.h
new file mode 100644
index 0..2007cf05b3aa1
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/BranchRelaxation.h
@@ -0,0 +1,25 @@
+//===- llvm/CodeGen/BranchRelaxation.h --*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_BRANCHRELAXATION_H
+#define LLVM_CODEGEN_BRANCHRELAXATION_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class BranchRelaxationPass : public PassInfoMixin<BranchRelaxationPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_BRANCHRELAXATION_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index a3fd97ee99f3b..e5bffde815117 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -61,7 +61,7 @@ void initializeBasicAAWrapperPassPass(PassRegistry &);
 void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry &);
 void initializeBranchFolderPassPass(PassRegistry &);
 void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &);
-void initializeBranchRelaxationPass(PassRegistry &);
+void initializeBranchRelaxationLegacyPass(PassRegistry &);
 void initializeBreakCriticalEdgesPass(PassRegistry &);
 void initializeBreakFalseDepsPass(PassRegistry &);
 void initializeCanonicalizeFreezeInLoopsPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 285ad9601c6ff..9300f6935aa90 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -138,6 +138,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", 
VirtRegMapAnalysis())
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
 MACHINE_FUNCTION_PASS("block-placement-stats", 
MachineBlockPlacementStatsPass())
+MACHINE_FUNCTION_PASS("branch-relaxation", BranchRelaxationPass())
 MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass())
 MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
 MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
diff --git a/llvm/lib/CodeGen/BranchRelaxation.cpp 
b/llvm/lib/CodeGen/BranchRelaxation.cpp
index a762aab43ddd2..134ca59808c27 100644
--- a/llvm/lib/CodeGen/BranchRelaxation.cpp
+++ b/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -6,6 +6,7 @@
 //
 
//===--===//
 
+#include "llvm/CodeGen/BranchRelaxation.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
@@ -44,7 +45,7 @@ STATISTIC(NumUnconditionalRelaxed, "Number of unconditional 
branches relaxed");
 
 namespace {
 
-class BranchRelaxation : public MachineFunctionPass {
+class BranchRelaxation {
   /// BasicBlockInfo - Information about the offset and size of a single
   /// basic block.
   struct BasicBlockInfo {
@@ -115,23 +116,31 @@ class BranchRelaxation : public MachineFunctionPass {
   void dumpBBs();
   void verify();
 
+public:
+  bool run(MachineFunction &MF);
+};
+
+class BranchRelaxationLegacy : public MachineFunctionPass {
 public:
   static char ID;
 
-  BranchRelaxation() : MachineFunctionPass(ID) {}
+  BranchRelaxationLegacy() : MachineFunctionPass(ID) {}
 
-  bool runOnMachineFunction(MachineFunction &MF) override;
+  bool runOnMachineFunction(MachineFunction &MF) override {
+return BranchRelaxation().run(MF);
+  }
 
   StringRef getPassName() const override { return BRANCH_R

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-07 Thread via llvm-branch-commits


@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -o - %s | 
FileCheck %s
+
+// CHECK: !dx.rootsignatures = !{![[#FIRST_ENTRY:]], ![[#SECOND_ENTRY:]]}
+
+// CHECK: ![[#FIRST_ENTRY]] = !{ptr @FirstEntry, ![[#EMPTY:]]}
+// CHECK: ![[#EMPTY]] = !{}
+
+[shader("compute"), RootSignature("")]
+[numthreads(1,1,1)]
+void FirstEntry() {}
+
+// CHECK: ![[#SECOND_ENTRY]] = !{ptr @SecondEntry, ![[#SECOND_RS:]]}
+// CHECK: ![[#SECOND_RS]] = !{![[#TABLE:]]}
+// CHECK: ![[#TABLE]] = !{!"DescriptorTable", i32 0, ![[#CBV:]], ![[#SRV:]]}
+// CHECK: ![[#CBV]] = !{!"CBV", i32 1, i32 0, i32 0, i32 -1, i32 4}
+// CHECK: ![[#SRV]] = !{!"SRV", i32 4, i32 42, i32 3, i32 32, i32 0}

joaosaffran wrote:

There is one additional field here, check my other comment.

https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-07 Thread via llvm-branch-commits


@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -o - %s | 
FileCheck %s
+
+// CHECK: !dx.rootsignatures = !{![[#FIRST_ENTRY:]], ![[#SECOND_ENTRY:]]}
+
+// CHECK: ![[#FIRST_ENTRY]] = !{ptr @FirstEntry, ![[#EMPTY:]]}
+// CHECK: ![[#EMPTY]] = !{}
+
+[shader("compute"), RootSignature("")]
+[numthreads(1,1,1)]
+void FirstEntry() {}
+
+// CHECK: ![[#SECOND_ENTRY]] = !{ptr @SecondEntry, ![[#SECOND_RS:]]}

joaosaffran wrote:

nit: Should version support be added in this PR, or will this be addressed in 
the future?

https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-07 Thread via llvm-branch-commits


@@ -14,10 +14,16 @@
 #ifndef LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H
 #define LLVM_FRONTEND_HLSL_HLSLROOTSIGNATURE_H
 
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLForwardCompat.h"

joaosaffran wrote:

nit: are those needed?

https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering i1 (PR #124299)

2025-03-07 Thread Nicolai Hähnle via llvm-branch-commits


@@ -228,6 +229,66 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
   return false;
 }
 
+bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
+  MachineRegisterInfo::VRegAttrs BoolS1 = {ST->getBoolRC(), LLT::scalar(1)};
+  initializeLaneMaskRegisterAttributes(BoolS1);
+  MachineSSAUpdater SSAUpdater(*MF);
+
+  // In case of use outside multiple nested cycles or multiple uses we only need
+  // to merge lane mask across largest relevant cycle.
+  SmallDenseMap<Register, std::pair<const MachineCycle *, Register>> LRCCache;
+  for (auto [Reg, UseInst, LRC] : MUI->getTemporalDivergenceList()) {
+if (MRI->getType(Reg) != LLT::scalar(1))
+  continue;
+
+const MachineCycle *CachedLRC = LRCCache.lookup(Reg).first;
+if (CachedLRC) {
+  LRC = CachedLRC->contains(LRC) ? CachedLRC : LRC;
+  assert(LRC->contains(CachedLRC));
+}
+
+LRCCache[Reg] = {LRC, {}};
+  }
+
+  for (auto LRCIter : LRCCache) {
+Register Reg = LRCIter.first;
+const MachineCycle *Cycle = LRCIter.second.first;
+
+if (MRI->getType(Reg) != LLT::scalar(1))
+  continue;
+
+Register MergedMask = MRI->createVirtualRegister(BoolS1);
+SSAUpdater.Initialize(MergedMask);
+
+MachineBasicBlock *MBB = MRI->getVRegDef(Reg)->getParent();
+SSAUpdater.AddAvailableValue(MBB, MergedMask);
+
+for (auto Entry : Cycle->getEntries()) {
+  for (MachineBasicBlock *Pred : Entry->predecessors()) {
+if (!Cycle->contains(Pred)) {
+  B.setInsertPt(*Pred, Pred->getFirstTerminator());
+  auto ImplDef = B.buildInstr(AMDGPU::IMPLICIT_DEF, {BoolS1}, {});
+  SSAUpdater.AddAvailableValue(Pred, ImplDef.getReg(0));
+}
+  }
+}
+
+buildMergeLaneMasks(*MBB, MBB->getFirstTerminator(), {}, MergedMask,
+SSAUpdater.GetValueInMiddleOfBlock(MBB), Reg);
+
+LRCCache[Reg].second = MergedMask;

nhaehnle wrote:

Should be able to just keep LRCIter/Entry as a reference and update via that 
instead of repeating the cache lookup.

https://github.com/llvm/llvm-project/pull/124299
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota created 
https://github.com/llvm/llvm-project/pull/130338

Fixes #114126

>From c574bedb001f61f4bcdbf00613eec932028cf392 Mon Sep 17 00:00:00 2001
From: Helena Kotas 
Date: Fri, 7 Mar 2025 12:09:33 -0800
Subject: [PATCH] [HLSL] Remove old resource annotations

Fixes #114126
---
 clang/lib/CodeGen/CGDeclCXX.cpp   |   3 -
 clang/lib/CodeGen/CGHLSLRuntime.cpp   | 129 --
 clang/lib/CodeGen/CGHLSLRuntime.h |  15 --
 .../include/llvm/Frontend/HLSL/HLSLResource.h |  22 ---
 llvm/lib/Frontend/HLSL/HLSLResource.cpp   |  54 +---
 5 files changed, 2 insertions(+), 221 deletions(-)

diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index f5950f03673a1..1ad34ae61f96a 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -1071,9 +1071,6 @@ void 
CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
 EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit);
   }
 
-  if (getLangOpts().HLSL)
-CGM.getHLSLRuntime().annotateHLSLResource(D, Addr);
-
   FinishFunction();
 }
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp 
b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index dc34653e8f497..7ea23032b6519 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -260,135 +260,6 @@ void CGHLSLRuntime::finishCodeGen() {
   generateGlobalCtorDtorCalls();
 }
 
-void CGHLSLRuntime::addBufferResourceAnnotation(llvm::GlobalVariable *GV,
-llvm::hlsl::ResourceClass RC,
-llvm::hlsl::ResourceKind RK,
-bool IsROV,
-llvm::hlsl::ElementType ET,
-BufferResBinding &Binding) {
-  llvm::Module &M = CGM.getModule();
-
-  NamedMDNode *ResourceMD = nullptr;
-  switch (RC) {
-  case llvm::hlsl::ResourceClass::UAV:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.uavs");
-break;
-  case llvm::hlsl::ResourceClass::SRV:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.srvs");
-break;
-  case llvm::hlsl::ResourceClass::CBuffer:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.cbufs");
-break;
-  default:
-assert(false && "Unsupported buffer type!");
-return;
-  }
-  assert(ResourceMD != nullptr &&
- "ResourceMD must have been set by the switch above.");
-
-  llvm::hlsl::FrontendResource Res(
-  GV, RK, ET, IsROV, Binding.Reg.value_or(UINT_MAX), Binding.Space);
-  ResourceMD->addOperand(Res.getMetadata());
-}
-
-static llvm::hlsl::ElementType
-calculateElementType(const ASTContext &Context, const clang::Type *ResourceTy) 
{
-  using llvm::hlsl::ElementType;
-
-  // TODO: We may need to update this when we add things like ByteAddressBuffer
-  // that don't have a template parameter (or, indeed, an element type).
-  const auto *TST = ResourceTy->getAs();
-  assert(TST && "Resource types must be template specializations");
-  ArrayRef Args = TST->template_arguments();
-  assert(!Args.empty() && "Resource has no element type");
-
-  // At this point we have a resource with an element type, so we can assume
-  // that it's valid or we would have diagnosed the error earlier.
-  QualType ElTy = Args[0].getAsType();
-
-  // We should either have a basic type or a vector of a basic type.
-  if (const auto *VecTy = ElTy->getAs())
-ElTy = VecTy->getElementType();
-
-  if (ElTy->isSignedIntegerType()) {
-switch (Context.getTypeSize(ElTy)) {
-case 16:
-  return ElementType::I16;
-case 32:
-  return ElementType::I32;
-case 64:
-  return ElementType::I64;
-}
-  } else if (ElTy->isUnsignedIntegerType()) {
-switch (Context.getTypeSize(ElTy)) {
-case 16:
-  return ElementType::U16;
-case 32:
-  return ElementType::U32;
-case 64:
-  return ElementType::U64;
-}
-  } else if (ElTy->isSpecificBuiltinType(BuiltinType::Half))
-return ElementType::F16;
-  else if (ElTy->isSpecificBuiltinType(BuiltinType::Float))
-return ElementType::F32;
-  else if (ElTy->isSpecificBuiltinType(BuiltinType::Double))
-return ElementType::F64;
-
-  // TODO: We need to handle unorm/snorm float types here once we support them
-  llvm_unreachable("Invalid element type for resource");
-}
-
-void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) 
{
-  const Type *Ty = D->getType()->getPointeeOrArrayElementType();
-  if (!Ty)
-return;
-  const auto *RD = Ty->getAsCXXRecordDecl();
-  if (!RD)
-return;
-  // the resource related attributes are on the handle member
-  // inside the record decl
-  for (auto *FD : RD->fields()) {
-const auto *HLSLResAttr = FD->getAttr();
-const HLSLAttributedResourceType *AttrResType =
-dyn_cast(FD->getType().getTypePtr());
-if (!HLSLResAttr || !AttrResType)
-  continue;
-
-llvm::hls

[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-hlsl

Author: Helena Kotas (hekota)


Changes

Fixes #114126

---
Full diff: https://github.com/llvm/llvm-project/pull/130338.diff


5 Files Affected:

- (modified) clang/lib/CodeGen/CGDeclCXX.cpp (-3) 
- (modified) clang/lib/CodeGen/CGHLSLRuntime.cpp (-129) 
- (modified) clang/lib/CodeGen/CGHLSLRuntime.h (-15) 
- (modified) llvm/include/llvm/Frontend/HLSL/HLSLResource.h (-22) 
- (modified) llvm/lib/Frontend/HLSL/HLSLResource.cpp (+2-52) 


``diff
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index f5950f03673a1..1ad34ae61f96a 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -1071,9 +1071,6 @@ void 
CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
 EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit);
   }
 
-  if (getLangOpts().HLSL)
-CGM.getHLSLRuntime().annotateHLSLResource(D, Addr);
-
   FinishFunction();
 }
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp 
b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index dc34653e8f497..7ea23032b6519 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -260,135 +260,6 @@ void CGHLSLRuntime::finishCodeGen() {
   generateGlobalCtorDtorCalls();
 }
 
-void CGHLSLRuntime::addBufferResourceAnnotation(llvm::GlobalVariable *GV,
-llvm::hlsl::ResourceClass RC,
-llvm::hlsl::ResourceKind RK,
-bool IsROV,
-llvm::hlsl::ElementType ET,
-BufferResBinding &Binding) {
-  llvm::Module &M = CGM.getModule();
-
-  NamedMDNode *ResourceMD = nullptr;
-  switch (RC) {
-  case llvm::hlsl::ResourceClass::UAV:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.uavs");
-break;
-  case llvm::hlsl::ResourceClass::SRV:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.srvs");
-break;
-  case llvm::hlsl::ResourceClass::CBuffer:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.cbufs");
-break;
-  default:
-assert(false && "Unsupported buffer type!");
-return;
-  }
-  assert(ResourceMD != nullptr &&
- "ResourceMD must have been set by the switch above.");
-
-  llvm::hlsl::FrontendResource Res(
-  GV, RK, ET, IsROV, Binding.Reg.value_or(UINT_MAX), Binding.Space);
-  ResourceMD->addOperand(Res.getMetadata());
-}
-
-static llvm::hlsl::ElementType
-calculateElementType(const ASTContext &Context, const clang::Type *ResourceTy) 
{
-  using llvm::hlsl::ElementType;
-
-  // TODO: We may need to update this when we add things like ByteAddressBuffer
-  // that don't have a template parameter (or, indeed, an element type).
-  const auto *TST = ResourceTy->getAs();
-  assert(TST && "Resource types must be template specializations");
-  ArrayRef Args = TST->template_arguments();
-  assert(!Args.empty() && "Resource has no element type");
-
-  // At this point we have a resource with an element type, so we can assume
-  // that it's valid or we would have diagnosed the error earlier.
-  QualType ElTy = Args[0].getAsType();
-
-  // We should either have a basic type or a vector of a basic type.
-  if (const auto *VecTy = ElTy->getAs())
-ElTy = VecTy->getElementType();
-
-  if (ElTy->isSignedIntegerType()) {
-switch (Context.getTypeSize(ElTy)) {
-case 16:
-  return ElementType::I16;
-case 32:
-  return ElementType::I32;
-case 64:
-  return ElementType::I64;
-}
-  } else if (ElTy->isUnsignedIntegerType()) {
-switch (Context.getTypeSize(ElTy)) {
-case 16:
-  return ElementType::U16;
-case 32:
-  return ElementType::U32;
-case 64:
-  return ElementType::U64;
-}
-  } else if (ElTy->isSpecificBuiltinType(BuiltinType::Half))
-return ElementType::F16;
-  else if (ElTy->isSpecificBuiltinType(BuiltinType::Float))
-return ElementType::F32;
-  else if (ElTy->isSpecificBuiltinType(BuiltinType::Double))
-return ElementType::F64;
-
-  // TODO: We need to handle unorm/snorm float types here once we support them
-  llvm_unreachable("Invalid element type for resource");
-}
-
-void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) 
{
-  const Type *Ty = D->getType()->getPointeeOrArrayElementType();
-  if (!Ty)
-return;
-  const auto *RD = Ty->getAsCXXRecordDecl();
-  if (!RD)
-return;
-  // the resource related attributes are on the handle member
-  // inside the record decl
-  for (auto *FD : RD->fields()) {
-const auto *HLSLResAttr = FD->getAttr();
-const HLSLAttributedResourceType *AttrResType =
-dyn_cast(FD->getType().getTypePtr());
-if (!HLSLResAttr || !AttrResType)
-  continue;
-
-llvm::hlsl::ResourceClass RC = AttrResType->getAttrs().ResourceClass;
-if (RC == llvm::hlsl::ResourceClass::UAV ||
-RC == llvm::hlsl::ResourceClass::SRV)
- 

[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Helena Kotas (hekota)


Changes

Fixes #114126

---
Full diff: https://github.com/llvm/llvm-project/pull/130338.diff


5 Files Affected:

- (modified) clang/lib/CodeGen/CGDeclCXX.cpp (-3) 
- (modified) clang/lib/CodeGen/CGHLSLRuntime.cpp (-129) 
- (modified) clang/lib/CodeGen/CGHLSLRuntime.h (-15) 
- (modified) llvm/include/llvm/Frontend/HLSL/HLSLResource.h (-22) 
- (modified) llvm/lib/Frontend/HLSL/HLSLResource.cpp (+2-52) 


``diff
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index f5950f03673a1..1ad34ae61f96a 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -1071,9 +1071,6 @@ void 
CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
 EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit);
   }
 
-  if (getLangOpts().HLSL)
-CGM.getHLSLRuntime().annotateHLSLResource(D, Addr);
-
   FinishFunction();
 }
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp 
b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index dc34653e8f497..7ea23032b6519 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -260,135 +260,6 @@ void CGHLSLRuntime::finishCodeGen() {
   generateGlobalCtorDtorCalls();
 }
 
-void CGHLSLRuntime::addBufferResourceAnnotation(llvm::GlobalVariable *GV,
-llvm::hlsl::ResourceClass RC,
-llvm::hlsl::ResourceKind RK,
-bool IsROV,
-llvm::hlsl::ElementType ET,
-BufferResBinding &Binding) {
-  llvm::Module &M = CGM.getModule();
-
-  NamedMDNode *ResourceMD = nullptr;
-  switch (RC) {
-  case llvm::hlsl::ResourceClass::UAV:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.uavs");
-break;
-  case llvm::hlsl::ResourceClass::SRV:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.srvs");
-break;
-  case llvm::hlsl::ResourceClass::CBuffer:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.cbufs");
-break;
-  default:
-assert(false && "Unsupported buffer type!");
-return;
-  }
-  assert(ResourceMD != nullptr &&
- "ResourceMD must have been set by the switch above.");
-
-  llvm::hlsl::FrontendResource Res(
-  GV, RK, ET, IsROV, Binding.Reg.value_or(UINT_MAX), Binding.Space);
-  ResourceMD->addOperand(Res.getMetadata());
-}
-
-static llvm::hlsl::ElementType
-calculateElementType(const ASTContext &Context, const clang::Type *ResourceTy) 
{
-  using llvm::hlsl::ElementType;
-
-  // TODO: We may need to update this when we add things like ByteAddressBuffer
-  // that don't have a template parameter (or, indeed, an element type).
-  const auto *TST = ResourceTy->getAs();
-  assert(TST && "Resource types must be template specializations");
-  ArrayRef Args = TST->template_arguments();
-  assert(!Args.empty() && "Resource has no element type");
-
-  // At this point we have a resource with an element type, so we can assume
-  // that it's valid or we would have diagnosed the error earlier.
-  QualType ElTy = Args[0].getAsType();
-
-  // We should either have a basic type or a vector of a basic type.
-  if (const auto *VecTy = ElTy->getAs())
-ElTy = VecTy->getElementType();
-
-  if (ElTy->isSignedIntegerType()) {
-switch (Context.getTypeSize(ElTy)) {
-case 16:
-  return ElementType::I16;
-case 32:
-  return ElementType::I32;
-case 64:
-  return ElementType::I64;
-}
-  } else if (ElTy->isUnsignedIntegerType()) {
-switch (Context.getTypeSize(ElTy)) {
-case 16:
-  return ElementType::U16;
-case 32:
-  return ElementType::U32;
-case 64:
-  return ElementType::U64;
-}
-  } else if (ElTy->isSpecificBuiltinType(BuiltinType::Half))
-return ElementType::F16;
-  else if (ElTy->isSpecificBuiltinType(BuiltinType::Float))
-return ElementType::F32;
-  else if (ElTy->isSpecificBuiltinType(BuiltinType::Double))
-return ElementType::F64;
-
-  // TODO: We need to handle unorm/snorm float types here once we support them
-  llvm_unreachable("Invalid element type for resource");
-}
-
-void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) 
{
-  const Type *Ty = D->getType()->getPointeeOrArrayElementType();
-  if (!Ty)
-return;
-  const auto *RD = Ty->getAsCXXRecordDecl();
-  if (!RD)
-return;
-  // the resource related attributes are on the handle member
-  // inside the record decl
-  for (auto *FD : RD->fields()) {
-const auto *HLSLResAttr = FD->getAttr();
-const HLSLAttributedResourceType *AttrResType =
-dyn_cast(FD->getType().getTypePtr());
-if (!HLSLResAttr || !AttrResType)
-  continue;
-
-llvm::hlsl::ResourceClass RC = AttrResType->getAttrs().ResourceClass;
-if (RC == llvm::hlsl::ResourceClass::UAV ||
-RC == llvm::hlsl::ResourceClass::SRV)
-

[llvm-branch-commits] [llvm] [DirectX] Remove DXILResourceMDAnalysis (PR #130323)

2025-03-07 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-directx

Author: Helena Kotas (hekota)


Changes

`DXILResourceMDAnalysis` gathers information about resources from obsolete 
resource metadata annotations that are going to be removed in a follow-up PR.

Part 1/2 of #114126

---

Patch is 33.16 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/130323.diff


15 Files Affected:

- (modified) llvm/lib/Target/DirectX/CMakeLists.txt (-2) 
- (modified) llvm/lib/Target/DirectX/DXILDataScalarization.h (-1) 
- (modified) llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h (-1) 
- (modified) llvm/lib/Target/DirectX/DXILOpLowering.cpp (-2) 
- (modified) llvm/lib/Target/DirectX/DXILPrepare.cpp (-2) 
- (modified) llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp (+3-20) 
- (removed) llvm/lib/Target/DirectX/DXILResource.cpp (-346) 
- (removed) llvm/lib/Target/DirectX/DXILResource.h (-133) 
- (removed) llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp (-41) 
- (removed) llvm/lib/Target/DirectX/DXILResourceAnalysis.h (-55) 
- (modified) llvm/lib/Target/DirectX/DXILTranslateMetadata.cpp (+4-27) 
- (modified) llvm/lib/Target/DirectX/DirectX.h (-3) 
- (modified) llvm/lib/Target/DirectX/DirectXPassRegistry.def (-1) 
- (modified) llvm/lib/Target/DirectX/DirectXTargetMachine.cpp (-2) 
- (modified) llvm/test/CodeGen/DirectX/llc-pipeline.ll (-1) 


``diff
diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt 
b/llvm/lib/Target/DirectX/CMakeLists.txt
index 5a167535b0afa..6904a1c0f1e73 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -28,8 +28,6 @@ add_llvm_target(DirectXCodeGen
   DXILOpLowering.cpp
   DXILPrepare.cpp
   DXILPrettyPrinter.cpp
-  DXILResource.cpp
-  DXILResourceAnalysis.cpp
   DXILResourceAccess.cpp
   DXILShaderFlags.cpp
   DXILTranslateMetadata.cpp
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.h 
b/llvm/lib/Target/DirectX/DXILDataScalarization.h
index 560e061db96d0..e8cd495729431 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.h
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H
 #define LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H
 
-#include "DXILResource.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h 
b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
index c283386c6e3df..43fc4d7735e1f 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
@@ -8,7 +8,6 @@
 #ifndef LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
 #define LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
 
-#include "DXILResource.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp 
b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 92b6787b9571e..1b02206464661 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -10,7 +10,6 @@
 #include "DXILConstants.h"
 #include "DXILIntrinsicExpansion.h"
 #include "DXILOpBuilder.h"
-#include "DXILResourceAnalysis.h"
 #include "DXILShaderFlags.h"
 #include "DirectX.h"
 #include "llvm/ADT/SmallVector.h"
@@ -889,7 +888,6 @@ class DXILOpLoweringLegacy : public ModulePass {
 AU.addRequired();
 AU.addRequired();
 AU.addPreserved();
-AU.addPreserved();
 AU.addPreserved();
 AU.addPreserved();
   }
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp 
b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index 355c79ae0edc9..0014cc9e1f67c 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -11,7 +11,6 @@
 /// Language (DXIL).
 
//===--===//
 
-#include "DXILResourceAnalysis.h"
 #include "DXILShaderFlags.h"
 #include "DirectX.h"
 #include "DirectXIRPasses/PointerTypeAnalysis.h"
@@ -248,7 +247,6 @@ class DXILPrepareModule : public ModulePass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
 AU.addRequired();
 AU.addPreserved();
-AU.addPreserved();
 AU.addPreserved();
 AU.addPreserved();
   }
diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp 
b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
index 7255a9be06d51..c1f2483044693 100644
--- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
@@ -7,7 +7,6 @@
 
//===--===//
 
 #include "DXILPrettyPrinter.h"
-#include "DXILResourceAnalysis.h"
 #include "DirectX.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/DXILResource.h"
@@ -222,8 +221,7 @@ struct FormatBindingSize
 } // namespace
 
 static void prettyPrintResources(raw_ostream &OS, const DXILBindingMap &DBM,
- DXILResourceTypeMap &DRTM,
-  

[llvm-branch-commits] [llvm] [DirectX] Remove DXILResourceMDAnalysis (PR #130323)

2025-03-07 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota edited 
https://github.com/llvm/llvm-project/pull/130323
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Remove DXILResourceMDAnalysis (PR #130323)

2025-03-07 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota created 
https://github.com/llvm/llvm-project/pull/130323

`DXILResourceMDAnalysis` gathers information about resources from obsolete 
resource metadata annotations that are going to be removed in a follow-up PR.

Part 1/2 of #114126

>From 80765757f067527816c4c8b9d728169568b04b7a Mon Sep 17 00:00:00 2001
From: Helena Kotas 
Date: Fri, 7 Mar 2025 10:40:20 -0800
Subject: [PATCH] [DirectX] Remove DXILResourceMDAnalysis

Part 1/2 of #114126
---
 llvm/lib/Target/DirectX/CMakeLists.txt|   2 -
 .../Target/DirectX/DXILDataScalarization.h|   1 -
 .../Target/DirectX/DXILIntrinsicExpansion.h   |   1 -
 llvm/lib/Target/DirectX/DXILOpLowering.cpp|   2 -
 llvm/lib/Target/DirectX/DXILPrepare.cpp   |   2 -
 llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp |  23 +-
 llvm/lib/Target/DirectX/DXILResource.cpp  | 346 --
 llvm/lib/Target/DirectX/DXILResource.h| 133 ---
 .../Target/DirectX/DXILResourceAnalysis.cpp   |  41 ---
 .../lib/Target/DirectX/DXILResourceAnalysis.h |  55 ---
 .../Target/DirectX/DXILTranslateMetadata.cpp  |  31 +-
 llvm/lib/Target/DirectX/DirectX.h |   3 -
 .../Target/DirectX/DirectXPassRegistry.def|   1 -
 .../Target/DirectX/DirectXTargetMachine.cpp   |   2 -
 llvm/test/CodeGen/DirectX/llc-pipeline.ll |   1 -
 15 files changed, 7 insertions(+), 637 deletions(-)
 delete mode 100644 llvm/lib/Target/DirectX/DXILResource.cpp
 delete mode 100644 llvm/lib/Target/DirectX/DXILResource.h
 delete mode 100644 llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp
 delete mode 100644 llvm/lib/Target/DirectX/DXILResourceAnalysis.h

diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt 
b/llvm/lib/Target/DirectX/CMakeLists.txt
index 5a167535b0afa..6904a1c0f1e73 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -28,8 +28,6 @@ add_llvm_target(DirectXCodeGen
   DXILOpLowering.cpp
   DXILPrepare.cpp
   DXILPrettyPrinter.cpp
-  DXILResource.cpp
-  DXILResourceAnalysis.cpp
   DXILResourceAccess.cpp
   DXILShaderFlags.cpp
   DXILTranslateMetadata.cpp
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.h 
b/llvm/lib/Target/DirectX/DXILDataScalarization.h
index 560e061db96d0..e8cd495729431 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.h
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H
 #define LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H
 
-#include "DXILResource.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h 
b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
index c283386c6e3df..43fc4d7735e1f 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
@@ -8,7 +8,6 @@
 #ifndef LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
 #define LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
 
-#include "DXILResource.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp 
b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 92b6787b9571e..1b02206464661 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -10,7 +10,6 @@
 #include "DXILConstants.h"
 #include "DXILIntrinsicExpansion.h"
 #include "DXILOpBuilder.h"
-#include "DXILResourceAnalysis.h"
 #include "DXILShaderFlags.h"
 #include "DirectX.h"
 #include "llvm/ADT/SmallVector.h"
@@ -889,7 +888,6 @@ class DXILOpLoweringLegacy : public ModulePass {
 AU.addRequired();
 AU.addRequired();
 AU.addPreserved();
-AU.addPreserved();
 AU.addPreserved();
 AU.addPreserved();
   }
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp 
b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index 355c79ae0edc9..0014cc9e1f67c 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -11,7 +11,6 @@
 /// Language (DXIL).
 
//===--===//
 
-#include "DXILResourceAnalysis.h"
 #include "DXILShaderFlags.h"
 #include "DirectX.h"
 #include "DirectXIRPasses/PointerTypeAnalysis.h"
@@ -248,7 +247,6 @@ class DXILPrepareModule : public ModulePass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
 AU.addRequired();
 AU.addPreserved();
-AU.addPreserved();
 AU.addPreserved();
 AU.addPreserved();
   }
diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp 
b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
index 7255a9be06d51..c1f2483044693 100644
--- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
@@ -7,7 +7,6 @@
 
//===--===//
 
 #include "DXILPrettyPrinter.h"
-#include "DXILResourceAnalysis.h"
 #include "DirectX.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/DXILRes

[llvm-branch-commits] [clang] [HLSL] Implement explicit layout for default constant buffer ($Globals) (PR #128991)

2025-03-07 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota closed 
https://github.com/llvm/llvm-project/pull/128991
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering i1 (PR #124299)

2025-03-07 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle commented:

Thanks, this now looks good to me in terms of the overall flow. I have a bunch 
of nitpickier, mostly style-related comments.

https://github.com/llvm/llvm-project/pull/124299
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering i1 (PR #124299)

2025-03-07 Thread Nicolai Hähnle via llvm-branch-commits


@@ -228,6 +229,66 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
   return false;
 }
 
+bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
+  MachineRegisterInfo::VRegAttrs BoolS1 = {ST->getBoolRC(), LLT::scalar(1)};
+  initializeLaneMaskRegisterAttributes(BoolS1);
+  MachineSSAUpdater SSAUpdater(*MF);
+
+  // In case of use outside multiple nested cycles or multiple uses we only need
+  // to merge lane mask across largest relevant cycle.
+  SmallDenseMap> LRCCache;
+  for (auto [Reg, UseInst, LRC] : MUI->getTemporalDivergenceList()) {
+if (MRI->getType(Reg) != LLT::scalar(1))
+  continue;
+
+const MachineCycle *CachedLRC = LRCCache.lookup(Reg).first;
+if (CachedLRC) {
+  LRC = CachedLRC->contains(LRC) ? CachedLRC : LRC;
+  assert(LRC->contains(CachedLRC));
+}
+
+LRCCache[Reg] = {LRC, {}};
+  }
+
+  for (auto LRCIter : LRCCache) {

nhaehnle wrote:

Naming: this isn't an iterator, a more accurate generic name would be just 
"Entry" (or LRCCacheEntry, but that's long)

https://github.com/llvm/llvm-project/pull/124299
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Allocate scratch space for dVGPRs for CWSR (PR #130055)

2025-03-07 Thread Diana Picus via llvm-branch-commits


@@ -511,6 +511,14 @@ SIRegisterInfo::getLargestLegalSuperClass(const 
TargetRegisterClass *RC,
 Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const SIFrameLowering *TFI = ST.getFrameLowering();
   const SIMachineFunctionInfo *FuncInfo = MF.getInfo();
+
+  // If we need to reserve scratch space for saving the VGPRs, then we should
+  // use the frame register for accessing our own frame (which may start at a
+  // non-zero offset).
+  if (TFI->mayReserveScratchForCWSR(MF))
+return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()

rovka wrote:

I was actually already doing that :D I cleaned this up now.

https://github.com/llvm/llvm-project/pull/130055
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota edited 
https://github.com/llvm/llvm-project/pull/130338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Deallocate VGPRs before exiting in dynamic VGPR mode (PR #130037)

2025-03-07 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm edited 
https://github.com/llvm/llvm-project/pull/130037
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [HLSL] Implement explicit layout for default constant buffer ($Globals) (PR #128991)

2025-03-07 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota reopened 
https://github.com/llvm/llvm-project/pull/128991
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread Joshua Batista via llvm-branch-commits

https://github.com/bob80905 approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/130338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787) (PR #129997)

2025-03-07 Thread via llvm-branch-commits

llvmbot wrote:

@david-arm What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/129997
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-07 Thread Finn Plummer via llvm-branch-commits


@@ -0,0 +1,31 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -o - %s | 
FileCheck %s
+
+// CHECK: !dx.rootsignatures = !{![[#FIRST_ENTRY:]], ![[#SECOND_ENTRY:]]}
+
+// CHECK: ![[#FIRST_ENTRY]] = !{ptr @FirstEntry, ![[#EMPTY:]]}
+// CHECK: ![[#EMPTY]] = !{}
+
+[shader("compute"), RootSignature("")]
+[numthreads(1,1,1)]
+void FirstEntry() {}
+
+// CHECK: ![[#SECOND_ENTRY]] = !{ptr @SecondEntry, ![[#SECOND_RS:]]}

inbelic wrote:

https://github.com/llvm/llvm-project/issues/126557 tracks this work

https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-07 Thread Finn Plummer via llvm-branch-commits


@@ -0,0 +1,108 @@
+//===- HLSLRootSignature.cpp - HLSL Root Signature helper objects
+//--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+///
+/// \file This file contains helpers for working with HLSL Root Signatures.
+///
+//===--===//
+
+#include "llvm/Frontend/HLSL/HLSLRootSignature.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+
+namespace llvm {
+namespace hlsl {
+namespace rootsig {
+
+// Static helper functions
+
+static MDString *ClauseTypeToName(LLVMContext &Ctx, ClauseType Type) {
+  StringRef Name;
+  switch (Type) {
+  case ClauseType::CBuffer:
+Name = "CBV";
+break;
+  case ClauseType::SRV:
+Name = "SRV";
+break;
+  case ClauseType::UAV:
+Name = "UAV";
+break;
+  case ClauseType::Sampler:
+Name = "Sampler";
+break;
+  }
+  return MDString::get(Ctx, Name);
+}
+
+// Helper struct so that we can use the overloaded notation of std::visit
+template  struct OverloadBuilds : Ts... {
+  using Ts::operator()...;
+};
+template  OverloadBuilds(Ts...) -> OverloadBuilds;
+
+MDNode *MetadataBuilder::BuildRootSignature() {
+  for (const RootElement &Element : Elements) {
+MDNode *ElementMD =
+std::visit(OverloadBuilds{
+   [&](DescriptorTable Table) -> MDNode * {
+ return BuildDescriptorTable(Table);
+   },
+   [&](DescriptorTableClause Clause) -> MDNode * {
+ return BuildDescriptorTableClause(Clause);
+   },
+   },
+   Element);
+GeneratedMetadata.push_back(ElementMD);
+  }
+
+  return MDNode::get(Ctx, GeneratedMetadata);
+}
+
+MDNode *MetadataBuilder::BuildDescriptorTable(const DescriptorTable &Table) {
+  IRBuilder<> B(Ctx);
+  SmallVector TableOperands;
+  // Set the mandatory arguments
+  TableOperands.push_back(MDString::get(Ctx, "DescriptorTable"));
+  TableOperands.push_back(ConstantAsMetadata::get(
+  B.getInt32(llvm::to_underlying(Table.Visibility;
+
+  // Remaining operands are references to the table's clauses. The in-memory
+  // representation of the Root Elements created from parsing will ensure that
+  // the previous N elements are the clauses for this table.
+  assert(Table.NumClauses <= GeneratedMetadata.size() &&
+ "Table expected all owned clauses to be generated already");
+  // So, add a reference to each clause to our operands
+  TableOperands.append(GeneratedMetadata.end() - Table.NumClauses,
+   GeneratedMetadata.end());
+  // Then, remove those clauses from the general list of Root Elements
+  GeneratedMetadata.pop_back_n(Table.NumClauses);
+
+  return MDNode::get(Ctx, TableOperands);
+}
+
+MDNode *MetadataBuilder::BuildDescriptorTableClause(
+const DescriptorTableClause &Clause) {
+  IRBuilder<> B(Ctx);
+  return MDNode::get(
+  Ctx, {
+   ClauseTypeToName(Ctx, Clause.Type),
+   ConstantAsMetadata::get(B.getInt32(Clause.NumDescriptors)),
+   ConstantAsMetadata::get(B.getInt32(Clause.Register.Number)),
+   ConstantAsMetadata::get(B.getInt32(Clause.Space)),
+   ConstantAsMetadata::get(

inbelic wrote:

Good catch. I think the specification is wrong then? Based on 
https://learn.microsoft.com/en-us/windows/win32/api/d3d12/ns-d3d12-d3d12_descriptor_range?redirectedfrom=MSDN
 we should need to pass that down.

I will raise this

https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] a7c76aa - Revert "Reland [EquivClasses] Introduce members iterator-helper (#130319)"

2025-03-07 Thread via llvm-branch-commits

Author: Vitaly Buka
Date: 2025-03-07T17:45:57-08:00
New Revision: a7c76aa6112e36a74d91dcd8b08cb114a6727556

URL: 
https://github.com/llvm/llvm-project/commit/a7c76aa6112e36a74d91dcd8b08cb114a6727556
DIFF: 
https://github.com/llvm/llvm-project/commit/a7c76aa6112e36a74d91dcd8b08cb114a6727556.diff

LOG: Revert "Reland [EquivClasses] Introduce members iterator-helper (#130319)"

This reverts commit 21d973dbb335547848b77c01b106734942893693.

Added: 


Modified: 
llvm/include/llvm/ADT/EquivalenceClasses.h
llvm/lib/Analysis/LoopAccessAnalysis.cpp
llvm/lib/Analysis/VectorUtils.cpp
llvm/unittests/ADT/EquivalenceClassesTest.cpp

Removed: 




diff  --git a/llvm/include/llvm/ADT/EquivalenceClasses.h 
b/llvm/include/llvm/ADT/EquivalenceClasses.h
index 345107cb0..4f98b84cf97d2 100644
--- a/llvm/include/llvm/ADT/EquivalenceClasses.h
+++ b/llvm/include/llvm/ADT/EquivalenceClasses.h
@@ -15,7 +15,6 @@
 #ifndef LLVM_ADT_EQUIVALENCECLASSES_H
 #define LLVM_ADT_EQUIVALENCECLASSES_H
 
-#include "llvm/ADT/iterator_range.h"
 #include 
 #include 
 #include 
@@ -179,9 +178,6 @@ class EquivalenceClasses {
   member_iterator member_end() const {
 return member_iterator(nullptr);
   }
-  iterator_range members(iterator I) const {
-return make_range(member_begin(I), member_end());
-  }
 
   /// findValue - Return an iterator to the specified value.  If it does not
   /// exist, end() is returned.

diff  --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp 
b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 207f5417934e5..38ee82b77a946 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -527,8 +527,9 @@ void RuntimePointerChecking::groupChecks(
 // iteration order within an equivalence class member is only dependent on
 // the order in which unions and insertions are performed on the
 // equivalence class, the iteration order is deterministic.
-for (const auto &MI : DepCands.members(LeaderI)) {
-  auto PointerI = PositionMap.find(MI.getPointer());
+for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
+ MI != ME; ++MI) {
+  auto PointerI = PositionMap.find(MI->getPointer());
   assert(PointerI != PositionMap.end() &&
  "pointer in equivalence class not found in PositionMap");
   for (unsigned Pointer : PointerI->second) {

diff  --git a/llvm/lib/Analysis/VectorUtils.cpp 
b/llvm/lib/Analysis/VectorUtils.cpp
index c0bc451973c6e..91ba68fe03324 100644
--- a/llvm/lib/Analysis/VectorUtils.cpp
+++ b/llvm/lib/Analysis/VectorUtils.cpp
@@ -845,7 +845,7 @@ llvm::computeMinimumValueSizes(ArrayRef 
Blocks, DemandedBits &DB,
 
   for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) {
 uint64_t LeaderDemandedBits = 0;
-for (Value *M : ECs.members(I))
+for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
   LeaderDemandedBits |= DBits[M];
 
 uint64_t MinBW = llvm::bit_width(LeaderDemandedBits);
@@ -857,7 +857,7 @@ llvm::computeMinimumValueSizes(ArrayRef 
Blocks, DemandedBits &DB,
 // indvars.
 // If we are required to shrink a PHI, abandon this entire equivalence 
class.
 bool Abort = false;
-for (Value *M : ECs.members(I))
+for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end()))
   if (isa(M) && MinBW < M->getType()->getScalarSizeInBits()) {
 Abort = true;
 break;
@@ -865,7 +865,7 @@ llvm::computeMinimumValueSizes(ArrayRef 
Blocks, DemandedBits &DB,
 if (Abort)
   continue;
 
-for (Value *M : ECs.members(I)) {
+for (Value *M : llvm::make_range(ECs.member_begin(I), ECs.member_end())) {
   auto *MI = dyn_cast(M);
   if (!MI)
 continue;

diff  --git a/llvm/unittests/ADT/EquivalenceClassesTest.cpp 
b/llvm/unittests/ADT/EquivalenceClassesTest.cpp
index c24c09d8a2815..70e161a03d988 100644
--- a/llvm/unittests/ADT/EquivalenceClassesTest.cpp
+++ b/llvm/unittests/ADT/EquivalenceClassesTest.cpp
@@ -7,7 +7,6 @@
 
//===--===//
 
 #include "llvm/ADT/EquivalenceClasses.h"
-#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -67,19 +66,6 @@ TEST(EquivalenceClassesTest, TwoSets) {
 EXPECT_FALSE(EqClasses.isEquivalent(i, j));
 }
 
-TEST(EquivalenceClassesTest, MembersIterator) {
-  EquivalenceClasses EC;
-  EC.unionSets(1, 2);
-  EC.insert(4);
-  EC.insert(5);
-  EC.unionSets(5, 1);
-  EXPECT_EQ(EC.getNumClasses(), 2u);
-
-  EquivalenceClasses::iterator I = EC.findValue(EC.getLeaderValue(1));
-  EXPECT_THAT(EC.members(I), testing::ElementsAre(5, 1, 2));
-  EXPECT_EQ(EC.members(EC.end()).begin(), EC.member_end());
-}
-
 // Type-parameterized tests: Run the same test cases with 
different element
 // types.
 template  class ParameterizedTest : public testing::Test {};



_

[llvm-branch-commits] [llvm] [AMDGPU] Add SubtargetFeature for dynamic VGPR mode (PR #130030)

2025-03-07 Thread Diana Picus via llvm-branch-commits


@@ -1239,6 +1239,12 @@ def FeatureXF32Insts : SubtargetFeature<"xf32-insts",
"v_mfma_f32_16x16x8_xf32 and v_mfma_f32_32x32x4_xf32"
  >;
 
+def FeatureDynamicVGPR : SubtargetFeature <"dynamic-vgpr",

rovka wrote:

That's right, this is enabled from above the backend. 

https://github.com/llvm/llvm-project/pull/130030
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM (PR #130065)

2025-03-07 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff 147e7aff2a06c2b55dac8b9c9d1cd0c366325264 
18645979e311d48be0926edac75cc92ed255fe5e --extensions h,cpp -- 
llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
llvm/lib/Target/AMDGPU/SIPreEmitPeephole.cpp
```





View the diff from clang-format here.


```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 4b4acbf82a..05eb609956 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -2166,9 +2166,8 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableSetWavePriority, CodeGenOptLevel::Less))
 addPass(AMDGPUSetWavePriorityPass());
 
-  if (TM.getOptLevel() > CodeGenOptLevel::None) 
+  if (TM.getOptLevel() > CodeGenOptLevel::None)
 addPass(SIPreEmitPeepholePass());
-  
 
   // The hazard recognizer that runs as part of the post-ra scheduler does not
   // guarantee to be able handle all hazards correctly. This is because if 
there

```




https://github.com/llvm/llvm-project/pull/130065
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port RemoveLoadsIntoFakeUses to NPM (PR #130068)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130068

>From f06e7f4a6b9638adc3ee286ce64191741e021d91 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 09:30:37 +
Subject: [PATCH] [CodeGen][NPM] Port RemoveLoadsIntoFakeUses to NPM

---
 .../llvm/CodeGen/RemoveLoadsIntoFakeUses.h| 30 +
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  2 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/RemoveLoadsIntoFakeUses.cpp  | 44 +++
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 .../CodeGen/X86/fake-use-remove-loads.mir |  2 +
 8 files changed, 73 insertions(+), 12 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h

diff --git a/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h 
b/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h
new file mode 100644
index 0..bbd5b8b430bf6
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/RemoveLoadsIntoFakeUses.h
@@ -0,0 +1,30 @@
+//===- llvm/CodeGen/RemoveLoadsIntoFakeUses.h ---*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H
+#define LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class RemoveLoadsIntoFakeUsesPass
+: public PassInfoMixin {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+
+  MachineFunctionProperties getRequiredProperties() const {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::NoVRegs);
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_REMOVELOADSINTOFAKEUSES_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index e5bffde815117..3fd3cbb28bc3e 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -265,7 +265,7 @@ void initializeRegionOnlyViewerPass(PassRegistry &);
 void initializeRegionPrinterPass(PassRegistry &);
 void initializeRegionViewerPass(PassRegistry &);
 void initializeRegisterCoalescerLegacyPass(PassRegistry &);
-void initializeRemoveLoadsIntoFakeUsesPass(PassRegistry &);
+void initializeRemoveLoadsIntoFakeUsesLegacyPass(PassRegistry &);
 void initializeRemoveRedundantDebugValuesLegacyPass(PassRegistry &);
 void initializeRenameIndependentSubregsLegacyPass(PassRegistry &);
 void initializeReplaceWithVeclibLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index aab2c58ac0f78..a86dc8d632a4e 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -70,6 +70,7 @@
 #include "llvm/CodeGen/RegUsageInfoPropagate.h"
 #include "llvm/CodeGen/RegisterCoalescerPass.h"
 #include "llvm/CodeGen/RegisterUsageInfo.h"
+#include "llvm/CodeGen/RemoveLoadsIntoFakeUses.h"
 #include "llvm/CodeGen/RemoveRedundantDebugValues.h"
 #include "llvm/CodeGen/RenameIndependentSubregs.h"
 #include "llvm/CodeGen/ReplaceWithVeclib.h"
@@ -998,6 +999,7 @@ Error CodeGenPassBuilder::addMachinePasses(
 
   addPass(FuncletLayoutPass());
 
+  addPass(RemoveLoadsIntoFakeUsesPass());
   addPass(StackMapLivenessPass());
   addPass(LiveDebugValuesPass());
   addPass(MachineSanitizerBinaryMetadata());
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 9300f6935aa90..cab8108ed30f6 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -181,6 +181,7 @@ MACHINE_FUNCTION_PASS("reg-usage-collector", 
RegUsageInfoCollectorPass())
 MACHINE_FUNCTION_PASS("reg-usage-propagation", RegUsageInfoPropagationPass())
 MACHINE_FUNCTION_PASS("register-coalescer", RegisterCoalescerPass())
 MACHINE_FUNCTION_PASS("rename-independent-subregs", 
RenameIndependentSubregsPass())
+MACHINE_FUNCTION_PASS("remove-loads-into-fake-uses", 
RemoveLoadsIntoFakeUsesPass())
 MACHINE_FUNCTION_PASS("remove-redundant-debug-values", 
RemoveRedundantDebugValuesPass())
 MACHINE_FUNCTION_PASS("require-all-machine-function-properties",
   RequireAllMachineFunctionPropertiesPass())
@@ -292,7 +293,6 @@ DUMMY_MACHINE_FUNCTION_PASS("ra-pbqp", RAPBQPPass)
 DUMMY_MACHINE_FUNCTION_PASS("regalloc", RegAllocPass)
 DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", RegAllocScoringPass)
 DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass)
-DUMMY_MACHINE_FUNCTION_PASS("remove-loads-into-fa

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PatchableFunction to NPM (PR #129866)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/129866

>From 399b9973e9f788a58c7476925a85f090d673ca0f Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 10:34:25 +
Subject: [PATCH] [CodeGen][NPM] Port PatchableFunction to NPM

---
 llvm/include/llvm/CodeGen/PatchableFunction.h | 30 +++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/PatchableFunction.cpp| 37 ++-
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 7 files changed, 62 insertions(+), 13 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/PatchableFunction.h

diff --git a/llvm/include/llvm/CodeGen/PatchableFunction.h 
b/llvm/include/llvm/CodeGen/PatchableFunction.h
new file mode 100644
index 0..d81a92f9eef26
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/PatchableFunction.h
@@ -0,0 +1,30 @@
+//===- llvm/CodeGen/PatchableFunction.h -*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_PATCHABLEFUNCTION_H
+#define LLVM_CODEGEN_PATCHABLEFUNCTION_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class PatchableFunctionPass : public PassInfoMixin {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+
+  MachineFunctionProperties getRequiredProperties() const {
+return MachineFunctionProperties().set(
+MachineFunctionProperties::Property::NoVRegs);
+  }
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_PATCHABLEFUNCTION_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index dcfd9fc6a86b9..f1c16e3b1cb40 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -225,7 +225,7 @@ void initializeOptimizePHIsLegacyPass(PassRegistry &);
 void initializePEIPass(PassRegistry &);
 void initializePHIEliminationPass(PassRegistry &);
 void initializePartiallyInlineLibCallsLegacyPassPass(PassRegistry &);
-void initializePatchableFunctionPass(PassRegistry &);
+void initializePatchableFunctionLegacyPass(PassRegistry &);
 void initializePeepholeOptimizerLegacyPass(PassRegistry &);
 void initializePhiValuesWrapperPassPass(PassRegistry &);
 void initializePhysicalRegisterUsageInfoWrapperLegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 426dc6c7eacfd..aab2c58ac0f78 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -59,6 +59,7 @@
 #include "llvm/CodeGen/MachineVerifier.h"
 #include "llvm/CodeGen/OptimizePHIs.h"
 #include "llvm/CodeGen/PHIElimination.h"
+#include "llvm/CodeGen/PatchableFunction.h"
 #include "llvm/CodeGen/PeepholeOptimizer.h"
 #include "llvm/CodeGen/PostRASchedulerList.h"
 #include "llvm/CodeGen/PreISelIntrinsicLowering.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 8b1373c0ffefd..bedbc3e88a7ce 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -152,6 +152,7 @@ MACHINE_FUNCTION_PASS("machine-scheduler", 
MachineSchedulerPass(TM))
 MACHINE_FUNCTION_PASS("machinelicm", MachineLICMPass())
 MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass())
 MACHINE_FUNCTION_PASS("opt-phis", OptimizePHIsPass())
+MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass())
 MACHINE_FUNCTION_PASS("peephole-opt", PeepholeOptimizerPass())
 MACHINE_FUNCTION_PASS("phi-node-elimination", PHIEliminationPass())
 MACHINE_FUNCTION_PASS("post-RA-sched", PostRASchedulerPass(TM))
@@ -279,7 +280,6 @@ DUMMY_MACHINE_FUNCTION_PASS("machine-sanmd", 
MachineSanitizerBinaryMetadata)
 DUMMY_MACHINE_FUNCTION_PASS("machine-uniformity", 
MachineUniformityInfoWrapperPass)
 DUMMY_MACHINE_FUNCTION_PASS("machineinstr-printer", MachineFunctionPrinterPass)
 DUMMY_MACHINE_FUNCTION_PASS("mirfs-discriminators", MIRAddFSDiscriminatorsPass)
-DUMMY_MACHINE_FUNCTION_PASS("patchable-function", PatchableFunctionPass)
 DUMMY_MACHINE_FUNCTION_PASS("postra-machine-sink", PostRAMachineSinkingPass)
 DUMMY_MACHINE_FUNCTION_PASS("print-machine-uniformity", 
MachineUniformityInfoPrinterPass)
 DUMMY_MACHINE_FUNCTION_PASS("processimpdefs", ProcessImplicitDefsPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index b299983503232..375176ed4b1ce 100644

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port FEntryInserter to NPM (PR #129857)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/129857

>From 4b18d66c696656f0f04dff15028904f9b4c17901 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 09:19:08 +
Subject: [PATCH] [CodeGen][NPM] Port FEntryInserter to NPM

---
 llvm/include/llvm/CodeGen/FEntryInserter.h| 25 ++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/FEntryInserter.cpp   | 33 ++-
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 7 files changed, 54 insertions(+), 12 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/FEntryInserter.h

diff --git a/llvm/include/llvm/CodeGen/FEntryInserter.h 
b/llvm/include/llvm/CodeGen/FEntryInserter.h
new file mode 100644
index 0..4a82975cad879
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/FEntryInserter.h
@@ -0,0 +1,25 @@
+//===- llvm/CodeGen/FEntryInserter.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_FENTRYINSERTER_H
+#define LLVM_CODEGEN_FENTRYINSERTER_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class FEntryInserterPass : public PassInfoMixin<FEntryInserterPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_FENTRYINSERTER_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 5f8e55d783161..63917b2b7f729 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -112,7 +112,7 @@ void initializeExpandPostRALegacyPass(PassRegistry &);
 void initializeExpandReductionsPass(PassRegistry &);
 void initializeExpandVariadicsPass(PassRegistry &);
 void initializeExternalAAWrapperPassPass(PassRegistry &);
-void initializeFEntryInserterPass(PassRegistry &);
+void initializeFEntryInserterLegacyPass(PassRegistry &);
 void initializeFinalizeISelPass(PassRegistry &);
 void initializeFinalizeMachineBundlesPass(PassRegistry &);
 void initializeFixIrreduciblePass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 4db489d804013..bab475d740467 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/ExpandMemCmp.h"
 #include "llvm/CodeGen/ExpandPostRAPseudos.h"
 #include "llvm/CodeGen/ExpandReductions.h"
+#include "llvm/CodeGen/FEntryInserter.h"
 #include "llvm/CodeGen/FinalizeISel.h"
 #include "llvm/CodeGen/GCMetadata.h"
 #include "llvm/CodeGen/GlobalMerge.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index d032087fa7073..667a7352930ea 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -142,6 +142,7 @@ MACHINE_FUNCTION_PASS("dead-mi-elimination", 
DeadMachineInstructionElimPass())
 MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
 MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
 MACHINE_FUNCTION_PASS("early-tailduplication", EarlyTailDuplicatePass())
+MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass())
 MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
 MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("machine-cp", MachineCopyPropagationPass())
@@ -258,7 +259,6 @@ DUMMY_MACHINE_FUNCTION_PASS("cfi-fixup", CFIFixupPass)
 DUMMY_MACHINE_FUNCTION_PASS("cfi-instr-inserter", CFIInstrInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("detect-dead-lanes", DetectDeadLanesPass)
 DUMMY_MACHINE_FUNCTION_PASS("dot-machine-cfg", MachineCFGPrinter)
-DUMMY_MACHINE_FUNCTION_PASS("fentry-insert", FEntryInserterPass)
 DUMMY_MACHINE_FUNCTION_PASS("fixup-statepoint-caller-saved", 
FixupStatepointCallerSavedPass)
 DUMMY_MACHINE_FUNCTION_PASS("fs-profile-loader", MIRProfileLoaderNewPass)
 DUMMY_MACHINE_FUNCTION_PASS("funclet-layout", FuncletLayoutPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 2cc4bf14e9804..effb556e63435 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -43,7 +43,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeExpandLargeFpConvertLegacyPassPass(Registry);
   initializeExpandMemCmpLegacyPassPass(Registry);
   initializeExpandPostRALegacyPass(Registry);
-  initiali

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port GCNCreateVOPD to NPM (PR #130059)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130059

>From 9ba01338705902014ddf5f6d4285cd0563ce1e28 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 10:52:00 +
Subject: [PATCH] [AMDGPU][NPM] Port GCNCreateVOPD to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  7 ++-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  1 +
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  4 +-
 llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp  | 53 ---
 4 files changed, 43 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 57297288eecb4..f208a8bb9964b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -358,6 +358,11 @@ class SIModeRegisterPass : public PassInfoMixin<SIModeRegisterPass> {
   PreservedAnalyses run(MachineFunction &F, MachineFunctionAnalysisManager 
&AM);
 };
 
+class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager 
&AM);
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -443,7 +448,7 @@ extern char &SIFormMemoryClausesID;
 void initializeSIPostRABundlerLegacyPass(PassRegistry &);
 extern char &SIPostRABundlerLegacyID;
 
-void initializeGCNCreateVOPDPass(PassRegistry &);
+void initializeGCNCreateVOPDLegacyPass(PassRegistry &);
 extern char &GCNCreateVOPDID;
 
 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 1050855176c04..0e3dcb4267ede 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -103,6 +103,7 @@ MACHINE_FUNCTION_PASS("amdgpu-rewrite-partial-reg-uses", 
GCNRewritePartialRegUse
 MACHINE_FUNCTION_PASS("amdgpu-pre-ra-optimizations", 
GCNPreRAOptimizationsPass())
 MACHINE_FUNCTION_PASS("amdgpu-nsa-reassign", GCNNSAReassignPass())
 MACHINE_FUNCTION_PASS("gcn-dpp-combine", GCNDPPCombinePass())
+MACHINE_FUNCTION_PASS("gcn-create-vopd", GCNCreateVOPDPass())
 MACHINE_FUNCTION_PASS("si-fix-sgpr-copies", SIFixSGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fix-vgpr-copies", SIFixVGPRCopiesPass())
 MACHINE_FUNCTION_PASS("si-fold-operands", SIFoldOperandsPass());
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index ce3dcd920bce3..73ae9135eb319 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -546,7 +546,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesLegacyPass(*PR);
   initializeSIPostRABundlerLegacyPass(*PR);
-  initializeGCNCreateVOPDPass(*PR);
+  initializeGCNCreateVOPDLegacyPass(*PR);
   initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
   initializeAMDGPUAAWrapperPassPass(*PR);
   initializeAMDGPUExternalAAWrapperPass(*PR);
@@ -2149,7 +2149,7 @@ void 
AMDGPUCodeGenPassBuilder::addPostRegAlloc(AddMachinePass &addPass) const {
 
 void AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
-// TODO: addPass(GCNCreateVOPDPass());
+addPass(GCNCreateVOPDPass());
   }
   // TODO: addPass(SIMemoryLegalizerPass());
   // TODO: addPass(SIInsertWaitcntsPass());
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp 
b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index d40a1a2a10d9b..614262e817162 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "gcn-create-vopd"
@@ -36,7 +37,7 @@ using namespace llvm;
 
 namespace {
 
-class GCNCreateVOPD : public MachineFunctionPass {
+class GCNCreateVOPD {
 private:
 class VOPDCombineInfo {
 public:
@@ -49,20 +50,8 @@ class GCNCreateVOPD : public MachineFunctionPass {
 };
 
 public:
-  static char ID;
   const GCNSubtarget *ST = nullptr;
 
-  GCNCreateVOPD() : MachineFunctionPass(ID) {}
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-AU.setPreservesCFG();
-MachineFunctionPass::getAnalysisUsage(AU);
-  }
-
-  StringRef getPassName() const override {
-return "GCN Create VOPD Instructions";
-  }
-
   bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
 auto *FirstMI = CI.FirstMI;
 auto *SecondMI = CI.SecondMI;
@@ -112,9 +101,7 @@ class GCNCreateVOPD : public MachineFunctionPass {
 return true;
   }
 
-  bool runOnMachineFunction(MachineFunction &MF) override {
-if (skipFunction(MF.getFunction()))
-  return false;
+  bool ru

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port GCNCreateVOPD to NPM (PR #130059)

2025-03-07 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:



You can test this locally with the following command:


```bash
git-clang-format --diff 399b9973e9f788a58c7476925a85f090d673ca0f 
9ba01338705902014ddf5f6d4285cd0563ce1e28 --extensions h,cpp -- 
llvm/lib/Target/AMDGPU/AMDGPU.h llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
```





View the diff from clang-format here.


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index f208a8bb99..f331f741e3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -360,7 +360,8 @@ public:
 
 class GCNCreateVOPDPass : public PassInfoMixin<GCNCreateVOPDPass> {
 public:
-  PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager 
&AM);
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &AM);
 };
 
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
diff --git a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp 
b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
index 614262e817..b58511cafb 100644
--- a/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNCreateVOPD.cpp
@@ -49,56 +49,59 @@ private:
   MachineInstr *SecondMI;
 };
 
-public:
-  const GCNSubtarget *ST = nullptr;
-
-  bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
-auto *FirstMI = CI.FirstMI;
-auto *SecondMI = CI.SecondMI;
-unsigned Opc1 = FirstMI->getOpcode();
-unsigned Opc2 = SecondMI->getOpcode();
-unsigned EncodingFamily =
-AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
-int NewOpcode =
-AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
-AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
-assert(NewOpcode != -1 &&
-   "Should have previously determined this as a possible VOPD\n");
-
-auto VOPDInst = BuildMI(*FirstMI->getParent(), FirstMI,
-FirstMI->getDebugLoc(), SII->get(NewOpcode))
-.setMIFlags(FirstMI->getFlags() | 
SecondMI->getFlags());
-
-namespace VOPD = AMDGPU::VOPD;
-MachineInstr *MI[] = {FirstMI, SecondMI};
-auto InstInfo =
-AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
-
-for (auto CompIdx : VOPD::COMPONENTS) {
-  auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
-  VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
-}
-
-for (auto CompIdx : VOPD::COMPONENTS) {
-  auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
-  for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum; ++CompSrcIdx) {
-auto MCOprIdx = 
InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+  public:
+const GCNSubtarget *ST = nullptr;
+
+bool doReplace(const SIInstrInfo *SII, VOPDCombineInfo &CI) {
+  auto *FirstMI = CI.FirstMI;
+  auto *SecondMI = CI.SecondMI;
+  unsigned Opc1 = FirstMI->getOpcode();
+  unsigned Opc2 = SecondMI->getOpcode();
+  unsigned EncodingFamily =
+  AMDGPU::getVOPDEncodingFamily(SII->getSubtarget());
+  int NewOpcode =
+  AMDGPU::getVOPDFull(AMDGPU::getVOPDOpcode(Opc1),
+  AMDGPU::getVOPDOpcode(Opc2), EncodingFamily);
+  assert(NewOpcode != -1 &&
+ "Should have previously determined this as a possible VOPD\n");
+
+  auto VOPDInst =
+  BuildMI(*FirstMI->getParent(), FirstMI, FirstMI->getDebugLoc(),
+  SII->get(NewOpcode))
+  .setMIFlags(FirstMI->getFlags() | SecondMI->getFlags());
+
+  namespace VOPD = AMDGPU::VOPD;
+  MachineInstr *MI[] = {FirstMI, SecondMI};
+  auto InstInfo =
+  AMDGPU::getVOPDInstInfo(FirstMI->getDesc(), SecondMI->getDesc());
+
+  for (auto CompIdx : VOPD::COMPONENTS) {
+auto MCOprIdx = InstInfo[CompIdx].getIndexOfDstInMCOperands();
 VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
   }
-}
 
-SII->fixImplicitOperands(*VOPDInst);
-for (auto CompIdx : VOPD::COMPONENTS)
-  VOPDInst.copyImplicitOps(*MI[CompIdx]);
+  for (auto CompIdx : VOPD::COMPONENTS) {
+auto CompSrcOprNum = InstInfo[CompIdx].getCompSrcOperandsNum();
+for (unsigned CompSrcIdx = 0; CompSrcIdx < CompSrcOprNum;
+ ++CompSrcIdx) {
+  auto MCOprIdx =
+  InstInfo[CompIdx].getIndexOfSrcInMCOperands(CompSrcIdx);
+  VOPDInst.add(MI[CompIdx]->getOperand(MCOprIdx));
+}
+  }
+
+  SII->fixImplicitOperands(*VOPDInst);
+  for (auto CompIdx : VOPD::COMPONENTS)
+VOPDInst.copyImplicitOps(*MI[CompIdx]);
 
-LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
-  << *CI.FirstMI << "\tY: " << *CI.SecondMI << "\n");
+  LLVM_DEBUG(dbgs() << "VOPD Fused: " << *VOPDInst << " from\tX: "
+<< *CI.Firs

[llvm-branch-commits] [llvm] [AMDGPU] Allocate scratch space for dVGPRs for CWSR (PR #130055)

2025-03-07 Thread Matt Arsenault via llvm-branch-commits


@@ -511,6 +511,14 @@ SIRegisterInfo::getLargestLegalSuperClass(const 
TargetRegisterClass *RC,
 Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
   const SIFrameLowering *TFI = ST.getFrameLowering();
   const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
+
+  // If we need to reserve scratch space for saving the VGPRs, then we should
+  // use the frame register for accessing our own frame (which may start at a
+  // non-zero offset).
+  if (TFI->mayReserveScratchForCWSR(MF))
+return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()

arsenm wrote:

Why not just make hasFP true if mayReserveScratchForCWSR?

https://github.com/llvm/llvm-project/pull/130055
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Add SubtargetFeature for dynamic VGPR mode (PR #130030)

2025-03-07 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm commented:

It seems to me this should be a separate attribute; it's not really part of the 
target.

https://github.com/llvm/llvm-project/pull/130030
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] clang: Switch linker-wrapper test to unsupported windows (PR #130247)

2025-03-07 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

### Merge activity

* **Mar 7, 4:21 AM EST**: A user started a stack merge that includes this pull 
request via 
[Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/130247).


https://github.com/llvm/llvm-project/pull/130247
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port StackFrameLayoutAnalysisPass to NPM (PR #130070)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130070

>From dc9e5f0c8e37c54b7ba8fa24c57e8c8912861619 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Thu, 6 Mar 2025 10:45:25 +
Subject: [PATCH] [CodeGen][NPM] Port StackFrameLayoutAnalysisPass to NPM

---
 .../CodeGen/StackFrameLayoutAnalysisPass.h| 26 
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  3 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 .../CodeGen/StackFrameLayoutAnalysisPass.cpp  | 61 +--
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 .../CodeGen/X86/stack-frame-layout-remarks.ll |  1 +
 8 files changed, 75 insertions(+), 23 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/StackFrameLayoutAnalysisPass.h

diff --git a/llvm/include/llvm/CodeGen/StackFrameLayoutAnalysisPass.h 
b/llvm/include/llvm/CodeGen/StackFrameLayoutAnalysisPass.h
new file mode 100644
index 0..5283cda30da12
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/StackFrameLayoutAnalysisPass.h
@@ -0,0 +1,26 @@
+//===- llvm/CodeGen/StackFrameLayoutAnalysisPass.h --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_STACKFRAMELAYOUTANALYSISPASS_H
+#define LLVM_CODEGEN_STACKFRAMELAYOUTANALYSISPASS_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class StackFrameLayoutAnalysisPass
+    : public PassInfoMixin<StackFrameLayoutAnalysisPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_STACKFRAMELAYOUTANALYSISPASS_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index c7bc4320cf8f0..9068aee8f8193 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -290,7 +290,7 @@ void initializeSlotIndexesWrapperPassPass(PassRegistry &);
 void initializeSpeculativeExecutionLegacyPassPass(PassRegistry &);
 void initializeSpillPlacementWrapperLegacyPass(PassRegistry &);
 void initializeStackColoringLegacyPass(PassRegistry &);
-void initializeStackFrameLayoutAnalysisPassPass(PassRegistry &);
+void initializeStackFrameLayoutAnalysisLegacyPass(PassRegistry &);
 void initializeStaticDataSplitterPass(PassRegistry &);
 void initializeStackMapLivenessPass(PassRegistry &);
 void initializeStackProtectorPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 74cdc7d66810b..8cba36b36fbb2 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -80,6 +80,7 @@
 #include "llvm/CodeGen/ShadowStackGCLowering.h"
 #include "llvm/CodeGen/SjLjEHPrepare.h"
 #include "llvm/CodeGen/StackColoring.h"
+#include "llvm/CodeGen/StackFrameLayoutAnalysisPass.h"
 #include "llvm/CodeGen/StackProtector.h"
 #include "llvm/CodeGen/StackSlotColoring.h"
 #include "llvm/CodeGen/TailDuplication.h"
@@ -1015,6 +1016,8 @@ Error CodeGenPassBuilder::addMachinePasses(
   addPass(MachineOutlinerPass(RunOnAllFunctions));
   }
 
+  addPass(StackFrameLayoutAnalysisPass());
+
   // Add passes that directly emit MI after all other MI passes.
   derived().addPreEmitPass2(addPass);
 
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 8fa21751392f3..01dd423de6955 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -187,6 +187,7 @@ MACHINE_FUNCTION_PASS("remove-redundant-debug-values", 
RemoveRedundantDebugValue
 MACHINE_FUNCTION_PASS("require-all-machine-function-properties",
   RequireAllMachineFunctionPropertiesPass())
 MACHINE_FUNCTION_PASS("stack-coloring", StackColoringPass())
+MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass())
 MACHINE_FUNCTION_PASS("stack-slot-coloring", StackSlotColoringPass())
 MACHINE_FUNCTION_PASS("tailduplication", TailDuplicatePass())
 MACHINE_FUNCTION_PASS("trigger-verifier-error", TriggerVerifierErrorPass())
@@ -295,7 +296,6 @@ DUMMY_MACHINE_FUNCTION_PASS("regallocscoringpass", 
RegAllocScoringPass)
 DUMMY_MACHINE_FUNCTION_PASS("regbankselect", RegBankSelectPass)
 DUMMY_MACHINE_FUNCTION_PASS("reset-machine-function", ResetMachineFunctionPass)
 DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass)
-DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass)
 DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass)
 DUMMY_MA

[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-07 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-selectiondag

Author: Diana Picus (rovka)


Changes

The llvm.amdgcn.cs.chain intrinsic has a 'flags' operand which may indicate 
that we want to reallocate the VGPRs before performing the call.

A call with the following arguments:
```
llvm.amdgcn.cs.chain %callee, %exec, %sgpr_args, %vgpr_args,
  /*flags*/0x1, %num_vgprs, %fallback_exec, %fallback_callee
```
is supposed to do the following:
- copy the SGPR and VGPR args into their respective registers
- try to change the VGPR allocation
- if the allocation has succeeded, set EXEC to %exec and jump to %callee, 
otherwise set EXEC to %fallback_exec and jump to %fallback_callee

This patch implements the dynamic VGPR behaviour by generating an S_ALLOC_VGPR 
followed by S_CSELECT_B32/64 instructions for the EXEC and callee. The rest of 
the call sequence is left undisturbed (i.e. identical to the case where the 
flags are 0 and we don't use dynamic VGPRs). We achieve this by introducing 
some new pseudos (SI_CS_CHAIN_TC_Wn_DVGPR) which are expanded in the 
SILateBranchLowering pass, just like the simpler SI_CS_CHAIN_TC_Wn pseudos. The 
main reason is so that we don't risk other passes (particularly the PostRA 
scheduler) introducing instructions between the S_ALLOC_VGPR and the jump. Such 
instructions might end up using VGPRs that have been deallocated, or the wrong 
EXEC mask. Once the whole backend treats S_ALLOC_VGPR and changes to EXEC as 
barriers for instructions that use VGPRs, we could in principle move the 
expansion earlier (but in the absence of a good reason for that my personal 
preference is to keep it later in order to make debugging easier).

Since the expansion happens after register allocation, we're careful to select 
constants to immediate operands instead of letting ISel generate S_MOVs which 
could interfere with register allocation (i.e. make it look like we need more 
registers than we actually do).

For GFX12, S_ALLOC_VGPR only works in wave32 mode, so we bail out during ISel 
in wave64 mode. However, we can define the pseudos for wave64 too so it's easy 
to handle if future generations support it.

---

Patch is 94.66 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/130094.diff


11 Files Affected:

- (modified) llvm/include/llvm/CodeGen/SelectionDAGISel.h (+15-14) 
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+9-4) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp (+95-31) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+66-22) 
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+51-25) 
- (modified) llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp (+55-7) 
- (added) llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll 
(+97) 
- (modified) llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w32.ll 
(+12-24) 
- (modified) llvm/test/CodeGen/AMDGPU/isel-amdgcn-cs-chain-intrinsic-w64.ll 
(+12-24) 
- (added) 
llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll (+315) 
- (added) llvm/test/CodeGen/AMDGPU/remove-register-flags.mir (+19) 


``diff
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h 
b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index e9452a6dc6233..55f8f19d437a0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -328,20 +328,21 @@ class SelectionDAGISel {
   };
 
   enum {
-OPFL_None   = 0,  // Node has no chain or glue input and isn't 
variadic.
-OPFL_Chain  = 1, // Node has a chain input.
-OPFL_GlueInput  = 2, // Node has a glue input.
-OPFL_GlueOutput = 4, // Node has a glue output.
-OPFL_MemRefs= 8, // Node gets accumulated MemRefs.
-OPFL_Variadic0  = 1<<4,  // Node is variadic, root has 0 fixed inputs.
-OPFL_Variadic1  = 2<<4,  // Node is variadic, root has 1 fixed inputs.
-OPFL_Variadic2  = 3<<4,  // Node is variadic, root has 2 fixed inputs.
-OPFL_Variadic3  = 4<<4,  // Node is variadic, root has 3 fixed inputs.
-OPFL_Variadic4  = 5<<4,  // Node is variadic, root has 4 fixed inputs.
-OPFL_Variadic5  = 6<<4,  // Node is variadic, root has 5 fixed inputs.
-OPFL_Variadic6  = 7<<4,  // Node is variadic, root has 6 fixed inputs.
-
-OPFL_VariadicInfo = OPFL_Variadic6
+OPFL_None = 0,   // Node has no chain or glue input and isn't variadic.
+OPFL_Chain = 1,  // Node has a chain input.
+OPFL_GlueInput = 2,  // Node has a glue input.
+OPFL_GlueOutput = 4, // Node has a glue output.
+OPFL_MemRefs = 8,// Node gets accumulated MemRefs.
+OPFL_Variadic0 = 1 << 4, // Node is variadic, root has 0 fixed inputs.
+OPFL_Variadic1 = 2 << 4, // Node is variadic, root has 1 fixed inputs.
+OPFL_Variadic2 = 3 << 4, // Node is variadic, root has 2 fixed inputs.
+OPFL_Variadic3 = 4 << 4, // Node is variadic, root has 3 fixed inputs.
+OPFL_Variadic4 = 5 << 4, // Node is v

[llvm-branch-commits] [llvm] release/20.x: [LoongArch] Relax the restrictions of inlineasm operand modifier 'u' and 'w' (#129864) (PR #130009)

2025-03-07 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: None (llvmbot)


Changes

Backport bae6644e1227b2555f92b1962dac6c2444eaaaf2

Requested by: @SixWeining

---
Full diff: https://github.com/llvm/llvm-project/pull/130009.diff


3 Files Affected:

- (modified) llvm/docs/LangRef.rst (+2) 
- (modified) llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp (+22-13) 
- (modified) llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll 
(+40) 


``diff
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index e002195cb7ed5..1c8eaa60e1c8a 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -5826,6 +5826,8 @@ Hexagon:
 
 LoongArch:
 
+- ``u``: Print an LASX register.
+- ``w``: Print an LSX register.
 - ``z``: Print $zero register if operand is zero, otherwise print it normally.
 
 MSP430:
diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp 
b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
index 169f9568e5362..895a8e2646692 100644
--- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp
@@ -90,20 +90,29 @@ bool LoongArchAsmPrinter::PrintAsmOperand(const 
MachineInstr *MI, unsigned OpNo,
 return false;
   }
   break;
-case 'w': // Print LSX registers.
-  if (MO.getReg().id() >= LoongArch::VR0 &&
-  MO.getReg().id() <= LoongArch::VR31)
-break;
-  // The modifier is 'w' but the operand is not an LSX register; Report an
-  // unknown operand error.
-  return true;
 case 'u': // Print LASX registers.
-  if (MO.getReg().id() >= LoongArch::XR0 &&
-  MO.getReg().id() <= LoongArch::XR31)
-break;
-  // The modifier is 'u' but the operand is not an LASX register; Report an
-  // unknown operand error.
-  return true;
+case 'w': // Print LSX registers.
+{
+  // If the operand is an LASX, LSX or floating point register, print the
+  // name of LASX or LSX register with the same index in that register
+  // class.
+  unsigned RegID = MO.getReg().id(), FirstReg;
+  if (RegID >= LoongArch::XR0 && RegID <= LoongArch::XR31)
+FirstReg = LoongArch::XR0;
+  else if (RegID >= LoongArch::VR0 && RegID <= LoongArch::VR31)
+FirstReg = LoongArch::VR0;
+  else if (RegID >= LoongArch::F0_64 && RegID <= LoongArch::F31_64)
+FirstReg = LoongArch::F0_64;
+  else if (RegID >= LoongArch::F0 && RegID <= LoongArch::F31)
+FirstReg = LoongArch::F0;
+  else
+return true;
+  OS << '$'
+ << LoongArchInstPrinter::getRegisterName(
+RegID - FirstReg +
+(ExtraCode[0] == 'u' ? LoongArch::XR0 : LoongArch::VR0));
+  return false;
+}
   // TODO: handle other extra codes if any.
 }
   }
diff --git a/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll 
b/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll
index 201e34c8b5ae0..8b25a6525381b 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/inline-asm-operand-modifier.ll
@@ -12,3 +12,43 @@ entry:
   %0 = tail call <4 x i64> asm sideeffect "xvldi ${0:u}, 1", "=f"()
   ret void
 }
+
+define void @test_u_2xi64() nounwind {
+; CHECK-LABEL: test_u_2xi64:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:#APP
+; CHECK-NEXT:xvldi $xr0, 1
+; CHECK-NEXT:#NO_APP
+; CHECK-NEXT:ret
+entry:
+  %0 = tail call <2 x i64> asm sideeffect "xvldi ${0:u}, 1", "=f"()
+  ret void
+}
+
+define void @test_w_4xi64() nounwind {
+; CHECK-LABEL: test_w_4xi64:
+; CHECK:   # %bb.0: # %entry
+; CHECK-NEXT:#APP
+; CHECK-NEXT:vldi $vr0, 1
+; CHECK-NEXT:#NO_APP
+; CHECK-NEXT:ret
+entry:
+  %0 = tail call <4 x i64> asm sideeffect "vldi ${0:w}, 1", "=f"()
+  ret void
+}
+
+define void @m128i_to_m256i(ptr %out, ptr %in) nounwind {
+; CHECK-LABEL: m128i_to_m256i:
+; CHECK:   # %bb.0:
+; CHECK-NEXT:vld $vr0, $a1, 0
+; CHECK-NEXT:xvrepli.b $xr1, 0
+; CHECK-NEXT:#APP
+; CHECK-NEXT:xvpermi.q $xr1, $xr0, 32
+; CHECK-NEXT:#NO_APP
+; CHECK-NEXT:xvst $xr1, $a0, 0
+; CHECK-NEXT:ret
+  %v = load <2 x i64>, ptr %in
+  %x = call <4 x i64> asm sideeffect "xvpermi.q ${0:u}, ${1:u}, 32", 
"=f,f,0"(<2 x i64> %v, <4 x i64> zeroinitializer)
+  store <4 x i64> %x, ptr %out
+  ret void
+}

``




https://github.com/llvm/llvm-project/pull/130009
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-07 Thread Wei Xiao via llvm-branch-commits


@@ -112,21 +117,52 @@ bool 
StaticDataSplitter::runOnMachineFunction(MachineFunction &MF) {
   return Changed;
 }
 
+const Constant *
+StaticDataSplitter::getConstant(const MachineOperand &Op,
+const TargetMachine &TM,
+const MachineConstantPool *MCP) {
+  if (!Op.isGlobal() && !Op.isCPI())
+return nullptr;
+
+  if (Op.isGlobal()) {
+// Find global variables with local linkage.
+const GlobalVariable *GV = getLocalLinkageGlobalVariable(Op.getGlobal());
+// Skip 'special' global variables conservatively because they are
+// often handled specially, and skip those not in static data
+// sections.
+if (!GV || GV->getName().starts_with("llvm.") ||
+!inStaticDataSection(GV, TM))
+  return nullptr;
+return GV;

williamweixiao wrote:

What's the motivation to handle "GlobalVariable" here? Any test to cover it?

https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port XRayInstrumentation to NPM (PR #129865)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/129865

>From 20188b3b5d1dbfea562b0912bf4624d9c7c18ca1 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 10:11:27 +
Subject: [PATCH] [CodeGen][NPM] Port XRayInstrumentation to NPM

---
 .../llvm/CodeGen/XRayInstrumentation.h| 25 ++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/XRayInstrumentation.cpp  | 90 +++
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/test/CodeGen/X86/xray-empty-firstmbb.mir |  1 +
 .../X86/xray-multiplerets-in-blocks.mir   |  1 +
 9 files changed, 107 insertions(+), 18 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/XRayInstrumentation.h

diff --git a/llvm/include/llvm/CodeGen/XRayInstrumentation.h 
b/llvm/include/llvm/CodeGen/XRayInstrumentation.h
new file mode 100644
index 0..b8754d5e2c9db
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/XRayInstrumentation.h
@@ -0,0 +1,25 @@
+//===- llvm/CodeGen/XRayInstrumentation.h *- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+
+#ifndef LLVM_CODEGEN_XRAYINSTRUMENTATION_H
+#define LLVM_CODEGEN_XRAYINSTRUMENTATION_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class XRayInstrumentationPass : public PassInfoMixin {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_XRAYINSTRUMENTATION_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 63917b2b7f729..dcfd9fc6a86b9 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -320,7 +320,7 @@ void initializeVirtRegRewriterPass(PassRegistry &);
 void initializeWasmEHPreparePass(PassRegistry &);
 void initializeWinEHPreparePass(PassRegistry &);
 void initializeWriteBitcodePassPass(PassRegistry &);
-void initializeXRayInstrumentationPass(PassRegistry &);
+void initializeXRayInstrumentationLegacyPass(PassRegistry &);
 
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index bab475d740467..426dc6c7eacfd 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -85,6 +85,7 @@
 #include "llvm/CodeGen/UnreachableBlockElim.h"
 #include "llvm/CodeGen/WasmEHPrepare.h"
 #include "llvm/CodeGen/WinEHPrepare.h"
+#include "llvm/CodeGen/XRayInstrumentation.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/IRPrinter/IRPrintingPasses.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 667a7352930ea..8b1373c0ffefd 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -188,6 +188,7 @@ MACHINE_FUNCTION_PASS("trigger-verifier-error", 
TriggerVerifierErrorPass())
 MACHINE_FUNCTION_PASS("two-address-instruction", TwoAddressInstructionPass())
 MACHINE_FUNCTION_PASS("verify", MachineVerifierPass())
 MACHINE_FUNCTION_PASS("verify", 
MachineTraceMetricsVerifierPass())
+MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass())
 #undef MACHINE_FUNCTION_PASS
 
 #ifndef MACHINE_FUNCTION_PASS_WITH_PARAMS
@@ -296,5 +297,4 @@ DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", 
StackFrameLayoutAnalysisPass)
 DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass)
 DUMMY_MACHINE_FUNCTION_PASS("unpack-mi-bundles", UnpackMachineBundlesPass)
 DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass)
-DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass)
 #undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index effb556e63435..b299983503232 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -144,5 +144,5 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeVirtRegRewriterPass(Registry);
   initializeWasmEHPreparePass(Registry);
   initializeWinEHPreparePass(Registry);
-  initializeXRayInstrumentationPass(Registry);
+  initializeXRayInstrumentationLegacyPass(Registry);
 }
diff --git a/llvm/lib/CodeGen/XRayInstrumentation.cpp 
b/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 0873d9956356e..00aa26c5f369a 100644
--- a/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/llvm/lib/Cod

[llvm-branch-commits] [flang] Reland " [flang] Rely on global initialization for simpler derived types" (PR #130290)

2025-03-07 Thread via llvm-branch-commits

https://github.com/jeanPerier created 
https://github.com/llvm/llvm-project/pull/130290

Reland #114002 with an implementation of the FIXME that should solve the 
regressions that have been seen.
The first commit is the original PR, the second is the fix.

>From ad33569ce2ff3a97ea9767817e37ea85119dd5f9 Mon Sep 17 00:00:00 2001
From: Jean Perier 
Date: Fri, 7 Mar 2025 06:51:29 -0800
Subject: [PATCH 1/2] Revert "Revert " [flang] Rely on global initialization
 for simpler derived types" (#130278)"

This reverts commit d31a7dde485461f564a6ae995a3fd58b2aa1bfa5.
---
 .../flang/Optimizer/Support/InternalNames.h   |  1 +
 flang/lib/Lower/ConvertVariable.cpp   | 63 ++-
 .../Lower/HLFIR/structure-constructor.f90 | 63 +++
 .../Lower/OpenMP/private-derived-type.f90 | 15 ++---
 flang/test/Lower/default-initialization.f90   | 48 +++---
 .../test/Lower/derived-type-finalization.f90  |  9 ++-
 flang/test/Lower/derived-type-temp.f90|  8 +--
 .../Lower/forall/forall-allocatable-2.f90 |  9 +--
 flang/test/Lower/pointer-default-init.f90 |  5 +-
 9 files changed, 131 insertions(+), 90 deletions(-)

diff --git a/flang/include/flang/Optimizer/Support/InternalNames.h 
b/flang/include/flang/Optimizer/Support/InternalNames.h
index 41f2cb9842dc7..62375ab8f9de3 100644
--- a/flang/include/flang/Optimizer/Support/InternalNames.h
+++ b/flang/include/flang/Optimizer/Support/InternalNames.h
@@ -30,6 +30,7 @@ static constexpr llvm::StringRef kProcPtrSeparator = ".p.";
 static constexpr llvm::StringRef kSpecialBindingSeparator = ".s.";
 static constexpr llvm::StringRef kBindingTableSeparator = ".v.";
 static constexpr llvm::StringRef boxprocSuffix = "UnboxProc";
+static constexpr llvm::StringRef kDerivedTypeInitSuffix = "DerivedInit";
 
 /// Internal name mangling of identifiers
 ///
diff --git a/flang/lib/Lower/ConvertVariable.cpp 
b/flang/lib/Lower/ConvertVariable.cpp
index 48f7b9f99e960..8a7be19f8040b 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -798,8 +798,67 @@ void Fortran::lower::defaultInitializeAtRuntime(
 })
 .end();
   } else {
-mlir::Value box = builder.createBox(loc, exv);
-fir::runtime::genDerivedTypeInitialize(builder, loc, box);
+/// For "simpler" types, relying on "_FortranAInitialize"
+/// leads to poor runtime performance. Hence optimize
+/// the same.
+const Fortran::semantics::DeclTypeSpec *declTy = sym.GetType();
+mlir::Type symTy = converter.genType(sym);
+const auto *details =
+sym.detailsIf();
+if (details && !Fortran::semantics::IsPolymorphic(sym) &&
+declTy->category() ==
+Fortran::semantics::DeclTypeSpec::Category::TypeDerived &&
+!mlir::isa(symTy) &&
+!sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) &&
+!sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
+  std::string globalName = fir::NameUniquer::doGenerated(
+  (converter.mangleName(*declTy->AsDerived()) + fir::kNameSeparator +
+   fir::kDerivedTypeInitSuffix)
+  .str());
+  mlir::Location loc = genLocation(converter, sym);
+  mlir::StringAttr linkage = builder.createInternalLinkage();
+  fir::GlobalOp global = builder.getNamedGlobal(globalName);
+  if (!global && details->init()) {
+global = builder.createGlobal(loc, symTy, globalName, linkage,
+  mlir::Attribute{},
+  /*isConst=*/true,
+  /*isTarget=*/false,
+  /*dataAttr=*/{});
+Fortran::lower::createGlobalInitialization(
+builder, global, [&](fir::FirOpBuilder &builder) {
+  Fortran::lower::StatementContext stmtCtx(
+  /*cleanupProhibited=*/true);
+  fir::ExtendedValue initVal = genInitializerExprValue(
+  converter, loc, details->init().value(), stmtCtx);
+  mlir::Value castTo =
+  builder.createConvert(loc, symTy, fir::getBase(initVal));
+  builder.create(loc, castTo);
+});
+  } else if (!global) {
+global = builder.createGlobal(loc, symTy, globalName, linkage,
+  mlir::Attribute{},
+  /*isConst=*/true,
+  /*isTarget=*/false,
+  /*dataAttr=*/{});
+Fortran::lower::createGlobalInitialization(
+builder, global, [&](fir::FirOpBuilder &builder) {
+  Fortran::lower::StatementContext stmtCtx(
+  /*cleanupProhibited=*/true);
+  mlir::Value initVal = genDefaultInitializerValue(
+  converter, loc, sym, symTy, stmtCtx);
+  mlir::Value castTo = builder.createConvert(loc, symTy, initVal);
+  builder.cr

[llvm-branch-commits] [flang] [llvm] [flang][OpenMP] Parse cancel-directive-name as clause (PR #130146)

2025-03-07 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/130146

>From 98df18461bb06afa06b8968b157a3c5a5cf50324 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Thu, 6 Mar 2025 08:51:34 -0600
Subject: [PATCH 1/2] [flang][OpenMP] Parse cancel-directive-name as clause

The cancellable construct names on CANCEL or CANCELLATION POINT directives
are actually clauses (with the same names as the corresponding constructs).

Instead of parsing them into a custom structure, parse them as a clause,
which will make CANCEL/CANCELLATION POINT follow the same uniform scheme
as other constructs (<directive> [(<arguments>)] [clauses]).
---
 flang/include/flang/Parser/dump-parse-tree.h |   2 +-
 flang/include/flang/Parser/parse-tree.h  |  11 +-
 flang/lib/Parser/openmp-parsers.cpp  |  31 -
 flang/lib/Parser/unparse.cpp |   5 +-
 flang/lib/Semantics/check-omp-structure.cpp  | 133 +--
 flang/lib/Semantics/check-omp-structure.h|   5 +-
 flang/test/Semantics/OpenMP/cancel.f90   |  29 
 llvm/include/llvm/Frontend/OpenMP/OMP.td |   5 +
 8 files changed, 169 insertions(+), 52 deletions(-)
 create mode 100644 flang/test/Semantics/OpenMP/cancel.f90

diff --git a/flang/include/flang/Parser/dump-parse-tree.h 
b/flang/include/flang/Parser/dump-parse-tree.h
index fcd902d25fa40..004e22a21ecfa 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -546,6 +546,7 @@ class ParseTreeDumper {
 #define GEN_FLANG_DUMP_PARSE_TREE_CLAUSES
 #include "llvm/Frontend/OpenMP/OMP.inc"
   NODE(parser, OmpClauseList)
+  NODE(parser, OmpCancellationConstructTypeClause)
   NODE(parser, OmpContainsClause)
   NODE(parser, OmpCriticalDirective)
   NODE(parser, OmpErrorDirective)
@@ -689,7 +690,6 @@ class ParseTreeDumper {
   NODE(parser, OpenMPAtomicConstruct)
   NODE(parser, OpenMPBlockConstruct)
   NODE(parser, OpenMPCancelConstruct)
-  NODE(OpenMPCancelConstruct, If)
   NODE(parser, OpenMPCancellationPointConstruct)
   NODE(parser, OpenMPConstruct)
   NODE(parser, OpenMPCriticalConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h 
b/flang/include/flang/Parser/parse-tree.h
index a197249ebae91..cb0eb884e1193 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4048,6 +4048,12 @@ struct OmpBindClause {
   WRAPPER_CLASS_BOILERPLATE(OmpBindClause, Binding);
 };
 
+// Artificial clause to represent a cancellable construct.
+struct OmpCancellationConstructTypeClause {
+  TUPLE_CLASS_BOILERPLATE(OmpCancellationConstructTypeClause);
+  std::tuple> t;
+};
+
 // Ref: [5.2:214]
 //
 // contains-clause ->
@@ -4870,15 +4876,14 @@ struct OmpCancelType {
 struct OpenMPCancellationPointConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPCancellationPointConstruct);
   CharBlock source;
-  std::tuple t;
+  std::tuple t;
 };
 
 // 2.14.1 cancel -> CANCEL construct-type-clause [ [,] if-clause]
 struct OpenMPCancelConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPCancelConstruct);
-  WRAPPER_CLASS(If, ScalarLogicalExpr);
   CharBlock source;
-  std::tuple> t;
+  std::tuple t;
 };
 
 // Ref: [5.0:254-255], [5.1:287-288], [5.2:322-323]
diff --git a/flang/lib/Parser/openmp-parsers.cpp 
b/flang/lib/Parser/openmp-parsers.cpp
index 80831db0e7d50..51b2567a3894d 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -803,8 +803,9 @@ TYPE_PARSER(construct(many(maybe(","_tok) 
>>
 TYPE_PARSER(construct(many(maybe(","_tok) >>
 construct(unwrap(OmpDirectiveNameParser{})
 
-TYPE_PARSER("ABSENT" >> construct(construct(
-parenthesized(Parser{}))) ||
+TYPE_PARSER( //
+"ABSENT" >> construct(construct(
+parenthesized(Parser{}))) ||
 "ACQUIRE" >> construct(construct()) ||
 "ACQ_REL" >> construct(construct()) ||
 "AFFINITY" >> construct(construct(
@@ -981,7 +982,24 @@ TYPE_PARSER("ABSENT" >> 
construct(construct(
 "UPDATE" >> construct(construct(
 parenthesized(Parser{}))) ||
 "WHEN" >> construct(construct(
-  parenthesized(Parser{}
+  parenthesized(Parser{}))) ||
+// Cancellable constructs
+construct(construct(
+construct( //
+construct(verbatim("DO"_id)),
+maybe(parenthesized(scalarLogicalExpr) ||
+construct(construct(
+construct( //
+construct(verbatim("PARALLEL"_id)),
+maybe(parenthesized(scalarLogicalExpr) ||
+construct(construct(
+construct( //
+construct(verbatim("SECTIONS"_id)),
+maybe(parenthesized(scalarLogicalExpr) ||
+construct(construct(
+construct( //
+construct(verbatim("TASKGROUP"_id)),
+maybe(parenthesized(scalarLogicalExpr))
 
 // [Clause, [Clause], ...]
 TYPE_PARSER(sourced(construct(
@@ -1104,11 +1122,11 @@ TYPE_PARSER(sourced(construct(
 
 // 2.14.2 Cancellation Point

[llvm-branch-commits] [flang] Reland " [flang] Rely on global initialization for simpler derived types" (PR #130290)

2025-03-07 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-fir-hlfir

Author: None (jeanPerier)


Changes

Reland #114002 with an implementation of the FIXME that should solve 
the regressions that have been seen.
The first commit is the original PR, the second is the fix.

---

Patch is 42.75 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/130290.diff


9 Files Affected:

- (modified) flang/include/flang/Optimizer/Support/InternalNames.h (+1) 
- (modified) flang/lib/Lower/ConvertVariable.cpp (+60-2) 
- (modified) flang/test/Lower/HLFIR/structure-constructor.f90 (+14-42) 
- (modified) flang/test/Lower/OpenMP/private-derived-type.f90 (+5-9) 
- (modified) flang/test/Lower/default-initialization.f90 (+17-24) 
- (modified) flang/test/Lower/derived-type-finalization.f90 (+6-2) 
- (modified) flang/test/Lower/derived-type-temp.f90 (+5-5) 
- (modified) flang/test/Lower/forall/forall-allocatable-2.f90 (+2-6) 
- (modified) flang/test/Lower/pointer-default-init.f90 (+3-1) 


``diff
diff --git a/flang/include/flang/Optimizer/Support/InternalNames.h 
b/flang/include/flang/Optimizer/Support/InternalNames.h
index 41f2cb9842dc7..62375ab8f9de3 100644
--- a/flang/include/flang/Optimizer/Support/InternalNames.h
+++ b/flang/include/flang/Optimizer/Support/InternalNames.h
@@ -30,6 +30,7 @@ static constexpr llvm::StringRef kProcPtrSeparator = ".p.";
 static constexpr llvm::StringRef kSpecialBindingSeparator = ".s.";
 static constexpr llvm::StringRef kBindingTableSeparator = ".v.";
 static constexpr llvm::StringRef boxprocSuffix = "UnboxProc";
+static constexpr llvm::StringRef kDerivedTypeInitSuffix = "DerivedInit";
 
 /// Internal name mangling of identifiers
 ///
diff --git a/flang/lib/Lower/ConvertVariable.cpp 
b/flang/lib/Lower/ConvertVariable.cpp
index 48f7b9f99e960..b11251ecabf05 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -798,8 +798,66 @@ void Fortran::lower::defaultInitializeAtRuntime(
 })
 .end();
   } else {
-mlir::Value box = builder.createBox(loc, exv);
-fir::runtime::genDerivedTypeInitialize(builder, loc, box);
+/// For "simpler" types, relying on "_FortranAInitialize"
+/// leads to poor runtime performance. Hence optimize
+/// the same.
+const Fortran::semantics::DeclTypeSpec *declTy = sym.GetType();
+mlir::Type symTy = converter.genType(sym);
+const auto *details =
+sym.detailsIf();
+if (details && !Fortran::semantics::IsPolymorphic(sym) &&
+declTy->category() ==
+Fortran::semantics::DeclTypeSpec::Category::TypeDerived &&
+!mlir::isa(symTy) &&
+!sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) &&
+!sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
+  std::string globalName = fir::NameUniquer::doGenerated(
+  (converter.mangleName(*declTy->AsDerived()) + fir::kNameSeparator +
+   fir::kDerivedTypeInitSuffix)
+  .str());
+  mlir::Location loc = genLocation(converter, sym);
+  mlir::StringAttr linkage = builder.createInternalLinkage();
+  fir::GlobalOp global = builder.getNamedGlobal(globalName);
+  if (!global && details->init()) {
+global = builder.createGlobal(loc, symTy, globalName, linkage,
+  mlir::Attribute{},
+  /*isConst=*/true,
+  /*isTarget=*/false,
+  /*dataAttr=*/{});
+Fortran::lower::createGlobalInitialization(
+builder, global, [&](fir::FirOpBuilder &builder) {
+  Fortran::lower::StatementContext stmtCtx(
+  /*cleanupProhibited=*/true);
+  fir::ExtendedValue initVal = genInitializerExprValue(
+  converter, loc, details->init().value(), stmtCtx);
+  mlir::Value castTo =
+  builder.createConvert(loc, symTy, fir::getBase(initVal));
+  builder.create(loc, castTo);
+});
+  } else if (!global) {
+global = builder.createGlobal(loc, symTy, globalName, linkage,
+  mlir::Attribute{},
+  /*isConst=*/true,
+  /*isTarget=*/false,
+  /*dataAttr=*/{});
+Fortran::lower::createGlobalInitialization(
+builder, global, [&](fir::FirOpBuilder &builder) {
+  Fortran::lower::StatementContext stmtCtx(
+  /*cleanupProhibited=*/true);
+  mlir::Value initVal = genDefaultInitializerValue(
+  converter, loc, sym, symTy, stmtCtx);
+  mlir::Value castTo = builder.createConvert(loc, symTy, initVal);
+  builder.create(loc, castTo);
+});
+  }
+  auto addrOf = builder.create(loc, global.resultType(),
+  

[llvm-branch-commits] [llvm] a25b6a1 - Revert "AMDGPU: Handle demanded subvectors for readfirstlane (#128648)"

2025-03-07 Thread via llvm-branch-commits

Author: Jan Patrick Lehr
Date: 2025-03-07T13:13:26+01:00
New Revision: a25b6a1976cc628b4cba8a8c2a77c8e72279f2a1

URL: 
https://github.com/llvm/llvm-project/commit/a25b6a1976cc628b4cba8a8c2a77c8e72279f2a1
DIFF: 
https://github.com/llvm/llvm-project/commit/a25b6a1976cc628b4cba8a8c2a77c8e72279f2a1.diff

LOG: Revert "AMDGPU: Handle demanded subvectors for readfirstlane (#128648)"

This reverts commit af755af2003464f1cb9268de86b34d373cc6ac2d.

Added: 


Modified: 
llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp

llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
index ef076814ffdab..ebe740f884ea6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
@@ -1574,59 +1574,35 @@ Value *GCNTTIImpl::simplifyAMDGCNLaneIntrinsicDemanded(
   const unsigned LastElt = DemandedElts.getActiveBits() - 1;
   const unsigned MaskLen = LastElt - FirstElt + 1;
 
-  unsigned OldNumElts = VT->getNumElements();
-  if (MaskLen == OldNumElts && MaskLen != 1)
+  // TODO: Handle general subvector extract.
+  if (MaskLen != 1)
 return nullptr;
 
   Type *EltTy = VT->getElementType();
-  Type *NewVT = MaskLen == 1 ? EltTy : FixedVectorType::get(EltTy, MaskLen);
-
-  // Theoretically we should support these intrinsics for any legal type. Avoid
-  // introducing cases that aren't direct register types like v3i16.
-  if (!isTypeLegal(NewVT))
+  if (!isTypeLegal(EltTy))
 return nullptr;
 
   Value *Src = II.getArgOperand(0);
 
+  assert(FirstElt == LastElt);
+  Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
+
   // Make sure convergence tokens are preserved.
   // TODO: CreateIntrinsic should allow directly copying bundles
   SmallVector OpBundles;
   II.getOperandBundlesAsDefs(OpBundles);
 
   Module *M = IC.Builder.GetInsertBlock()->getModule();
-  Function *Remangled =
-  Intrinsic::getOrInsertDeclaration(M, II.getIntrinsicID(), {NewVT});
-
-  if (MaskLen == 1) {
-Value *Extract = IC.Builder.CreateExtractElement(Src, FirstElt);
-
-// TODO: Preserve callsite attributes?
-CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
-
-return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()),
-  NewCall, FirstElt);
-  }
-
-  SmallVector ExtractMask(MaskLen, -1);
-  for (unsigned I = 0; I != MaskLen; ++I) {
-if (DemandedElts[FirstElt + I])
-  ExtractMask[I] = FirstElt + I;
-  }
-
-  Value *Extract = IC.Builder.CreateShuffleVector(Src, ExtractMask);
+  Function *Remangled = Intrinsic::getOrInsertDeclaration(
+  M, II.getIntrinsicID(), {Extract->getType()});
 
   // TODO: Preserve callsite attributes?
   CallInst *NewCall = IC.Builder.CreateCall(Remangled, {Extract}, OpBundles);
 
-  SmallVector InsertMask(OldNumElts, -1);
-  for (unsigned I = 0; I != MaskLen; ++I) {
-if (DemandedElts[FirstElt + I])
-  InsertMask[FirstElt + I] = I;
-  }
-
   // FIXME: If the call has a convergence bundle, we end up leaving the dead
   // call behind.
-  return IC.Builder.CreateShuffleVector(NewCall, InsertMask);
+  return IC.Builder.CreateInsertElement(PoisonValue::get(II.getType()), 
NewCall,
+FirstElt);
 }
 
 std::optional GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(

diff  --git 
a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
 
b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
index ec645a7ff4519..e9d3b5e963b35 100644
--- 
a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+++ 
b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -64,8 +64,8 @@ define i16 @extract_elt2_v4i16_readfirstlane(<4 x i16> %src) {
 define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: define <2 x i16> @extract_elt01_v4i16_readfirstlane(
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:[[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> 
poison, <2 x i32> 
-; CHECK-NEXT:[[SHUFFLE:%.*]] = call <2 x i16> 
@llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
+; CHECK-NEXT:[[VEC:%.*]] = call <4 x i16> 
@llvm.amdgcn.readfirstlane.v4i16(<4 x i16> [[SRC]])
+; CHECK-NEXT:[[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> 
poison, <2 x i32> 
 ; CHECK-NEXT:ret <2 x i16> [[SHUFFLE]]
 ;
   %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -76,8 +76,8 @@ define <2 x i16> @extract_elt01_v4i16_readfirstlane(<4 x i16> 
%src) {
 define <2 x i16> @extract_elt12_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-LABEL: 

[llvm-branch-commits] [flang] [llvm] [flang][OpenMP] Parse cancel-directive-name as clause (PR #130146)

2025-03-07 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/130146

>From 98df18461bb06afa06b8968b157a3c5a5cf50324 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Thu, 6 Mar 2025 08:51:34 -0600
Subject: [PATCH 1/3] [flang][OpenMP] Parse cancel-directive-name as clause

The cancellable construct names on CANCEL or CANCELLATION POINT directives
are actually clauses (with the same names as the corresponding constructs).

Instead of parsing them into a custom structure, parse them as a clause,
which will make CANCEL/CANCELLATION POINT follow the same uniform scheme
as other constructs (<directive> [(<arguments>)] [clauses]).
---
 flang/include/flang/Parser/dump-parse-tree.h |   2 +-
 flang/include/flang/Parser/parse-tree.h  |  11 +-
 flang/lib/Parser/openmp-parsers.cpp  |  31 -
 flang/lib/Parser/unparse.cpp |   5 +-
 flang/lib/Semantics/check-omp-structure.cpp  | 133 +--
 flang/lib/Semantics/check-omp-structure.h|   5 +-
 flang/test/Semantics/OpenMP/cancel.f90   |  29 
 llvm/include/llvm/Frontend/OpenMP/OMP.td |   5 +
 8 files changed, 169 insertions(+), 52 deletions(-)
 create mode 100644 flang/test/Semantics/OpenMP/cancel.f90

diff --git a/flang/include/flang/Parser/dump-parse-tree.h 
b/flang/include/flang/Parser/dump-parse-tree.h
index fcd902d25fa40..004e22a21ecfa 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -546,6 +546,7 @@ class ParseTreeDumper {
 #define GEN_FLANG_DUMP_PARSE_TREE_CLAUSES
 #include "llvm/Frontend/OpenMP/OMP.inc"
   NODE(parser, OmpClauseList)
+  NODE(parser, OmpCancellationConstructTypeClause)
   NODE(parser, OmpContainsClause)
   NODE(parser, OmpCriticalDirective)
   NODE(parser, OmpErrorDirective)
@@ -689,7 +690,6 @@ class ParseTreeDumper {
   NODE(parser, OpenMPAtomicConstruct)
   NODE(parser, OpenMPBlockConstruct)
   NODE(parser, OpenMPCancelConstruct)
-  NODE(OpenMPCancelConstruct, If)
   NODE(parser, OpenMPCancellationPointConstruct)
   NODE(parser, OpenMPConstruct)
   NODE(parser, OpenMPCriticalConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h 
b/flang/include/flang/Parser/parse-tree.h
index a197249ebae91..cb0eb884e1193 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4048,6 +4048,12 @@ struct OmpBindClause {
   WRAPPER_CLASS_BOILERPLATE(OmpBindClause, Binding);
 };
 
+// Artificial clause to represent a cancellable construct.
+struct OmpCancellationConstructTypeClause {
+  TUPLE_CLASS_BOILERPLATE(OmpCancellationConstructTypeClause);
+  std::tuple> t;
+};
+
 // Ref: [5.2:214]
 //
 // contains-clause ->
@@ -4870,15 +4876,14 @@ struct OmpCancelType {
 struct OpenMPCancellationPointConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPCancellationPointConstruct);
   CharBlock source;
-  std::tuple t;
+  std::tuple t;
 };
 
 // 2.14.1 cancel -> CANCEL construct-type-clause [ [,] if-clause]
 struct OpenMPCancelConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPCancelConstruct);
-  WRAPPER_CLASS(If, ScalarLogicalExpr);
   CharBlock source;
-  std::tuple> t;
+  std::tuple t;
 };
 
 // Ref: [5.0:254-255], [5.1:287-288], [5.2:322-323]
diff --git a/flang/lib/Parser/openmp-parsers.cpp 
b/flang/lib/Parser/openmp-parsers.cpp
index 80831db0e7d50..51b2567a3894d 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -803,8 +803,9 @@ TYPE_PARSER(construct(many(maybe(","_tok) 
>>
 TYPE_PARSER(construct(many(maybe(","_tok) >>
 construct(unwrap(OmpDirectiveNameParser{})
 
-TYPE_PARSER("ABSENT" >> construct(construct(
-parenthesized(Parser{}))) ||
+TYPE_PARSER( //
+"ABSENT" >> construct(construct(
+parenthesized(Parser{}))) ||
 "ACQUIRE" >> construct(construct()) ||
 "ACQ_REL" >> construct(construct()) ||
 "AFFINITY" >> construct(construct(
@@ -981,7 +982,24 @@ TYPE_PARSER("ABSENT" >> 
construct(construct(
 "UPDATE" >> construct(construct(
 parenthesized(Parser{}))) ||
 "WHEN" >> construct(construct(
-  parenthesized(Parser{}
+  parenthesized(Parser{}))) ||
+// Cancellable constructs
+construct(construct(
+construct( //
+construct(verbatim("DO"_id)),
+maybe(parenthesized(scalarLogicalExpr) ||
+construct(construct(
+construct( //
+construct(verbatim("PARALLEL"_id)),
+maybe(parenthesized(scalarLogicalExpr) ||
+construct(construct(
+construct( //
+construct(verbatim("SECTIONS"_id)),
+maybe(parenthesized(scalarLogicalExpr) ||
+construct(construct(
+construct( //
+construct(verbatim("TASKGROUP"_id)),
+maybe(parenthesized(scalarLogicalExpr))
 
 // [Clause, [Clause], ...]
 TYPE_PARSER(sourced(construct(
@@ -1104,11 +1122,11 @@ TYPE_PARSER(sourced(construct(
 
 // 2.14.2 Cancellation Point

[llvm-branch-commits] [llvm] [AMDGPU] Allocate scratch space for dVGPRs for CWSR (PR #130055)

2025-03-07 Thread Diana Picus via llvm-branch-commits

https://github.com/rovka updated 
https://github.com/llvm/llvm-project/pull/130055

>From 3e20edfc6f3b1bfa60f5d778ce98c1fb984b1aee Mon Sep 17 00:00:00 2001
From: Diana Picus 
Date: Tue, 24 Sep 2024 09:57:25 +0200
Subject: [PATCH 1/6] [AMDGPU] Allocate scratch space for dVGPRs for CWSR

The CWSR trap handler needs to save and restore the VGPRs. When dynamic
VGPRs are in use, the fixed function hardware will only allocate enough
space for one VGPR block. The rest will have to be stored in scratch, at
offset 0.

This patch allocates the necessary space by:
- generating a prologue that checks at runtime if we're on a compute
  queue (since CWSR only works on compute queues); for this we will have
  to check the ME_ID bits of the ID_HW_ID2 register - if that is
  non-zero, we can assume we're on a compute queue and initialize the SP
  and FP with enough room for the dynamic VGPRs
- forcing all compute entry functions to use a FP so they can access
  their locals/spills correctly (this isn't ideal but it's the quickest
  to implement)

Note that at the moment we allocate enough space for the theoretical
maximum number of VGPRs that can be allocated dynamically (for blocks of
16 registers, this will be 128, of which we subtract the first 16, which
are already allocated by the fixed function hardware). Future patches
may decide to allocate less if they can prove the shader never allocates
that many blocks.

Also note that this should not affect any reported stack sizes (e.g. PAL
backend_stack_size etc).
---
 llvm/docs/AMDGPUUsage.rst |  65 +++--
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |   9 +-
 llvm/lib/Target/AMDGPU/SIDefines.h|   1 +
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp|  66 -
 llvm/lib/Target/AMDGPU/SIFrameLowering.h  |   4 +
 .../lib/Target/AMDGPU/SIMachineFunctionInfo.h |  13 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp |   8 +
 .../dynamic-vgpr-reserve-stack-for-cwsr.ll| 263 ++
 llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll  |  11 +-
 9 files changed, 399 insertions(+), 41 deletions(-)
 create mode 100644 
llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 59cc08a59ed7c..b5196930a50f7 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -6020,8 +6020,13 @@ Frame Pointer
 
 If the kernel needs a frame pointer for the reasons defined in
 ``SIFrameLowering`` then SGPR33 is used and is always set to ``0`` in the
-kernel prolog. If a frame pointer is not required then all uses of the frame
-pointer are replaced with immediate ``0`` offsets.
+kernel prolog. On GFX12+, when dynamic VGPRs are enabled, the prologue will
+check if the kernel is running on a compute queue, and if so it will reserve
+some scratch space for any dynamic VGPRs that might need to be saved by the
+CWSR trap handler. In this case, the frame pointer will be initialized to
+a suitably aligned offset above this reserved area. If a frame pointer is not
+required then all uses of the frame pointer are replaced with immediate ``0``
+offsets.
 
 .. _amdgpu-amdhsa-kernel-prolog-flat-scratch:
 
@@ -17133,33 +17138,35 @@ within a map that has been added by the same *vendor-name*.
   .. table:: AMDPAL Code Object Hardware Stage Metadata Map
  :name: amdgpu-amdpal-code-object-hardware-stage-metadata-map-table
 
- == == = 
===
- String Key Value Type Required? Description
- == == = 
===
- ".entry_point" string   The ELF symbol 
pointing to this pipeline's stage entry point.
- ".scratch_memory_size" integer  Scratch memory size 
in bytes.
- ".lds_size"integer  Local Data Share size 
in bytes.
- ".perf_data_buffer_size"   integer  Performance data 
buffer size in bytes.
- ".vgpr_count"  integer  Number of VGPRs used.
- ".agpr_count"  integer  Number of AGPRs used.
- ".sgpr_count"  integer  Number of SGPRs used.
- ".vgpr_limit"  integer  If non-zero, 
indicates the shader was compiled with a
- directive to instruct 
the compiler to limit the VGPR usage to
- be less than or equal 
to the specified value (only set if
- different from HW 
default).
- ".sgpr_limit"  integer  SGPR count upper 
limit (only set if different from HW
- default).
- ".

[llvm-branch-commits] [llvm] [AMDGPU] Allocate scratch space for dVGPRs for CWSR (PR #130055)

2025-03-07 Thread Diana Picus via llvm-branch-commits


@@ -0,0 +1,263 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -mattr=+dynamic-vgpr < %s | FileCheck -check-prefix=CHECK %s
+
+; Make sure we use a stack pointer and allocate 112 * 4 bytes at the beginning of the stack.
+
+define amdgpu_cs void @amdgpu_cs() #0 {
+; CHECK-LABEL: amdgpu_cs:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 1)
+; CHECK-NEXT:s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-NEXT:s_cmp_lg_u32 0, s33
+; CHECK-NEXT:s_cmovk_i32 s33, 0x1c0
+; CHECK-NEXT:s_alloc_vgpr 0
+; CHECK-NEXT:s_endpgm
+  ret void
+}
+
+define amdgpu_kernel void @kernel() #0 {
+; CHECK-LABEL: kernel:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 1)
+; CHECK-NEXT:s_delay_alu instid0(SALU_CYCLE_1)
+; CHECK-NEXT:s_cmp_lg_u32 0, s33
+; CHECK-NEXT:s_cmovk_i32 s33, 0x1c0
+; CHECK-NEXT:s_alloc_vgpr 0
+; CHECK-NEXT:s_endpgm
+  ret void
+}
+
+define amdgpu_cs void @with_local() #0 {
+; CHECK-LABEL: with_local:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 1)
+; CHECK-NEXT:v_mov_b32_e32 v0, 13
+; CHECK-NEXT:s_cmp_lg_u32 0, s33
+; CHECK-NEXT:s_cmovk_i32 s33, 0x1c0
+; CHECK-NEXT:scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
+; CHECK-NEXT:s_wait_storecnt 0x0
+; CHECK-NEXT:s_alloc_vgpr 0
+; CHECK-NEXT:s_endpgm
+  %local = alloca i32, addrspace(5)
+  store volatile i8 13, ptr addrspace(5) %local
+  ret void
+}
+
+; Check that we generate s_cselect for SP if we can fit
+; the offset in an inline constant.
+define amdgpu_cs void @with_calls_inline_const() #0 {
+; CHECK-LABEL: with_calls_inline_const:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 1)
+; CHECK-NEXT:v_mov_b32_e32 v0, 15
+; CHECK-NEXT:s_cmp_lg_u32 0, s33
+; CHECK-NEXT:s_mov_b32 s1, callee@abs32@hi
+; CHECK-NEXT:s_cmovk_i32 s33, 0x1c0
+; CHECK-NEXT:s_mov_b32 s0, callee@abs32@lo
+; CHECK-NEXT:scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
+; CHECK-NEXT:s_wait_storecnt 0x0
+; CHECK-NEXT:v_mov_b32_e32 v0, 0x47
+; CHECK-NEXT:s_cselect_b32 s32, 0x1d0, 16
+; CHECK-NEXT:s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT:s_alloc_vgpr 0
+; CHECK-NEXT:s_endpgm
+  %local = alloca i32, addrspace(5)
+  store volatile i8 15, ptr addrspace(5) %local
+  call amdgpu_gfx void @callee(i32 71)
+  ret void
+}
+
+; Check that we generate s_mov + s_cmovk if we can't
+; fit the offset for SP in an inline constant.
+define amdgpu_cs void @with_calls_no_inline_const() #0 {
+; CHECK-LABEL: with_calls_no_inline_const:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 1)
+; CHECK-NEXT:v_mov_b32_e32 v0, 15
+; CHECK-NEXT:s_cmp_lg_u32 0, s33
+; CHECK-NEXT:s_mov_b32 s1, callee@abs32@hi
+; CHECK-NEXT:s_cmovk_i32 s33, 0x1c0
+; CHECK-NEXT:s_mov_b32 s0, callee@abs32@lo
+; CHECK-NEXT:scratch_store_b8 off, v0, s33 scope:SCOPE_SYS
+; CHECK-NEXT:s_wait_storecnt 0x0
+; CHECK-NEXT:v_mov_b32_e32 v0, 0x47
+; CHECK-NEXT:s_movk_i32 s32, 0x100
+; CHECK-NEXT:s_cmovk_i32 s32, 0x2c0
+; CHECK-NEXT:s_swappc_b64 s[30:31], s[0:1]
+; CHECK-NEXT:s_alloc_vgpr 0
+; CHECK-NEXT:s_endpgm
+  %local = alloca i32, i32 61, addrspace(5)
+  store volatile i8 15, ptr addrspace(5) %local
+  call amdgpu_gfx void @callee(i32 71)
+  ret void
+}
+
+; We're going to limit this to 16 VGPRs, so we need to spill the rest.
+define amdgpu_cs void @with_spills(ptr addrspace(1) %p1, ptr addrspace(1) %p2) #1 {
+; CHECK-LABEL: with_spills:
+; CHECK:   ; %bb.0:
+; CHECK-NEXT:s_getreg_b32 s33, hwreg(HW_REG_HW_ID2, 8, 1)
+; CHECK-NEXT:global_load_b128 v[4:7], v[0:1], off offset:96
+; CHECK-NEXT:s_cmp_lg_u32 0, s33
+; CHECK-NEXT:s_cmovk_i32 s33, 0x1c0
+; CHECK-NEXT:s_wait_loadcnt 0x0
+; CHECK-NEXT:scratch_store_b128 off, v[4:7], s33 offset:80 ; 16-byte Folded Spill
+; CHECK-NEXT:s_clause 0x2
+; CHECK-NEXT:global_load_b128 v[8:11], v[0:1], off offset:112
+; CHECK-NEXT:global_load_b128 v[12:15], v[0:1], off offset:64
+; CHECK-NEXT:global_load_b128 v[4:7], v[0:1], off offset:80
+; CHECK-NEXT:s_wait_loadcnt 0x0
+; CHECK-NEXT:scratch_store_b128 off, v[4:7], s33 offset:64 ; 16-byte Folded Spill
+; CHECK-NEXT:global_load_b128 v[4:7], v[0:1], off offset:32
+; CHECK-NEXT:s_wait_loadcnt 0x0
+; CHECK-NEXT:scratch_store_b128 off, v[4:7], s33 offset:48 ; 16-byte Folded Spill
+; CHECK-NEXT:global_load_b128 v[4:7], v[0:1], off offset:48
+; CHECK-NEXT:s_wait_loadcnt 0x0
+; CHECK-NEXT:scratch_store_b128 off, v[4:7], s33 offset:32 ; 16-byte Folded Spill
+; CHECK-NEXT:global_load_b128 v[4:7], v[0:1], off
+; CHECK-NEXT:s_wait_loadcnt 0x0
+; CHECK-NEXT:scratch_store_b128 off, v[4:7], s33 offset:16 ; 16-byte Folded Spill

[llvm-branch-commits] [flang] 6e3e9f6 - Revert " [flang] Rely on global initialization for simpler derived types (#11…"

2025-03-07 Thread via llvm-branch-commits

Author: Tom Eccles
Date: 2025-03-07T12:21:57Z
New Revision: 6e3e9f6b34d5f08c8b0b77e60fe1526db7e69633

URL: 
https://github.com/llvm/llvm-project/commit/6e3e9f6b34d5f08c8b0b77e60fe1526db7e69633
DIFF: 
https://github.com/llvm/llvm-project/commit/6e3e9f6b34d5f08c8b0b77e60fe1526db7e69633.diff

LOG: Revert " [flang] Rely on global initialization for simpler derived types (#11…"

This reverts commit 0ae1f0a31062f2447c04ec99ec0933cd71c21224.

Added: 


Modified: 
flang/include/flang/Optimizer/Support/InternalNames.h
flang/lib/Lower/ConvertVariable.cpp
flang/test/Lower/HLFIR/structure-constructor.f90
flang/test/Lower/OpenMP/private-derived-type.f90
flang/test/Lower/default-initialization.f90
flang/test/Lower/derived-type-finalization.f90
flang/test/Lower/derived-type-temp.f90
flang/test/Lower/forall/forall-allocatable-2.f90
flang/test/Lower/pointer-default-init.f90

Removed: 




diff  --git a/flang/include/flang/Optimizer/Support/InternalNames.h 
b/flang/include/flang/Optimizer/Support/InternalNames.h
index 62375ab8f9de3..41f2cb9842dc7 100644
--- a/flang/include/flang/Optimizer/Support/InternalNames.h
+++ b/flang/include/flang/Optimizer/Support/InternalNames.h
@@ -30,7 +30,6 @@ static constexpr llvm::StringRef kProcPtrSeparator = ".p.";
 static constexpr llvm::StringRef kSpecialBindingSeparator = ".s.";
 static constexpr llvm::StringRef kBindingTableSeparator = ".v.";
 static constexpr llvm::StringRef boxprocSuffix = "UnboxProc";
-static constexpr llvm::StringRef kDerivedTypeInitSuffix = "DerivedInit";
 
 /// Internal name mangling of identifiers
 ///

diff  --git a/flang/lib/Lower/ConvertVariable.cpp 
b/flang/lib/Lower/ConvertVariable.cpp
index 8a7be19f8040b..48f7b9f99e960 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -798,67 +798,8 @@ void Fortran::lower::defaultInitializeAtRuntime(
 })
 .end();
   } else {
-/// For "simpler" types, relying on "_FortranAInitialize"
-/// leads to poor runtime performance. Hence optimize
-/// the same.
-const Fortran::semantics::DeclTypeSpec *declTy = sym.GetType();
-mlir::Type symTy = converter.genType(sym);
-const auto *details =
-sym.detailsIf();
-if (details && !Fortran::semantics::IsPolymorphic(sym) &&
-declTy->category() ==
-Fortran::semantics::DeclTypeSpec::Category::TypeDerived &&
-!mlir::isa(symTy) &&
-!sym.test(Fortran::semantics::Symbol::Flag::OmpPrivate) &&
-!sym.test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) {
-  std::string globalName = fir::NameUniquer::doGenerated(
-  (converter.mangleName(*declTy->AsDerived()) + fir::kNameSeparator +
-   fir::kDerivedTypeInitSuffix)
-  .str());
-  mlir::Location loc = genLocation(converter, sym);
-  mlir::StringAttr linkage = builder.createInternalLinkage();
-  fir::GlobalOp global = builder.getNamedGlobal(globalName);
-  if (!global && details->init()) {
-global = builder.createGlobal(loc, symTy, globalName, linkage,
-  mlir::Attribute{},
-  /*isConst=*/true,
-  /*isTarget=*/false,
-  /*dataAttr=*/{});
-Fortran::lower::createGlobalInitialization(
-builder, global, [&](fir::FirOpBuilder &builder) {
-  Fortran::lower::StatementContext stmtCtx(
-  /*cleanupProhibited=*/true);
-  fir::ExtendedValue initVal = genInitializerExprValue(
-  converter, loc, details->init().value(), stmtCtx);
-  mlir::Value castTo =
-  builder.createConvert(loc, symTy, fir::getBase(initVal));
-  builder.create(loc, castTo);
-});
-  } else if (!global) {
-global = builder.createGlobal(loc, symTy, globalName, linkage,
-  mlir::Attribute{},
-  /*isConst=*/true,
-  /*isTarget=*/false,
-  /*dataAttr=*/{});
-Fortran::lower::createGlobalInitialization(
-builder, global, [&](fir::FirOpBuilder &builder) {
-  Fortran::lower::StatementContext stmtCtx(
-  /*cleanupProhibited=*/true);
-  mlir::Value initVal = genDefaultInitializerValue(
-  converter, loc, sym, symTy, stmtCtx);
-  mlir::Value castTo = builder.createConvert(loc, symTy, initVal);
-  builder.create(loc, castTo);
-});
-  }
-  auto addrOf = builder.create(loc, global.resultType(),
-  global.getSymbol());
-  fir::LoadOp load = builder.create(loc, addrOf.getResult());
-  // FIXME: Use mem

[llvm-branch-commits] [llvm] AMDGPU: Implement bitcode autoupgrade for old style enqueue blocks (PR #128520)

2025-03-07 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/128520

>From e8245205377e9f81ac768c9193ea902037c24750 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 17 Nov 2023 14:21:52 +0900
Subject: [PATCH] AMDGPU: Implement bitcode autoupgrade for old style enqueue
 blocks

Introduces circular dependency in build for appendToUsed, and I'm not
sure it's worth the trouble to fix it. We can most likely get away
without upgrading this. We could move appendToUsed / appendToCompilerUsed
directly to be in Module.
---
 llvm/lib/IR/AutoUpgrade.cpp   |  49 +++
 llvm/lib/IR/CMakeLists.txt|   1 +
 .../amdgpu-autoupgrade-enqueued-block.ll  | 138 ++
 3 files changed, 188 insertions(+)
 create mode 100644 llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index cb4ecc60aa473..0e4e135e90972 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
 #include 
 #include 
 #include 
@@ -5518,6 +5519,51 @@ struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
 };
 } // namespace
 
+static StructType *getAMDGPURuntimeHandleType(LLVMContext &C,
+  Type *KernelDescriptorPtrTy) {
+  Type *Int32 = Type::getInt32Ty(C);
+  return StructType::create(C, {KernelDescriptorPtrTy, Int32, Int32},
+"block.runtime.handle.t");
+}
+
+/// Rewrite to new scheme for enqueued block lowering
+static void upgradeAMDGPUKernelEnqueuedBlock(Function &F) {
+  if (F.isMaterializable()) {
+// A verifier error is produced if we add metadata to the function during
+// linking.
+return;
+  }
+
+  const StringLiteral EnqueuedBlockName("enqueued-block");
+  if (!F.hasFnAttribute(EnqueuedBlockName))
+return;
+
+  F.removeFnAttr(EnqueuedBlockName);
+
+  Module *M = F.getParent();
+  LLVMContext &Ctx = M->getContext();
+  const DataLayout &DL = M->getDataLayout();
+
+  StructType *HandleTy = getAMDGPURuntimeHandleType(
+  Ctx, PointerType::get(Ctx, DL.getDefaultGlobalsAddressSpace()));
+
+  Twine RuntimeHandleName = F.getName() + ".runtime.handle";
+
+  auto *RuntimeHandle = new GlobalVariable(
+  *M, HandleTy,
+  /*isConstant=*/true, F.getLinkage(),
+  /*Initializer=*/ConstantAggregateZero::get(HandleTy), RuntimeHandleName,
+  /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
+  DL.getDefaultGlobalsAddressSpace(),
+  /*isExternallyInitialized=*/true);
+  RuntimeHandle->setSection(".amdgpu.kernel.runtime.handle");
+
+  MDNode *HandleAsMD = MDNode::get(Ctx, ValueAsMetadata::get(RuntimeHandle));
+  F.setMetadata(LLVMContext::MD_associated, HandleAsMD);
+
+  appendToUsed(*M, {&F, RuntimeHandle});
+}
+
 void llvm::UpgradeFunctionAttributes(Function &F) {
   // If a function definition doesn't have the strictfp attribute,
   // convert any callsite strictfp attributes to nobuiltin.
@@ -5558,6 +5604,9 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
   F.removeFnAttr("amdgpu-unsafe-fp-atomics");
 }
   }
+
+  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+upgradeAMDGPUKernelEnqueuedBlock(F);
 }
 
 static bool isOldLoopArgument(Metadata *MD) {
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt
index eb00829fd8c70..a78c58c807f6a 100644
--- a/llvm/lib/IR/CMakeLists.txt
+++ b/llvm/lib/IR/CMakeLists.txt
@@ -92,6 +92,7 @@ add_llvm_component_library(LLVMCore
   LINK_COMPONENTS
   BinaryFormat
   Demangle
+  TransformUtils
   Remarks
   Support
   TargetParser
diff --git a/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll 
b/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll
new file mode 100644
index 0..41521c1f2025d
--- /dev/null
+++ b/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll
@@ -0,0 +1,138 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+%struct.ndrange_t = type { i32 }
+%opencl.queue_t = type opaque
+
+; CHECK: %block.runtime.handle.t = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.0 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.1 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.2 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.3 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.4 = type { ptr, i32, i32 }
+
+
+; CHECK: @kernel_address_user = global [1 x ptr] [ptr @block_has_used_kernel_address]
+; CHECK: @__test_block_invoke_kernel.runtime.handle = internal externally_initialized constant %block.runtime.handle.t zeroinitializer, section ".amdgpu.kernel.runtime.handle"
+; CHECK: @__test_block_invoke_2_kernel.runtime.handle = internal externally_initialized constant %block.runtime.handle.t.0 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
+; CHECK: @block_has_used_k

[llvm-branch-commits] [llvm] AMDGPU: Implement bitcode autoupgrade for old style enqueue blocks (PR #128520)

2025-03-07 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/128520

>From e8245205377e9f81ac768c9193ea902037c24750 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 17 Nov 2023 14:21:52 +0900
Subject: [PATCH] AMDGPU: Implement bitcode autoupgrade for old style enqueue
 blocks

Introduces circular dependency in build for appendToUsed, and I'm not
sure it's worth the trouble to fix it. We can most likely get away
without upgrading this. We could move appendToUsed / appendToCompilerUsed
directly to be in Module.
---
 llvm/lib/IR/AutoUpgrade.cpp   |  49 +++
 llvm/lib/IR/CMakeLists.txt|   1 +
 .../amdgpu-autoupgrade-enqueued-block.ll  | 138 ++
 3 files changed, 188 insertions(+)
 create mode 100644 llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index cb4ecc60aa473..0e4e135e90972 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
 #include 
 #include 
 #include 
@@ -5518,6 +5519,51 @@ struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
 };
 } // namespace
 
+static StructType *getAMDGPURuntimeHandleType(LLVMContext &C,
+  Type *KernelDescriptorPtrTy) {
+  Type *Int32 = Type::getInt32Ty(C);
+  return StructType::create(C, {KernelDescriptorPtrTy, Int32, Int32},
+"block.runtime.handle.t");
+}
+
+/// Rewrite to new scheme for enqueued block lowering
+static void upgradeAMDGPUKernelEnqueuedBlock(Function &F) {
+  if (F.isMaterializable()) {
+// A verifier error is produced if we add metadata to the function during
+// linking.
+return;
+  }
+
+  const StringLiteral EnqueuedBlockName("enqueued-block");
+  if (!F.hasFnAttribute(EnqueuedBlockName))
+return;
+
+  F.removeFnAttr(EnqueuedBlockName);
+
+  Module *M = F.getParent();
+  LLVMContext &Ctx = M->getContext();
+  const DataLayout &DL = M->getDataLayout();
+
+  StructType *HandleTy = getAMDGPURuntimeHandleType(
+  Ctx, PointerType::get(Ctx, DL.getDefaultGlobalsAddressSpace()));
+
+  Twine RuntimeHandleName = F.getName() + ".runtime.handle";
+
+  auto *RuntimeHandle = new GlobalVariable(
+  *M, HandleTy,
+  /*isConstant=*/true, F.getLinkage(),
+  /*Initializer=*/ConstantAggregateZero::get(HandleTy), RuntimeHandleName,
+  /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
+  DL.getDefaultGlobalsAddressSpace(),
+  /*isExternallyInitialized=*/true);
+  RuntimeHandle->setSection(".amdgpu.kernel.runtime.handle");
+
+  MDNode *HandleAsMD = MDNode::get(Ctx, ValueAsMetadata::get(RuntimeHandle));
+  F.setMetadata(LLVMContext::MD_associated, HandleAsMD);
+
+  appendToUsed(*M, {&F, RuntimeHandle});
+}
+
 void llvm::UpgradeFunctionAttributes(Function &F) {
   // If a function definition doesn't have the strictfp attribute,
   // convert any callsite strictfp attributes to nobuiltin.
@@ -5558,6 +5604,9 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
   F.removeFnAttr("amdgpu-unsafe-fp-atomics");
 }
   }
+
+  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+upgradeAMDGPUKernelEnqueuedBlock(F);
 }
 
 static bool isOldLoopArgument(Metadata *MD) {
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt
index eb00829fd8c70..a78c58c807f6a 100644
--- a/llvm/lib/IR/CMakeLists.txt
+++ b/llvm/lib/IR/CMakeLists.txt
@@ -92,6 +92,7 @@ add_llvm_component_library(LLVMCore
   LINK_COMPONENTS
   BinaryFormat
   Demangle
+  TransformUtils
   Remarks
   Support
   TargetParser
diff --git a/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll 
b/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll
new file mode 100644
index 0..41521c1f2025d
--- /dev/null
+++ b/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll
@@ -0,0 +1,138 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+%struct.ndrange_t = type { i32 }
+%opencl.queue_t = type opaque
+
+; CHECK: %block.runtime.handle.t = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.0 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.1 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.2 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.3 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.4 = type { ptr, i32, i32 }
+
+
+; CHECK: @kernel_address_user = global [1 x ptr] [ptr @block_has_used_kernel_address]
+; CHECK: @__test_block_invoke_kernel.runtime.handle = internal externally_initialized constant %block.runtime.handle.t zeroinitializer, section ".amdgpu.kernel.runtime.handle"
+; CHECK: @__test_block_invoke_2_kernel.runtime.handle = internal externally_initialized constant %block.runtime.handle.t.0 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
+; CHECK: @block_has_used_k

[llvm-branch-commits] [clang] 5a71fab - Revert "[AArch64][SVE] Improve fixed-length addressing modes. (#129732)"

2025-03-07 Thread via llvm-branch-commits

Author: Ricardo Jesus
Date: 2025-03-07T09:16:20Z
New Revision: 5a71fab0067bae0f532a6268749df71dbe66b4ac

URL: 
https://github.com/llvm/llvm-project/commit/5a71fab0067bae0f532a6268749df71dbe66b4ac
DIFF: 
https://github.com/llvm/llvm-project/commit/5a71fab0067bae0f532a6268749df71dbe66b4ac.diff

LOG: Revert "[AArch64][SVE] Improve fixed-length addressing modes. (#129732)"

This reverts commit f01e760c08365426de95f02dc2c2dc670eb47352.

Added: 


Modified: 
clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll

Removed: 
llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll



diff  --git a/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c 
b/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
index 1391a1b09fbd1..0ed14b4b3b793 100644
--- a/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
+++ b/clang/test/CodeGen/AArch64/sve-vector-bits-codegen.c
@@ -13,9 +13,12 @@
 
 void func(int *restrict a, int *restrict b) {
 // CHECK-LABEL: func
-// CHECK256-COUNT-8: str
-// CHECK512-COUNT-4: str
-// CHECK1024-COUNT-2: str
+// CHECK256-COUNT-1: str
+// CHECK256-COUNT-7: st1w
+// CHECK512-COUNT-1: str
+// CHECK512-COUNT-3: st1w
+// CHECK1024-COUNT-1: str
+// CHECK1024-COUNT-1: st1w
 // CHECK2048-COUNT-1: st1w
 #pragma clang loop vectorize(enable)
   for (int i = 0; i < 64; ++i)

diff  --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 07bcd802962fa..3ca9107cb2ce5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -7380,23 +7380,12 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
 return false;
 
   SDValue VScale = N.getOperand(1);
-  int64_t MulImm = std::numeric_limits::max();
-  if (VScale.getOpcode() == ISD::VSCALE) {
-MulImm = cast(VScale.getOperand(0))->getSExtValue();
-  } else if (auto C = dyn_cast(VScale)) {
-int64_t ByteOffset = C->getSExtValue();
-const auto KnownVScale =
-Subtarget->getSVEVectorSizeInBits() / AArch64::SVEBitsPerBlock;
-
-if (!KnownVScale || ByteOffset % KnownVScale != 0)
-  return false;
-
-MulImm = ByteOffset / KnownVScale;
-  } else
+  if (VScale.getOpcode() != ISD::VSCALE)
 return false;
 
   TypeSize TS = MemVT.getSizeInBits();
   int64_t MemWidthBytes = static_cast(TS.getKnownMinValue()) / 8;
+  int64_t MulImm = cast(VScale.getOperand(0))->getSExtValue();
 
   if ((MulImm % MemWidthBytes) != 0)
 return false;

diff  --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h 
b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index f5ffc72cae537..c6eb77e3bc3ba 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -391,7 +391,7 @@ class AArch64Subtarget final : public 
AArch64GenSubtargetInfo {
   void mirFileLoaded(MachineFunction &MF) const override;
 
   // Return the known range for the bit length of SVE data registers. A value
-  // of 0 means nothing is known about that particular limit beyond what's
+  // of 0 means nothing is known about that particular limit beyong what's
   // implied by the architecture.
   unsigned getMaxSVEVectorSizeInBits() const {
 assert(isSVEorStreamingSVEAvailable() &&
@@ -405,16 +405,6 @@ class AArch64Subtarget final : public 
AArch64GenSubtargetInfo {
 return MinSVEVectorSizeInBits;
   }
 
-  // Return the known bit length of SVE data registers. A value of 0 means the
-  // length is unkown beyond what's implied by the architecture.
-  unsigned getSVEVectorSizeInBits() const {
-assert(isSVEorStreamingSVEAvailable() &&
-   "Tried to get SVE vector length without SVE support!");
-if (MinSVEVectorSizeInBits == MaxSVEVectorSizeInBits)
-  return MaxSVEVectorSizeInBits;
-return 0;
-  }
-
   bool useSVEForFixedLengthVectors() const {
 if (!isSVEorStreamingSVEAvailable())
   return false;

diff  --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll 
b/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll
deleted file mode 100644
index 700bbe4f060ca..0
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-offsets.ll
+++ /dev/null
@@ -1,362 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve 
-aarch64-sve-vector-bits-min=128 -aarch64-sve-vector-bits-max=128 < %s | 
FileCheck %s --check-prefix=CHECK-128
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve 
-aarch64-sve-vector-bits-min=256 -aarch64-sve-vector-bits-max=256 < %s | 
FileCheck %s --check-prefix=CHECK-256
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve 
-aarch64-sve-vector-bits-min

[llvm-branch-commits] [llvm] obj2yaml: Introduce CovMap dump (PR #127432)

2025-03-07 Thread NAKAMURA Takumi via llvm-branch-commits

https://github.com/chapuni updated 
https://github.com/llvm/llvm-project/pull/127432

>From 7e29d6ace39058b631dcfff5533d8aee055de6dd Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi 
Date: Mon, 3 Mar 2025 12:25:13 +0900
Subject: [PATCH] obj2yaml

---
 llvm/include/llvm/ObjectYAML/CovMap.h   |  47 ++-
 llvm/include/llvm/ProfileData/InstrProf.h   |   6 +
 llvm/lib/ObjectYAML/CovMap.cpp  | 377 +++-
 llvm/lib/ProfileData/InstrProf.cpp  |  23 +-
 llvm/test/tools/obj2yaml/ELF/covmap-be.yaml |   2 +
 llvm/test/tools/obj2yaml/ELF/covmap.yaml|   2 +
 llvm/tools/obj2yaml/elf2yaml.cpp|  59 ++-
 llvm/tools/obj2yaml/obj2yaml.cpp|   2 +-
 llvm/tools/obj2yaml/obj2yaml.h  |   4 +
 9 files changed, 512 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/ObjectYAML/CovMap.h 
b/llvm/include/llvm/ObjectYAML/CovMap.h
index 3a0b86435d490..406204ee024fb 100644
--- a/llvm/include/llvm/ObjectYAML/CovMap.h
+++ b/llvm/include/llvm/ObjectYAML/CovMap.h
@@ -16,7 +16,7 @@
 //
 // - llvm::covmap
 //
-//   Provides YAML encoder for coverage map.
+//   Provides YAML encoder and decoder for coverage map.
 //
 
//===--===//
 
@@ -27,6 +27,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ObjectYAML/ELFYAML.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/YAMLTraits.h"
 #include 
 #include 
@@ -41,6 +42,8 @@ class raw_ostream;
 
 namespace llvm::coverage::yaml {
 
+struct DecoderContext;
+
 /// Base Counter, corresponding to coverage::Counter.
 struct CounterTy {
   enum TagTy : uint8_t {
@@ -65,6 +68,12 @@ struct CounterTy {
 
   virtual void mapping(llvm::yaml::IO &IO);
 
+  /// Holds Val for extensions.
+  Error decodeOrTag(DecoderContext &Data);
+
+  /// Raise Error if Val isn't empty.
+  Error decode(DecoderContext &Data);
+
   void encode(raw_ostream &OS) const;
 };
 
@@ -85,6 +94,8 @@ struct DecisionTy {
 
   void mapping(llvm::yaml::IO &IO);
 
+  Error decode(DecoderContext &Data);
+
   void encode(raw_ostream &OS) const;
 };
 
@@ -118,6 +129,8 @@ struct RecTy : CounterTy {
 
   void mapping(llvm::yaml::IO &IO) override;
 
+  Error decode(DecoderContext &Data);
+
   void encode(uint64_t &StartLoc, raw_ostream &OS) const;
 };
 
@@ -142,6 +155,10 @@ struct CovFunTy {
 
   void mapping(llvm::yaml::IO &IO);
 
+  /// Depends on CovMap and SymTab(IPSK_names)
+  Expected decode(const ArrayRef Content, uint64_t Offset,
+endianness Endianness);
+
   void encode(raw_ostream &OS, endianness Endianness) const;
 };
 
@@ -170,6 +187,9 @@ struct CovMapTy {
   bool useWD() const { return (!Version || *Version >= 4); }
   StringRef getWD() const { return (WD ? *WD : StringRef()); }
 
+  Expected decode(const ArrayRef Content, uint64_t Offset,
+endianness Endianness);
+
   /// Generate Accumulated list with WD.
   /// Returns a single element {WD} if AccFiles is not given.
   std::vector
@@ -236,6 +256,31 @@ LLVM_COVERAGE_YAML_ELEM_MAPPING(CovMapTy)
 
 namespace llvm::covmap {
 
+class Decoder {
+protected:
+  endianness Endianness;
+
+public:
+  Decoder(endianness Endianness) : Endianness(Endianness) {}
+  virtual ~Decoder() {}
+
+  /// Returns DecoderImpl.
+  static std::unique_ptr get(endianness Endianness,
+  bool CovMapEnabled);
+
+  /// Called from the Sections loop in advance of the final dump.
+  /// Decoder predecodes CovMap for Version info.
+  virtual Error acquire(unsigned AddressAlign, StringRef Name,
+ArrayRef Content) = 0;
+
+  /// Make contents on ELFYAML object. CovMap is predecoded.
+  virtual Error make(ELFYAML::CovMapSectionBase *Base,
+ ArrayRef Content) = 0;
+
+  /// Suppress emission of CovMap unless enabled.
+  static bool enabled;
+};
+
 class Encoder {
 protected:
   endianness Endianness;
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h 
b/llvm/include/llvm/ProfileData/InstrProf.h
index 7133c0c6a302c..e20424da3cac2 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -545,6 +545,12 @@ class InstrProfSymtab {
   /// This method is a wrapper to \c readAndDecodeStrings method.
   Error create(StringRef NameStrings);
 
+  // PrfNames is nested array.
+  using PrfNamesTy = SmallVector;
+  using PrfNamesChunksTy = SmallVector;
+
+  Expected createAndGetList(ArrayRef Content);
+
   /// Initialize symtab states with function names and vtable names. \c
   /// FuncNameStrings is a string composed of one or more encoded function name
   /// strings, and \c VTableNameStrings composes of one or more encoded vtable
diff --git a/llvm/lib/ObjectYAML/CovMap.cpp b/llvm/lib/ObjectYAML/CovMap.cpp
index 7662284caee76..dcf90f7b109cb 100644
--- a/llvm/lib/ObjectYAML/CovMap.cpp
+++ b/llvm/lib/ObjectYAML/CovMap.cpp
@@ -6,7 +6,7 @@
 //
 
//===--

[llvm-branch-commits] [llvm] obj2yaml: Add "detailed" output in CovMap dump (PR #129473)

2025-03-07 Thread NAKAMURA Takumi via llvm-branch-commits

https://github.com/chapuni updated 
https://github.com/llvm/llvm-project/pull/129473

>From e2dd98690a0f43b35ee22d59efeb04d2c7fead68 Mon Sep 17 00:00:00 2001
From: NAKAMURA Takumi 
Date: Mon, 3 Mar 2025 12:26:08 +0900
Subject: [PATCH] detailed

---
 llvm/include/llvm/ObjectYAML/CovMap.h   |  97 --
 llvm/lib/ObjectYAML/CovMap.cpp  | 190 
 llvm/test/tools/obj2yaml/ELF/covmap-be.yaml |   7 +
 llvm/test/tools/obj2yaml/ELF/covmap.yaml|   7 +
 llvm/tools/obj2yaml/elf2yaml.cpp|  25 ++-
 5 files changed, 269 insertions(+), 57 deletions(-)

diff --git a/llvm/include/llvm/ObjectYAML/CovMap.h 
b/llvm/include/llvm/ObjectYAML/CovMap.h
index 406204ee024fb..b55d902f999e8 100644
--- a/llvm/include/llvm/ObjectYAML/CovMap.h
+++ b/llvm/include/llvm/ObjectYAML/CovMap.h
@@ -24,6 +24,7 @@
 #define LLVM_OBJECTYAML_COVMAP_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ObjectYAML/ELFYAML.h"
 #include "llvm/Support/Endian.h"
@@ -34,14 +35,67 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 namespace llvm {
+class InstrProfSymtab;
 class raw_ostream;
 } // namespace llvm
 
 namespace llvm::coverage::yaml {
 
+/// This works like vector container but can be replaced with
+/// MutableArrayRef. See also SequenceTraits.
+template > class VectorOrRef {
+  using Ref = MutableArrayRef;
+
+  /// Holds vector type initially.
+  std::variant Array;
+
+public:
+  // FIXME: Iterator impl is minimal easy.
+  using iterator = T *;
+
+  iterator begin() {
+if (auto *V = std::get_if(&Array))
+  return &V->front();
+return &std::get(Array).front();
+  }
+
+  iterator end() {
+if (auto *V = std::get_if(&Array))
+  return &V->back() + 1;
+return &std::get(Array).back() + 1;
+  }
+
+  size_t size() const {
+if (const auto *V = std::get_if(&Array))
+  return V->size();
+return std::get(Array).size();
+  }
+
+  T &operator[](int Idx) {
+if (auto *V = std::get_if(&Array))
+  return (*V)[Idx];
+return std::get(Array)[Idx];
+  }
+
+  void resize(size_t Size) { std::get(Array).resize(Size); }
+
+  VectorOrRef() = default;
+
+  /// Initialize with MutableArrayRef.
+  VectorOrRef(Ref &&Tmp) : Array(std::move(Tmp)) {}
+};
+
+/// Options for Decoder.
+struct DecoderParam {
+  bool Detailed; ///< Generate and show processed records.
+  bool Raw;  ///< Show raw data oriented records.
+  bool dLoc; ///< Show raw dLoc (differential Loc).
+};
+
 struct DecoderContext;
 
 /// Base Counter, corresponding to coverage::Counter.
@@ -143,6 +197,9 @@ struct FileRecsTy {
   void mapping(llvm::yaml::IO &IO);
 };
 
+/// Key is FilenamesRef.
+using CovMapByRefTy = llvm::DenseMap;
+
 /// An element of CovFun array.
 struct CovFunTy {
   std::optional NameRef; ///< Hash value of the symbol.
@@ -157,7 +214,8 @@ struct CovFunTy {
 
   /// Depends on CovMap and SymTab(IPSK_names)
   Expected decode(const ArrayRef Content, uint64_t Offset,
-endianness Endianness);
+endianness Endianness, CovMapByRefTy &CovMapByRef,
+InstrProfSymtab *SymTab, const DecoderParam 
&Param);
 
   void encode(raw_ostream &OS, endianness Endianness) const;
 };
@@ -180,7 +238,7 @@ struct CovMapTy {
   /// This may be ArrayRef in Decoder since Filenames has been
   /// filled. On the other hand in Encoder, this should be a vector
   /// since YAML parser doesn't endorse references.
-  std::optional> Files;
+  std::optional> Files;
 
   void mapping(llvm::yaml::IO &IO);
 
@@ -188,7 +246,7 @@ struct CovMapTy {
   StringRef getWD() const { return (WD ? *WD : StringRef()); }
 
   Expected decode(const ArrayRef Content, uint64_t Offset,
-endianness Endianness);
+endianness Endianness, const DecoderParam &Param);
 
   /// Generate Accumulated list with WD.
   /// Returns a single element {WD} if AccFiles is not given.
@@ -210,6 +268,21 @@ struct CovMapTy {
 
 } // namespace llvm::coverage::yaml
 
+namespace llvm::yaml {
+template 
+struct SequenceTraits> {
+  static size_t size(IO &io, llvm::coverage::yaml::VectorOrRef &seq) {
+return seq.size();
+  }
+  static T &element(IO &, llvm::coverage::yaml::VectorOrRef &seq,
+size_t index) {
+if (index >= seq.size())
+  seq.resize(index + 1);
+return seq[index];
+  }
+};
+} // namespace llvm::yaml
+
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::coverage::yaml::CovMapTy)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::coverage::yaml::CovFunTy)
 LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::coverage::yaml::ExpressionTy)
@@ -265,17 +338,21 @@ class Decoder {
   virtual ~Decoder() {}
 
   /// Returns DecoderImpl.
-  static std::unique_ptr get(endianness Endianness,
-  bool CovMapEnabled);
+  static std::unique_ptr
+  get(endianness Endianness, const coverage::yaml::DecoderParam &Param);
 
   /// Called 

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Add Metadata generation of Root Signatures for Attr (PR #125131)

2025-03-07 Thread via llvm-branch-commits


@@ -0,0 +1,108 @@
+//===- HLSLRootSignature.cpp - HLSL Root Signature helper objects
+//--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+///
+/// \file This file contains helpers for working with HLSL Root Signatures.
+///
+//===--===//
+
+#include "llvm/Frontend/HLSL/HLSLRootSignature.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+
+namespace llvm {
+namespace hlsl {
+namespace rootsig {
+
+// Static helper functions
+
+static MDString *ClauseTypeToName(LLVMContext &Ctx, ClauseType Type) {
+  StringRef Name;
+  switch (Type) {
+  case ClauseType::CBuffer:
+Name = "CBV";
+break;
+  case ClauseType::SRV:
+Name = "SRV";
+break;
+  case ClauseType::UAV:
+Name = "UAV";
+break;
+  case ClauseType::Sampler:
+Name = "Sampler";
+break;
+  }
+  return MDString::get(Ctx, Name);
+}
+
+// Helper struct so that we can use the overloaded notation of std::visit
+template  struct OverloadBuilds : Ts... {
+  using Ts::operator()...;
+};
+template  OverloadBuilds(Ts...) -> OverloadBuilds;
+
+MDNode *MetadataBuilder::BuildRootSignature() {
+  for (const RootElement &Element : Elements) {
+MDNode *ElementMD =
+std::visit(OverloadBuilds{
+   [&](DescriptorTable Table) -> MDNode * {
+ return BuildDescriptorTable(Table);
+   },
+   [&](DescriptorTableClause Clause) -> MDNode * {
+ return BuildDescriptorTableClause(Clause);
+   },
+   },
+   Element);
+GeneratedMetadata.push_back(ElementMD);
+  }
+
+  return MDNode::get(Ctx, GeneratedMetadata);
+}
+
+MDNode *MetadataBuilder::BuildDescriptorTable(const DescriptorTable &Table) {
+  IRBuilder<> B(Ctx);
+  SmallVector TableOperands;
+  // Set the mandatory arguments
+  TableOperands.push_back(MDString::get(Ctx, "DescriptorTable"));
+  TableOperands.push_back(ConstantAsMetadata::get(
+  B.getInt32(llvm::to_underlying(Table.Visibility;
+
+  // Remaining operands are references to the table's clauses. The in-memory
+  // representation of the Root Elements created from parsing will ensure that
+  // the previous N elements are the clauses for this table.
+  assert(Table.NumClauses <= GeneratedMetadata.size() &&
+ "Table expected all owned clauses to be generated already");
+  // So, add a reference to each clause to our operands
+  TableOperands.append(GeneratedMetadata.end() - Table.NumClauses,
+   GeneratedMetadata.end());
+  // Then, remove those clauses from the general list of Root Elements
+  GeneratedMetadata.pop_back_n(Table.NumClauses);
+
+  return MDNode::get(Ctx, TableOperands);
+}
+
+MDNode *MetadataBuilder::BuildDescriptorTableClause(
+const DescriptorTableClause &Clause) {
+  IRBuilder<> B(Ctx);
+  return MDNode::get(
+  Ctx, {
+   ClauseTypeToName(Ctx, Clause.Type),
+   ConstantAsMetadata::get(B.getInt32(Clause.NumDescriptors)),
+   ConstantAsMetadata::get(B.getInt32(Clause.Register.Number)),
+   ConstantAsMetadata::get(B.getInt32(Clause.Space)),
+   ConstantAsMetadata::get(

joaosaffran wrote:

The Offset doesn't seem to be defined in the metadata spec: 
https://github.com/llvm/wg-hlsl/blob/main/proposals/0002-root-signature-in-clang.md#descriptor-ranges.
 Am I missing something?

https://github.com/llvm/llvm-project/pull/125131
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-codegen

Author: Helena Kotas (hekota)


Changes

Fixes #114126

---
Full diff: https://github.com/llvm/llvm-project/pull/130338.diff


5 Files Affected:

- (modified) clang/lib/CodeGen/CGDeclCXX.cpp (-3) 
- (modified) clang/lib/CodeGen/CGHLSLRuntime.cpp (-129) 
- (modified) clang/lib/CodeGen/CGHLSLRuntime.h (-15) 
- (modified) llvm/include/llvm/Frontend/HLSL/HLSLResource.h (-22) 
- (modified) llvm/lib/Frontend/HLSL/HLSLResource.cpp (+2-52) 


``diff
diff --git a/clang/lib/CodeGen/CGDeclCXX.cpp b/clang/lib/CodeGen/CGDeclCXX.cpp
index f5950f03673a1..1ad34ae61f96a 100644
--- a/clang/lib/CodeGen/CGDeclCXX.cpp
+++ b/clang/lib/CodeGen/CGDeclCXX.cpp
@@ -1071,9 +1071,6 @@ void 
CodeGenFunction::GenerateCXXGlobalVarDeclInitFunc(llvm::Function *Fn,
 EmitCXXGlobalVarDeclInit(*D, Addr, PerformInit);
   }
 
-  if (getLangOpts().HLSL)
-CGM.getHLSLRuntime().annotateHLSLResource(D, Addr);
-
   FinishFunction();
 }
 
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp 
b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index dc34653e8f497..7ea23032b6519 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -260,135 +260,6 @@ void CGHLSLRuntime::finishCodeGen() {
   generateGlobalCtorDtorCalls();
 }
 
-void CGHLSLRuntime::addBufferResourceAnnotation(llvm::GlobalVariable *GV,
-llvm::hlsl::ResourceClass RC,
-llvm::hlsl::ResourceKind RK,
-bool IsROV,
-llvm::hlsl::ElementType ET,
-BufferResBinding &Binding) {
-  llvm::Module &M = CGM.getModule();
-
-  NamedMDNode *ResourceMD = nullptr;
-  switch (RC) {
-  case llvm::hlsl::ResourceClass::UAV:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.uavs");
-break;
-  case llvm::hlsl::ResourceClass::SRV:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.srvs");
-break;
-  case llvm::hlsl::ResourceClass::CBuffer:
-ResourceMD = M.getOrInsertNamedMetadata("hlsl.cbufs");
-break;
-  default:
-assert(false && "Unsupported buffer type!");
-return;
-  }
-  assert(ResourceMD != nullptr &&
- "ResourceMD must have been set by the switch above.");
-
-  llvm::hlsl::FrontendResource Res(
-  GV, RK, ET, IsROV, Binding.Reg.value_or(UINT_MAX), Binding.Space);
-  ResourceMD->addOperand(Res.getMetadata());
-}
-
-static llvm::hlsl::ElementType
-calculateElementType(const ASTContext &Context, const clang::Type *ResourceTy) 
{
-  using llvm::hlsl::ElementType;
-
-  // TODO: We may need to update this when we add things like ByteAddressBuffer
-  // that don't have a template parameter (or, indeed, an element type).
-  const auto *TST = ResourceTy->getAs();
-  assert(TST && "Resource types must be template specializations");
-  ArrayRef Args = TST->template_arguments();
-  assert(!Args.empty() && "Resource has no element type");
-
-  // At this point we have a resource with an element type, so we can assume
-  // that it's valid or we would have diagnosed the error earlier.
-  QualType ElTy = Args[0].getAsType();
-
-  // We should either have a basic type or a vector of a basic type.
-  if (const auto *VecTy = ElTy->getAs())
-ElTy = VecTy->getElementType();
-
-  if (ElTy->isSignedIntegerType()) {
-switch (Context.getTypeSize(ElTy)) {
-case 16:
-  return ElementType::I16;
-case 32:
-  return ElementType::I32;
-case 64:
-  return ElementType::I64;
-}
-  } else if (ElTy->isUnsignedIntegerType()) {
-switch (Context.getTypeSize(ElTy)) {
-case 16:
-  return ElementType::U16;
-case 32:
-  return ElementType::U32;
-case 64:
-  return ElementType::U64;
-}
-  } else if (ElTy->isSpecificBuiltinType(BuiltinType::Half))
-return ElementType::F16;
-  else if (ElTy->isSpecificBuiltinType(BuiltinType::Float))
-return ElementType::F32;
-  else if (ElTy->isSpecificBuiltinType(BuiltinType::Double))
-return ElementType::F64;
-
-  // TODO: We need to handle unorm/snorm float types here once we support them
-  llvm_unreachable("Invalid element type for resource");
-}
-
-void CGHLSLRuntime::annotateHLSLResource(const VarDecl *D, GlobalVariable *GV) 
{
-  const Type *Ty = D->getType()->getPointeeOrArrayElementType();
-  if (!Ty)
-return;
-  const auto *RD = Ty->getAsCXXRecordDecl();
-  if (!RD)
-return;
-  // the resource related attributes are on the handle member
-  // inside the record decl
-  for (auto *FD : RD->fields()) {
-const auto *HLSLResAttr = FD->getAttr();
-const HLSLAttributedResourceType *AttrResType =
-dyn_cast(FD->getType().getTypePtr());
-if (!HLSLResAttr || !AttrResType)
-  continue;
-
-llvm::hlsl::ResourceClass RC = AttrResType->getAttrs().ResourceClass;
-if (RC == llvm::hlsl::ResourceClass::UAV ||
-RC == llvm::hlsl::ResourceClass

[llvm-branch-commits] [flang] Reland " [flang] Rely on global initialization for simpler derived types" (PR #130290)

2025-03-07 Thread via llvm-branch-commits

https://github.com/NimishMishra approved this pull request.

Thanks. LGTM.

https://github.com/llvm/llvm-project/pull/130290
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [llvm] [flang][OpenMP] Parse cancel-directive-name as clause (PR #130146)

2025-03-07 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/130146

>From 98df18461bb06afa06b8968b157a3c5a5cf50324 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Thu, 6 Mar 2025 08:51:34 -0600
Subject: [PATCH 1/4] [flang][OpenMP] Parse cancel-directive-name as clause

The cancellable construct names on CANCEL or CANCELLATION POINT directives
are actually clauses (with the same names as the corresponding constructs).

Instead of parsing them into a custom structure, parse them as a clause,
which will make CANCEL/CANCELLATION POINT follow the same uniform scheme
as other constructs (<directive> [(<arguments>)] [clauses]).
---
 flang/include/flang/Parser/dump-parse-tree.h |   2 +-
 flang/include/flang/Parser/parse-tree.h  |  11 +-
 flang/lib/Parser/openmp-parsers.cpp  |  31 -
 flang/lib/Parser/unparse.cpp |   5 +-
 flang/lib/Semantics/check-omp-structure.cpp  | 133 +--
 flang/lib/Semantics/check-omp-structure.h|   5 +-
 flang/test/Semantics/OpenMP/cancel.f90   |  29 
 llvm/include/llvm/Frontend/OpenMP/OMP.td |   5 +
 8 files changed, 169 insertions(+), 52 deletions(-)
 create mode 100644 flang/test/Semantics/OpenMP/cancel.f90

diff --git a/flang/include/flang/Parser/dump-parse-tree.h 
b/flang/include/flang/Parser/dump-parse-tree.h
index fcd902d25fa40..004e22a21ecfa 100644
--- a/flang/include/flang/Parser/dump-parse-tree.h
+++ b/flang/include/flang/Parser/dump-parse-tree.h
@@ -546,6 +546,7 @@ class ParseTreeDumper {
 #define GEN_FLANG_DUMP_PARSE_TREE_CLAUSES
 #include "llvm/Frontend/OpenMP/OMP.inc"
   NODE(parser, OmpClauseList)
+  NODE(parser, OmpCancellationConstructTypeClause)
   NODE(parser, OmpContainsClause)
   NODE(parser, OmpCriticalDirective)
   NODE(parser, OmpErrorDirective)
@@ -689,7 +690,6 @@ class ParseTreeDumper {
   NODE(parser, OpenMPAtomicConstruct)
   NODE(parser, OpenMPBlockConstruct)
   NODE(parser, OpenMPCancelConstruct)
-  NODE(OpenMPCancelConstruct, If)
   NODE(parser, OpenMPCancellationPointConstruct)
   NODE(parser, OpenMPConstruct)
   NODE(parser, OpenMPCriticalConstruct)
diff --git a/flang/include/flang/Parser/parse-tree.h 
b/flang/include/flang/Parser/parse-tree.h
index a197249ebae91..cb0eb884e1193 100644
--- a/flang/include/flang/Parser/parse-tree.h
+++ b/flang/include/flang/Parser/parse-tree.h
@@ -4048,6 +4048,12 @@ struct OmpBindClause {
   WRAPPER_CLASS_BOILERPLATE(OmpBindClause, Binding);
 };
 
+// Artificial clause to represent a cancellable construct.
+struct OmpCancellationConstructTypeClause {
+  TUPLE_CLASS_BOILERPLATE(OmpCancellationConstructTypeClause);
+  std::tuple> t;
+};
+
 // Ref: [5.2:214]
 //
 // contains-clause ->
@@ -4870,15 +4876,14 @@ struct OmpCancelType {
 struct OpenMPCancellationPointConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPCancellationPointConstruct);
   CharBlock source;
-  std::tuple t;
+  std::tuple t;
 };
 
 // 2.14.1 cancel -> CANCEL construct-type-clause [ [,] if-clause]
 struct OpenMPCancelConstruct {
   TUPLE_CLASS_BOILERPLATE(OpenMPCancelConstruct);
-  WRAPPER_CLASS(If, ScalarLogicalExpr);
   CharBlock source;
-  std::tuple> t;
+  std::tuple t;
 };
 
 // Ref: [5.0:254-255], [5.1:287-288], [5.2:322-323]
diff --git a/flang/lib/Parser/openmp-parsers.cpp 
b/flang/lib/Parser/openmp-parsers.cpp
index 80831db0e7d50..51b2567a3894d 100644
--- a/flang/lib/Parser/openmp-parsers.cpp
+++ b/flang/lib/Parser/openmp-parsers.cpp
@@ -803,8 +803,9 @@ TYPE_PARSER(construct(many(maybe(","_tok) 
>>
 TYPE_PARSER(construct(many(maybe(","_tok) >>
 construct(unwrap(OmpDirectiveNameParser{})
 
-TYPE_PARSER("ABSENT" >> construct(construct(
-parenthesized(Parser{}))) ||
+TYPE_PARSER( //
+"ABSENT" >> construct(construct(
+parenthesized(Parser{}))) ||
 "ACQUIRE" >> construct(construct()) ||
 "ACQ_REL" >> construct(construct()) ||
 "AFFINITY" >> construct(construct(
@@ -981,7 +982,24 @@ TYPE_PARSER("ABSENT" >> 
construct(construct(
 "UPDATE" >> construct(construct(
 parenthesized(Parser{}))) ||
 "WHEN" >> construct(construct(
-  parenthesized(Parser{}
+  parenthesized(Parser{}))) ||
+// Cancellable constructs
+construct(construct(
+construct( //
+construct(verbatim("DO"_id)),
+maybe(parenthesized(scalarLogicalExpr) ||
+construct(construct(
+construct( //
+construct(verbatim("PARALLEL"_id)),
+maybe(parenthesized(scalarLogicalExpr) ||
+construct(construct(
+construct( //
+construct(verbatim("SECTIONS"_id)),
+maybe(parenthesized(scalarLogicalExpr) ||
+construct(construct(
+construct( //
+construct(verbatim("TASKGROUP"_id)),
+maybe(parenthesized(scalarLogicalExpr))
 
 // [Clause, [Clause], ...]
 TYPE_PARSER(sourced(construct(
@@ -1104,11 +1122,11 @@ TYPE_PARSER(sourced(construct(
 
 // 2.14.2 Cancellation Point

[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering (non i1) (PR #124298)

2025-03-07 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle approved this pull request.

Thanks!

https://github.com/llvm/llvm-project/pull/124298
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Allocate scratch space for dVGPRs for CWSR (PR #130055)

2025-03-07 Thread Diana Picus via llvm-branch-commits

https://github.com/rovka updated 
https://github.com/llvm/llvm-project/pull/130055

>From 3e20edfc6f3b1bfa60f5d778ce98c1fb984b1aee Mon Sep 17 00:00:00 2001
From: Diana Picus 
Date: Tue, 24 Sep 2024 09:57:25 +0200
Subject: [PATCH 1/7] [AMDGPU] Allocate scratch space for dVGPRs for CWSR

The CWSR trap handler needs to save and restore the VGPRs. When dynamic
VGPRs are in use, the fixed function hardware will only allocate enough
space for one VGPR block. The rest will have to be stored in scratch, at
offset 0.

This patch allocates the necessary space by:
- generating a prologue that checks at runtime if we're on a compute
  queue (since CWSR only works on compute queues); for this we will have
  to check the ME_ID bits of the ID_HW_ID2 register - if that is
  non-zero, we can assume we're on a compute queue and initialize the SP
  and FP with enough room for the dynamic VGPRs
- forcing all compute entry functions to use a FP so they can access
  their locals/spills correctly (this isn't ideal but it's the quickest
  to implement)

Note that at the moment we allocate enough space for the theoretical
maximum number of VGPRs that can be allocated dynamically (for blocks of
16 registers, this will be 128, of which we subtract the first 16, which
are already allocated by the fixed function hardware). Future patches
may decide to allocate less if they can prove the shader never allocates
that many blocks.

Also note that this should not affect any reported stack sizes (e.g. PAL
backend_stack_size etc).
---
 llvm/docs/AMDGPUUsage.rst |  65 +++--
 llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp   |   9 +-
 llvm/lib/Target/AMDGPU/SIDefines.h|   1 +
 llvm/lib/Target/AMDGPU/SIFrameLowering.cpp|  66 -
 llvm/lib/Target/AMDGPU/SIFrameLowering.h  |   4 +
 .../lib/Target/AMDGPU/SIMachineFunctionInfo.h |  13 +
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp |   8 +
 .../dynamic-vgpr-reserve-stack-for-cwsr.ll| 263 ++
 llvm/test/CodeGen/AMDGPU/pal-metadata-3.0.ll  |  11 +-
 9 files changed, 399 insertions(+), 41 deletions(-)
 create mode 100644 
llvm/test/CodeGen/AMDGPU/dynamic-vgpr-reserve-stack-for-cwsr.ll

diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 59cc08a59ed7c..b5196930a50f7 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -6020,8 +6020,13 @@ Frame Pointer
 
 If the kernel needs a frame pointer for the reasons defined in
 ``SIFrameLowering`` then SGPR33 is used and is always set to ``0`` in the
-kernel prolog. If a frame pointer is not required then all uses of the frame
-pointer are replaced with immediate ``0`` offsets.
+kernel prolog. On GFX12+, when dynamic VGPRs are enabled, the prologue will
+check if the kernel is running on a compute queue, and if so it will reserve
+some scratch space for any dynamic VGPRs that might need to be saved by the
+CWSR trap handler. In this case, the frame pointer will be initialized to
+a suitably aligned offset above this reserved area. If a frame pointer is not
+required then all uses of the frame pointer are replaced with immediate ``0``
+offsets.
 
 .. _amdgpu-amdhsa-kernel-prolog-flat-scratch:
 
@@ -17133,33 +17138,35 @@ within a map that has been added by the same 
*vendor-name*.
   .. table:: AMDPAL Code Object Hardware Stage Metadata Map
  :name: amdgpu-amdpal-code-object-hardware-stage-metadata-map-table
 
- == == = 
===
- String Key Value Type Required? Description
- == == = 
===
- ".entry_point" string   The ELF symbol 
pointing to this pipeline's stage entry point.
- ".scratch_memory_size" integer  Scratch memory size 
in bytes.
- ".lds_size"integer  Local Data Share size 
in bytes.
- ".perf_data_buffer_size"   integer  Performance data 
buffer size in bytes.
- ".vgpr_count"  integer  Number of VGPRs used.
- ".agpr_count"  integer  Number of AGPRs used.
- ".sgpr_count"  integer  Number of SGPRs used.
- ".vgpr_limit"  integer  If non-zero, 
indicates the shader was compiled with a
- directive to instruct 
the compiler to limit the VGPR usage to
- be less than or equal 
to the specified value (only set if
- different from HW 
default).
- ".sgpr_limit"  integer  SGPR count upper 
limit (only set if different from HW
- default).
- ".

[llvm-branch-commits] [llvm] [InstCombine] Enable select freeze poison folding when storing value (PR #129776)

2025-03-07 Thread Nuno Lopes via llvm-branch-commits

nunoplopes wrote:

FWIW, we have been using this patch internally and it helps substantially in a 
couple of benchmarks.

https://github.com/llvm/llvm-project/pull/129776
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] Reland " [flang] Rely on global initialization for simpler derived types" (PR #130290)

2025-03-07 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah approved this pull request.

Thank you for the quick fix. This does fix the issue I saw.

https://github.com/llvm/llvm-project/pull/130290
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-07 Thread David Li via llvm-branch-commits


@@ -386,6 +386,16 @@ MCSection *TargetLoweringObjectFile::getSectionForConstant(
   return DataSection;
 }
 
+MCSection *TargetLoweringObjectFile::getSectionForConstant(
+const DataLayout &DL, SectionKind Kind, const Constant *C, Align 
&Alignment,
+StringRef SectionPrefix) const {
+  // Fallback to `getSectionForConstant` without `SectionPrefix` parameter if 
it
+  // is empty.
+  if (SectionPrefix.empty())

david-xl wrote:

The current implementation is fine. Perhaps improve the message in 
report_fatal_error, e.g. "Not implemented for the object format".

https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread Helena Kotas via llvm-branch-commits


@@ -11,59 +11,9 @@
 
//===--===//
 
 #include "llvm/Frontend/HLSL/HLSLResource.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Metadata.h"
 
 using namespace llvm;
 using namespace llvm::hlsl;
 
-GlobalVariable *FrontendResource::getGlobalVariable() {
-  return cast(
-  cast(Entry->getOperand(0))->getValue());
-}
-
-ResourceKind FrontendResource::getResourceKind() {
-  return static_cast(
-  cast(
-  cast(Entry->getOperand(1))->getValue())
-  ->getLimitedValue());
-}
-ElementType FrontendResource::getElementType() {
-  return static_cast(
-  cast(
-  cast(Entry->getOperand(2))->getValue())
-  ->getLimitedValue());
-}
-bool FrontendResource::getIsROV() {
-  return cast(
- cast(Entry->getOperand(3))->getValue())
-  ->getLimitedValue();
-}
-uint32_t FrontendResource::getResourceIndex() {
-  return cast(
- cast(Entry->getOperand(4))->getValue())
-  ->getLimitedValue();
-}
-uint32_t FrontendResource::getSpace() {
-  return cast(
- cast(Entry->getOperand(5))->getValue())
-  ->getLimitedValue();
-}
-
-FrontendResource::FrontendResource(MDNode *E) : Entry(E) {
-  assert(Entry->getNumOperands() == 6 && "Unexpected metadata shape");
-}
-
-FrontendResource::FrontendResource(GlobalVariable *GV, ResourceKind RK,
-   ElementType ElTy, bool IsROV,
-   uint32_t ResIndex, uint32_t Space) {
-  auto &Ctx = GV->getContext();
-  IRBuilder<> B(Ctx);
-  Entry = MDNode::get(
-  Ctx, {ValueAsMetadata::get(GV),
-ConstantAsMetadata::get(B.getInt32(static_cast(RK))),
-ConstantAsMetadata::get(B.getInt32(static_cast(ElTy))),
-ConstantAsMetadata::get(B.getInt1(IsROV)),
-ConstantAsMetadata::get(B.getInt32(ResIndex)),
-ConstantAsMetadata::get(B.getInt32(Space))});
-}
+// Intentionally empty; this file can be removed when more cpp files are added

hekota wrote:

HLSLResource.cpp is currently the only file in the FrontendHLSL library. If I 
remove it the library is empty, and the compiler/linker is not happy about 
that. I don't want to remove the whole library because I know more changes are 
coming in here soon (https://github.com/llvm/llvm-project/pull/125131), so I 
have decided to just leave it in with a comment.

https://github.com/llvm/llvm-project/pull/130338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota edited 
https://github.com/llvm/llvm-project/pull/130338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread Helena Kotas via llvm-branch-commits


@@ -125,15 +125,6 @@ class CGHLSLRuntime {
   // End of reserved area for HLSL intrinsic getters.
   
//===--===//
 
-  struct BufferResBinding {

hekota wrote:

The register number and space are used in the 
`llvm.dx.resource.handlefrombinding` intrinsic call when the code to initialize 
the resource is generated. Then, in the LLVM backend, the 
`DXILResourceBindingAnalysis` analyzes the code and collects information about 
all the shader resources and their bindings based on these calls. This info is 
then used by other LLVM passes to make sure we generate the correct 
"dx.resources" metadata in the DXIL container. 

https://github.com/llvm/llvm-project/pull/130338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering i1 (PR #124299)

2025-03-07 Thread Nicolai Hähnle via llvm-branch-commits


@@ -228,6 +229,66 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
   return false;
 }
 
+bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
+  MachineRegisterInfo::VRegAttrs BoolS1 = {ST->getBoolRC(), LLT::scalar(1)};
+  initializeLaneMaskRegisterAttributes(BoolS1);
+  MachineSSAUpdater SSAUpdater(*MF);
+
+  // In case of use outside multiple nested cycles or multiple uses we only need
+  // to merge lane mask across largest relevant cycle.
+  SmallDenseMap> LRCCache;
+  for (auto [Reg, UseInst, LRC] : MUI->getTemporalDivergenceList()) {
+if (MRI->getType(Reg) != LLT::scalar(1))
+  continue;
+
+const MachineCycle *CachedLRC = LRCCache.lookup(Reg).first;
+if (CachedLRC) {
+  LRC = CachedLRC->contains(LRC) ? CachedLRC : LRC;
+  assert(LRC->contains(CachedLRC));
+}
+
+LRCCache[Reg] = {LRC, {}};

nhaehnle wrote:

This is actually a great use case for try_emplace to do the cache lookup only 
once:
```suggestion
const MachineCycle *&CachedLRC = LRCCache.try_emplace(Reg);
if (!CachedLRC || !CachedLRC->contains(LRC))
  CachedLRC = LRC;
```

https://github.com/llvm/llvm-project/pull/124299
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering i1 (PR #124299)

2025-03-07 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle edited 
https://github.com/llvm/llvm-project/pull/124299
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering i1 (PR #124299)

2025-03-07 Thread Nicolai Hähnle via llvm-branch-commits


@@ -228,6 +229,66 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
   return false;
 }
 
+bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
+  MachineRegisterInfo::VRegAttrs BoolS1 = {ST->getBoolRC(), LLT::scalar(1)};
+  initializeLaneMaskRegisterAttributes(BoolS1);
+  MachineSSAUpdater SSAUpdater(*MF);
+
+  // In case of use outside multiple nested cycles or multiple uses we only need
+  // to merge lane mask across largest relevant cycle.
+  SmallDenseMap> LRCCache;
+  for (auto [Reg, UseInst, LRC] : MUI->getTemporalDivergenceList()) {
+if (MRI->getType(Reg) != LLT::scalar(1))
+  continue;
+
+const MachineCycle *CachedLRC = LRCCache.lookup(Reg).first;
+if (CachedLRC) {
+  LRC = CachedLRC->contains(LRC) ? CachedLRC : LRC;
+  assert(LRC->contains(CachedLRC));
+}
+
+LRCCache[Reg] = {LRC, {}};
+  }
+
+  for (auto LRCIter : LRCCache) {
+Register Reg = LRCIter.first;
+const MachineCycle *Cycle = LRCIter.second.first;
+
+if (MRI->getType(Reg) != LLT::scalar(1))
+  continue;

nhaehnle wrote:

This check is now redundant

https://github.com/llvm/llvm-project/pull/124299
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Temporal divergence lowering i1 (PR #124299)

2025-03-07 Thread Nicolai Hähnle via llvm-branch-commits


@@ -228,6 +229,66 @@ bool DivergenceLoweringHelper::lowerTemporalDivergence() {
   return false;
 }
 
+bool DivergenceLoweringHelper::lowerTemporalDivergenceI1() {
+  MachineRegisterInfo::VRegAttrs BoolS1 = {ST->getBoolRC(), LLT::scalar(1)};
+  initializeLaneMaskRegisterAttributes(BoolS1);
+  MachineSSAUpdater SSAUpdater(*MF);
+
+  // In case of use outside muliple nested cycles or muliple uses we only need
+  // to merge lane mask across largest relevant cycle.
+  SmallDenseMap> LRCCache;
+  for (auto [Reg, UseInst, LRC] : MUI->getTemporalDivergenceList()) {
+if (MRI->getType(Reg) != LLT::scalar(1))
+  continue;
+
+const MachineCycle *CachedLRC = LRCCache.lookup(Reg).first;
+if (CachedLRC) {
+  LRC = CachedLRC->contains(LRC) ? CachedLRC : LRC;
+  assert(LRC->contains(CachedLRC));
+}
+
+LRCCache[Reg] = {LRC, {}};
+  }
+
+  for (auto LRCIter : LRCCache) {
+Register Reg = LRCIter.first;
+const MachineCycle *Cycle = LRCIter.second.first;
+
+if (MRI->getType(Reg) != LLT::scalar(1))
+  continue;
+
+Register MergedMask = MRI->createVirtualRegister(BoolS1);
+SSAUpdater.Initialize(MergedMask);
+
+MachineBasicBlock *MBB = MRI->getVRegDef(Reg)->getParent();
+SSAUpdater.AddAvailableValue(MBB, MergedMask);
+
+for (auto Entry : Cycle->getEntries()) {
+  for (MachineBasicBlock *Pred : Entry->predecessors()) {
+if (!Cycle->contains(Pred)) {
+  B.setInsertPt(*Pred, Pred->getFirstTerminator());
+  auto ImplDef = B.buildInstr(AMDGPU::IMPLICIT_DEF, {BoolS1}, {});
+  SSAUpdater.AddAvailableValue(Pred, ImplDef.getReg(0));
+}
+  }
+}
+
+buildMergeLaneMasks(*MBB, MBB->getFirstTerminator(), {}, MergedMask,
+SSAUpdater.GetValueInMiddleOfBlock(MBB), Reg);
+
+LRCCache[Reg].second = MergedMask;
+  }
+
+  for (auto [Reg, UseInst, Cycle] : MUI->getTemporalDivergenceList()) {
+if (MRI->getType(Reg) != LLT::scalar(1))
+  continue;
+
+replaceUsesOfRegInInstWith(Reg, UseInst, LRCCache[Reg].second);

nhaehnle wrote:

Can use .lookup instead of operator[] for consistency with above.

https://github.com/llvm/llvm-project/pull/124299
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [HLSL] Implement explicit layout for default constant buffer ($Globals) (PR #128991)

2025-03-07 Thread Helena Kotas via llvm-branch-commits

hekota wrote:

Reopening. I accidentally deleted the PR branch this one depends on, which
closed this PR.

https://github.com/llvm/llvm-project/pull/128991
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread Joshua Batista via llvm-branch-commits


@@ -125,15 +125,6 @@ class CGHLSLRuntime {
   // End of reserved area for HLSL intrinsic getters.
   
//===----------------------------------------------------------------------===//
 
-  struct BufferResBinding {

bob80905 wrote:

May I ask how the buffer register number `2` is being stored when this goes 
away? 

https://github.com/llvm/llvm-project/pull/130338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL] Remove old resource annotations (PR #130338)

2025-03-07 Thread Joshua Batista via llvm-branch-commits


@@ -11,59 +11,9 @@
 
//===----------------------------------------------------------------------===//
 
 #include "llvm/Frontend/HLSL/HLSLResource.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Metadata.h"
 
 using namespace llvm;
 using namespace llvm::hlsl;
 
-GlobalVariable *FrontendResource::getGlobalVariable() {
-  return cast(
-  cast(Entry->getOperand(0))->getValue());
-}
-
-ResourceKind FrontendResource::getResourceKind() {
-  return static_cast(
-  cast(
-  cast(Entry->getOperand(1))->getValue())
-  ->getLimitedValue());
-}
-ElementType FrontendResource::getElementType() {
-  return static_cast(
-  cast(
-  cast(Entry->getOperand(2))->getValue())
-  ->getLimitedValue());
-}
-bool FrontendResource::getIsROV() {
-  return cast(
- cast(Entry->getOperand(3))->getValue())
-  ->getLimitedValue();
-}
-uint32_t FrontendResource::getResourceIndex() {
-  return cast(
- cast(Entry->getOperand(4))->getValue())
-  ->getLimitedValue();
-}
-uint32_t FrontendResource::getSpace() {
-  return cast(
- cast(Entry->getOperand(5))->getValue())
-  ->getLimitedValue();
-}
-
-FrontendResource::FrontendResource(MDNode *E) : Entry(E) {
-  assert(Entry->getNumOperands() == 6 && "Unexpected metadata shape");
-}
-
-FrontendResource::FrontendResource(GlobalVariable *GV, ResourceKind RK,
-   ElementType ElTy, bool IsROV,
-   uint32_t ResIndex, uint32_t Space) {
-  auto &Ctx = GV->getContext();
-  IRBuilder<> B(Ctx);
-  Entry = MDNode::get(
-  Ctx, {ValueAsMetadata::get(GV),
-ConstantAsMetadata::get(B.getInt32(static_cast(RK))),
-ConstantAsMetadata::get(B.getInt32(static_cast(ElTy))),
-ConstantAsMetadata::get(B.getInt1(IsROV)),
-ConstantAsMetadata::get(B.getInt32(ResIndex)),
-ConstantAsMetadata::get(B.getInt32(Space))});
-}
+// Intentionally empty; this file can be removed when more cpp files are added

bob80905 wrote:

Would you mind explaining why more cpp files being added later justifies the 
removal of this file later instead of now?
Is it because it's more convenient to unlink this file from the cmakelists when 
adding new cpp files and the cmakelists needs to be updated anyways?

https://github.com/llvm/llvm-project/pull/130338
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [libcxx] release/20.x: [libc++][test] extend XFAIL clauses to cover Amazon Linux too (#129377) (PR #129566)

2025-03-07 Thread Paul Osmialowski via llvm-branch-commits

pawosm-arm wrote:

I can't merge it myself, it says `Cannot update this protected ref.`

https://github.com/llvm/llvm-project/pull/129566
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Remove DXILResourceMDAnalysis (PR #130323)

2025-03-07 Thread Joshua Batista via llvm-branch-commits

https://github.com/bob80905 commented:

LGTM. I'll leave the approval to someone with more expertise.

https://github.com/llvm/llvm-project/pull/130323
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU/GlobalISel: Update divergence lowering tests (PR #128702)

2025-03-07 Thread Nicolai Hähnle via llvm-branch-commits

https://github.com/nhaehnle approved this pull request.


https://github.com/llvm/llvm-project/pull/128702
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Update target helpers & GCNSchedStrategy for dynamic VGPRs (PR #130047)

2025-03-07 Thread Diana Picus via llvm-branch-commits

rovka wrote:

> Is there any test for the revert scheduling portion of the change?

Sadly, no. It's a bit difficult to add.

https://github.com/llvm/llvm-project/pull/130047
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port RemoveLoadsIntoFakeUses to NPM (PR #130068)

2025-03-07 Thread Akshat Oke via llvm-branch-commits

optimisan wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack
> <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130068?utm_source=stack-comment-downstack-mergeability-warning">on Graphite</a>.
> <a href="https://graphite.dev/docs/merge-pull-requests">Learn more</a>

* **#130071** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130071?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130070** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130070?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130069** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130069?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130068** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130068?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a> 👈 <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130068?utm_source=stack-comment-view-in-graphite" target="_blank">(View in Graphite)</a>
* **#130067** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130067?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130066** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130066?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130065** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130065?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130064** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130064?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130063** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130063?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130062** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130062?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130061** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130061?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130060** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130060?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#130059** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/130059?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#129866** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129866?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#129865** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129865?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#129857** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129857?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#129853** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129853?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* **#129828** <a href="https://app.graphite.dev/github/pr/llvm/llvm-project/129828?utm_source=stack-comment-icon" target="_blank"><img src="https://static.graphite.dev/graphite-32x32-black.png" alt="Graphite" width="10px" height="10px"/></a>
* `main`




This stack of pull requests is managed by <a href="https://graphite.dev?utm-source=stack-comment">Graphite</a>. Learn
more about <a href="https://stacking.dev/?utm_source=stack-comment">stacking</a>.


https://github.com/llvm/llvm-project/pull/130068
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits