[llvm-branch-commits] [clang] [llvm] [HLSL] Remove HLSLResource attribute (PR #130342)

2025-03-10 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner approved this pull request.


https://github.com/llvm/llvm-project/pull/130342
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SeparateConstOffsetFromGEP] Preserve inbounds flag based on ValueTracking (PR #130617)

2025-03-10 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a created 
https://github.com/llvm/llvm-project/pull/130617

If we know that the initial GEP was inbounds, and we change it to a
sequence of GEPs from the same base pointer where every offset is
non-negative, then the new GEPs are inbounds.

For SWDEV-516125.

>From e880d4330b4a2fe9b281013b128763e6d24f81dd Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Mon, 10 Mar 2025 06:55:10 -0400
Subject: [PATCH] [SeparateConstOffsetFromGEP] Preserve inbounds flag based on
 ValueTracking

If we know that the initial GEP was inbounds, and we change it to a
sequence of GEPs from the same base pointer where every offset is
non-negative, then the new GEPs are inbounds.

For SWDEV-516125.
---
 .../Scalar/SeparateConstOffsetFromGEP.cpp | 18 +++
 .../AMDGPU/preserve-inbounds.ll   | 23 +++
 .../NVPTX/split-gep-and-gvn.ll| 16 ++---
 .../NVPTX/split-gep.ll|  8 +++
 4 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp 
b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 138a71ce79cef..070afdf0752f4 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -1052,6 +1052,8 @@ bool 
SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
 }
   }
 
+  bool MayRecoverInbounds = AccumulativeByteOffset >= 0 && GEP->isInBounds();
+
   // Remove the constant offset in each sequential index. The resultant GEP
   // computes the variadic base.
   // Notice that we don't remove struct field indices here. If LowerGEP is
@@ -1079,6 +1081,8 @@ bool 
SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
 // and the old index if they are not used.
 RecursivelyDeleteTriviallyDeadInstructions(UserChainTail);
 RecursivelyDeleteTriviallyDeadInstructions(OldIdx);
+MayRecoverInbounds =
+MayRecoverInbounds && computeKnownBits(NewIdx, 
*DL).isNonNegative();
   }
 }
   }
@@ -1100,11 +1104,15 @@ bool 
SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
   // address with silently-wrapping two's complement arithmetic".
   // Therefore, the final code will be a semantically equivalent.
   //
-  // TODO(jingyue): do some range analysis to keep as many inbounds as
-  // possible. GEPs with inbounds are more friendly to alias analysis.
-  // TODO(gep_nowrap): Preserve nuw at least.
-  auto NewGEPFlags = GEPNoWrapFlags::none();
-  GEP->setNoWrapFlags(GEPNoWrapFlags::none());
+  // If the initial GEP was inbounds and all variable indices and the
+  // accumulated offsets are non-negative, they can be added in any order and
+  // the intermediate results are in bounds. So, we can preserve the inbounds
+  // flag for both GEPs. GEPs with inbounds are more friendly to alias 
analysis.
+  //
+  // TODO(gep_nowrap): Preserve nuw?
+  auto NewGEPFlags =
+  MayRecoverInbounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none();
+  GEP->setNoWrapFlags(NewGEPFlags);
 
   // Lowers a GEP to either GEPs with a single index or arithmetic operations.
   if (LowerGEP) {
diff --git 
a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll 
b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
index 877de38776839..91b5bc874c154 100644
--- 
a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
+++ 
b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/preserve-inbounds.ll
@@ -24,3 +24,26 @@ entry:
   store float %3, ptr %arrayidx.dst, align 4
   ret void
 }
+
+; All offsets must be positive, so inbounds can be preserved.
+define void @must_be_inbounds(ptr %dst, ptr %src, i32 %i) {
+; CHECK-LABEL: @must_be_inbounds(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:[[I_PROM:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:[[TMP0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], 
i64 [[I_PROM]]
+; CHECK-NEXT:[[ARRAYIDX_SRC2:%.*]] = getelementptr inbounds i8, ptr 
[[TMP0]], i64 4
+; CHECK-NEXT:[[TMP1:%.*]] = load float, ptr [[ARRAYIDX_SRC2]], align 4
+; CHECK-NEXT:[[TMP2:%.*]] = getelementptr inbounds float, ptr [[DST:%.*]], 
i64 [[I_PROM]]
+; CHECK-NEXT:[[ARRAYIDX_DST4:%.*]] = getelementptr inbounds i8, ptr 
[[TMP2]], i64 4
+; CHECK-NEXT:store float [[TMP1]], ptr [[ARRAYIDX_DST4]], align 4
+; CHECK-NEXT:ret void
+;
+entry:
+  %i.prom = zext i32 %i to i64
+  %idx = add nsw i64 %i.prom, 1
+  %arrayidx.src = getelementptr inbounds float, ptr %src, i64 %idx
+  %3 = load float, ptr %arrayidx.src, align 4
+  %arrayidx.dst = getelementptr inbounds float, ptr %dst, i64 %idx
+  store float %3, ptr %arrayidx.dst, align 4
+  ret void
+}
diff --git 
a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll 
b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index 9a73feb2c4b5c..447

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM (PR #129853)

2025-03-10 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/129853

>From 957ca954e2bdaf7f1c3dc841eeeadf76e2580ce8 Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 08:59:23 +
Subject: [PATCH 1/2] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM

---
 .../llvm/CodeGen/MachineBlockPlacement.h  |  8 
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/MachineBlockPlacement.cpp| 45 ++-
 5 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h 
b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
index 733d24ab719a8..119f9a637aff3 100644
--- a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
+++ b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
@@ -30,6 +30,14 @@ class MachineBlockPlacementPass
 function_ref MapClassName2PassName) 
const;
 };
 
+class MachineBlockPlacementStatsPass
+: public PassInfoMixin<MachineBlockPlacementStatsPass> {
+
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index a27b5630b308e..62566492d0b87 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -185,7 +185,7 @@ void initializeMIRNamerPass(PassRegistry &);
 void initializeMIRPrintingPassPass(PassRegistry &);
 void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &);
 void initializeMachineBlockPlacementLegacyPass(PassRegistry &);
-void initializeMachineBlockPlacementStatsPass(PassRegistry &);
+void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &);
 void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);
 void initializeMachineCFGPrinterPass(PassRegistry &);
 void initializeMachineCSELegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 517401b3f4ebe..505dce9098254 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -137,6 +137,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", 
VirtRegMapAnalysis())
 #ifndef MACHINE_FUNCTION_PASS
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
+MACHINE_FUNCTION_PASS("block-placement-stats", 
MachineBlockPlacementStatsPass())
 MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass())
 MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
 MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
@@ -251,7 +252,6 @@ DUMMY_MACHINE_MODULE_PASS("mir-strip-debug", 
StripDebugMachineModulePass)
 #endif
 DUMMY_MACHINE_FUNCTION_PASS("bbsections-prepare", BasicBlockSectionsPass)
 DUMMY_MACHINE_FUNCTION_PASS("bbsections-profile-reader", 
BasicBlockSectionsProfileReaderPass)
-DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", 
MachineBlockPlacementStatsPass)
 DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass)
 DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass)
 DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index daa31073e7151..8f9d9ff92d366 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -73,7 +73,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeMIRProfileLoaderPassPass(Registry);
   initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
   initializeMachineBlockPlacementLegacyPass(Registry);
-  initializeMachineBlockPlacementStatsPass(Registry);
+  initializeMachineBlockPlacementStatsLegacyPass(Registry);
   initializeMachineCFGPrinterPass(Registry);
   initializeMachineCSELegacyPass(Registry);
   initializeMachineCombinerPass(Registry);
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp 
b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 40edc47f3e6bb..8829731ef0e87 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3837,21 +3837,35 @@ namespace {
 /// placement. This is separate from the actual placement pass so that they can
 /// be computed in the absence of any placement transformations or when using
 /// alternative placement strategies.
-class MachineBlockPlacementStats : public MachineFunctionPass {
+class MachineBlockPlacementStats {
   /// A handle to the branch probability pass.
   const MachineBranchProbabilityInfo *MBPI;
 
   /// A handle to the function-wide block frequency pass.
   const MachineBlockFrequencyInfo *MBFI;
 
+public:
+  MachineBlockPlacementStats(const MachineBranchProbabilityInfo *MBPI,
+ const MachineBlockFrequencyInfo *MBFI)
+  : 

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM (PR #129853)

2025-03-10 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/129853

>From 19662079ef1d5ede80dc6cdf0d8c0983033b15ed Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 08:59:23 +
Subject: [PATCH 1/2] [CodeGen][NPM] Port MachineBlockPlacementStats to NPM

---
 .../llvm/CodeGen/MachineBlockPlacement.h  |  8 
 llvm/include/llvm/InitializePasses.h  |  2 +-
 .../llvm/Passes/MachinePassRegistry.def   |  2 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/MachineBlockPlacement.cpp| 45 ++-
 5 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h 
b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
index 733d24ab719a8..119f9a637aff3 100644
--- a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
+++ b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
@@ -30,6 +30,14 @@ class MachineBlockPlacementPass
 function_ref MapClassName2PassName) 
const;
 };
 
+class MachineBlockPlacementStatsPass
+: public PassInfoMixin<MachineBlockPlacementStatsPass> {
+
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index a27b5630b308e..62566492d0b87 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -185,7 +185,7 @@ void initializeMIRNamerPass(PassRegistry &);
 void initializeMIRPrintingPassPass(PassRegistry &);
 void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &);
 void initializeMachineBlockPlacementLegacyPass(PassRegistry &);
-void initializeMachineBlockPlacementStatsPass(PassRegistry &);
+void initializeMachineBlockPlacementStatsLegacyPass(PassRegistry &);
 void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);
 void initializeMachineCFGPrinterPass(PassRegistry &);
 void initializeMachineCSELegacyPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 517401b3f4ebe..505dce9098254 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -137,6 +137,7 @@ MACHINE_FUNCTION_ANALYSIS("virtregmap", 
VirtRegMapAnalysis())
 #ifndef MACHINE_FUNCTION_PASS
 #define MACHINE_FUNCTION_PASS(NAME, CREATE_PASS)
 #endif
+MACHINE_FUNCTION_PASS("block-placement-stats", 
MachineBlockPlacementStatsPass())
 MACHINE_FUNCTION_PASS("dead-mi-elimination", DeadMachineInstructionElimPass())
 MACHINE_FUNCTION_PASS("early-ifcvt", EarlyIfConverterPass())
 MACHINE_FUNCTION_PASS("early-machinelicm", EarlyMachineLICMPass())
@@ -251,7 +252,6 @@ DUMMY_MACHINE_MODULE_PASS("mir-strip-debug", 
StripDebugMachineModulePass)
 #endif
 DUMMY_MACHINE_FUNCTION_PASS("bbsections-prepare", BasicBlockSectionsPass)
 DUMMY_MACHINE_FUNCTION_PASS("bbsections-profile-reader", 
BasicBlockSectionsProfileReaderPass)
-DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", 
MachineBlockPlacementStatsPass)
 DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass)
 DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass)
 DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index daa31073e7151..8f9d9ff92d366 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -73,7 +73,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeMIRProfileLoaderPassPass(Registry);
   initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
   initializeMachineBlockPlacementLegacyPass(Registry);
-  initializeMachineBlockPlacementStatsPass(Registry);
+  initializeMachineBlockPlacementStatsLegacyPass(Registry);
   initializeMachineCFGPrinterPass(Registry);
   initializeMachineCSELegacyPass(Registry);
   initializeMachineCombinerPass(Registry);
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp 
b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 40edc47f3e6bb..8829731ef0e87 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3837,21 +3837,35 @@ namespace {
 /// placement. This is separate from the actual placement pass so that they can
 /// be computed in the absence of any placement transformations or when using
 /// alternative placement strategies.
-class MachineBlockPlacementStats : public MachineFunctionPass {
+class MachineBlockPlacementStats {
   /// A handle to the branch probability pass.
   const MachineBranchProbabilityInfo *MBPI;
 
   /// A handle to the function-wide block frequency pass.
   const MachineBlockFrequencyInfo *MBFI;
 
+public:
+  MachineBlockPlacementStats(const MachineBranchProbabilityInfo *MBPI,
+ const MachineBlockFrequencyInfo *MBFI)
+  : 

[llvm-branch-commits] [llvm] [DirectX] Remove DXILResourceMDAnalysis (PR #130323)

2025-03-10 Thread Justin Bogner via llvm-branch-commits


@@ -99,18 +96,6 @@ static NamedMDNode *emitResourceMetadata(Module &M, 
DXILBindingMap &DBM,
   Metadata *SmpMD = Smps.empty() ? nullptr : MDNode::get(Context, Smps);
   bool HasResources = !DBM.empty();
 
-  if (MDResources.hasUAVs()) {
-assert(!UAVMD && "Old and new UAV representations can't coexist");
-UAVMD = MDResources.writeUAVs(M);
-HasResources = true;
-  }
-
-  if (MDResources.hasCBuffers()) {
-assert(!CBufMD && "Old and new cbuffer representations can't coexist");
-CBufMD = MDResources.writeCBuffers(M);
-HasResources = true;
-  }
-
   if (!HasResources)

bogner wrote:

This can be simplified to `if (DBM.empty())` rather than setting `HasResources` 
and checking against it now.

https://github.com/llvm/llvm-project/pull/130323
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Remove DXILResourceMDAnalysis (PR #130323)

2025-03-10 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner approved this pull request.


https://github.com/llvm/llvm-project/pull/130323
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Remove DXILResourceMDAnalysis (PR #130323)

2025-03-10 Thread Helena Kotas via llvm-branch-commits

https://github.com/hekota updated 
https://github.com/llvm/llvm-project/pull/130323

>From 80765757f067527816c4c8b9d728169568b04b7a Mon Sep 17 00:00:00 2001
From: Helena Kotas 
Date: Fri, 7 Mar 2025 10:40:20 -0800
Subject: [PATCH 1/2] [DirectX] Remove DXILResourceMDAnalysis

Part 1/2 of #114126
---
 llvm/lib/Target/DirectX/CMakeLists.txt|   2 -
 .../Target/DirectX/DXILDataScalarization.h|   1 -
 .../Target/DirectX/DXILIntrinsicExpansion.h   |   1 -
 llvm/lib/Target/DirectX/DXILOpLowering.cpp|   2 -
 llvm/lib/Target/DirectX/DXILPrepare.cpp   |   2 -
 llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp |  23 +-
 llvm/lib/Target/DirectX/DXILResource.cpp  | 346 --
 llvm/lib/Target/DirectX/DXILResource.h| 133 ---
 .../Target/DirectX/DXILResourceAnalysis.cpp   |  41 ---
 .../lib/Target/DirectX/DXILResourceAnalysis.h |  55 ---
 .../Target/DirectX/DXILTranslateMetadata.cpp  |  31 +-
 llvm/lib/Target/DirectX/DirectX.h |   3 -
 .../Target/DirectX/DirectXPassRegistry.def|   1 -
 .../Target/DirectX/DirectXTargetMachine.cpp   |   2 -
 llvm/test/CodeGen/DirectX/llc-pipeline.ll |   1 -
 15 files changed, 7 insertions(+), 637 deletions(-)
 delete mode 100644 llvm/lib/Target/DirectX/DXILResource.cpp
 delete mode 100644 llvm/lib/Target/DirectX/DXILResource.h
 delete mode 100644 llvm/lib/Target/DirectX/DXILResourceAnalysis.cpp
 delete mode 100644 llvm/lib/Target/DirectX/DXILResourceAnalysis.h

diff --git a/llvm/lib/Target/DirectX/CMakeLists.txt 
b/llvm/lib/Target/DirectX/CMakeLists.txt
index 5a167535b0afa..6904a1c0f1e73 100644
--- a/llvm/lib/Target/DirectX/CMakeLists.txt
+++ b/llvm/lib/Target/DirectX/CMakeLists.txt
@@ -28,8 +28,6 @@ add_llvm_target(DirectXCodeGen
   DXILOpLowering.cpp
   DXILPrepare.cpp
   DXILPrettyPrinter.cpp
-  DXILResource.cpp
-  DXILResourceAnalysis.cpp
   DXILResourceAccess.cpp
   DXILShaderFlags.cpp
   DXILTranslateMetadata.cpp
diff --git a/llvm/lib/Target/DirectX/DXILDataScalarization.h 
b/llvm/lib/Target/DirectX/DXILDataScalarization.h
index 560e061db96d0..e8cd495729431 100644
--- a/llvm/lib/Target/DirectX/DXILDataScalarization.h
+++ b/llvm/lib/Target/DirectX/DXILDataScalarization.h
@@ -9,7 +9,6 @@
 #ifndef LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H
 #define LLVM_TARGET_DIRECTX_DXILDATASCALARIZATION_H
 
-#include "DXILResource.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
diff --git a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h 
b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
index c283386c6e3df..43fc4d7735e1f 100644
--- a/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
+++ b/llvm/lib/Target/DirectX/DXILIntrinsicExpansion.h
@@ -8,7 +8,6 @@
 #ifndef LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
 #define LLVM_TARGET_DIRECTX_DXILINTRINSICEXPANSION_H
 
-#include "DXILResource.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp 
b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 92b6787b9571e..1b02206464661 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -10,7 +10,6 @@
 #include "DXILConstants.h"
 #include "DXILIntrinsicExpansion.h"
 #include "DXILOpBuilder.h"
-#include "DXILResourceAnalysis.h"
 #include "DXILShaderFlags.h"
 #include "DirectX.h"
 #include "llvm/ADT/SmallVector.h"
@@ -889,7 +888,6 @@ class DXILOpLoweringLegacy : public ModulePass {
 AU.addRequired();
 AU.addRequired();
 AU.addPreserved();
-AU.addPreserved();
 AU.addPreserved();
 AU.addPreserved();
   }
diff --git a/llvm/lib/Target/DirectX/DXILPrepare.cpp 
b/llvm/lib/Target/DirectX/DXILPrepare.cpp
index 355c79ae0edc9..0014cc9e1f67c 100644
--- a/llvm/lib/Target/DirectX/DXILPrepare.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrepare.cpp
@@ -11,7 +11,6 @@
 /// Language (DXIL).
 
//===--===//
 
-#include "DXILResourceAnalysis.h"
 #include "DXILShaderFlags.h"
 #include "DirectX.h"
 #include "DirectXIRPasses/PointerTypeAnalysis.h"
@@ -248,7 +247,6 @@ class DXILPrepareModule : public ModulePass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
 AU.addRequired();
 AU.addPreserved();
-AU.addPreserved();
 AU.addPreserved();
 AU.addPreserved();
   }
diff --git a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp 
b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
index 7255a9be06d51..c1f2483044693 100644
--- a/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
+++ b/llvm/lib/Target/DirectX/DXILPrettyPrinter.cpp
@@ -7,7 +7,6 @@
 
//===--===//
 
 #include "DXILPrettyPrinter.h"
-#include "DXILResourceAnalysis.h"
 #include "DirectX.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/DXILResource.h"
@@ -222,8 +221,7 @@ struct FormatBindingSize
 } // namespace
 
 static void prettyPrintResources(raw_ostream &OS, const DXILBindingMap &DBM,
- 

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SILateBranchLowering to NPM (PR #130063)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -2,6 +2,8 @@
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 
-run-pass=si-late-branch-lowering -verify-machineinstrs  %s -o - | FileCheck 
-check-prefixes=GCN,GFX10 %s
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 
-run-pass=si-late-branch-lowering -verify-machineinstrs  %s -o - | FileCheck 
-check-prefixes=GCN,GFX11 %s
 
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 
-passes=si-late-branch-lowering -verify-machineinstrs  %s -o - | FileCheck 
-check-prefixes=GCN,GFX11 %s

cdevadas wrote:

```suggestion
# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 
-passes=si-late-branch-lowering %s -o - | FileCheck -check-prefixes=GCN,GFX11 %s
```

https://github.com/llvm/llvm-project/pull/130063
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SILateBranchLowering to NPM (PR #130063)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -1,4 +1,5 @@
 # RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji 
-run-pass=si-late-branch-lowering -verify-machineinstrs | FileCheck 
-check-prefix=GCN %s
+# RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -passes=si-late-branch-lowering 
-verify-machineinstrs | FileCheck -check-prefix=GCN %s

cdevadas wrote:

```suggestion
# RUN: llc -o - %s -mtriple=amdgcn -mcpu=fiji -passes=si-late-branch-lowering | 
FileCheck -check-prefix=GCN %s
```

https://github.com/llvm/llvm-project/pull/130063
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM (PR #130064)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -96,12 +108,20 @@ static bool isVMEMLoad(const MachineInstr &MI) {
   return SIInstrInfo::isVMEM(MI) && MI.mayLoad();
 }
 
-bool AMDGPUSetWavePriority::runOnMachineFunction(MachineFunction &MF) {
+PreservedAnalyses
+llvm::AMDGPUSetWavePriorityPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+  if (!AMDGPUSetWavePriority().run(MF))
+return PreservedAnalyses::all();
+  return getMachineFunctionPassPreservedAnalyses();

cdevadas wrote:

```suggestion
return PreservedAnalyses::all();

  return getMachineFunctionPassPreservedAnalyses();
```

https://github.com/llvm/llvm-project/pull/130064
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port AMDGPUSetWavePriority to NPM (PR #130064)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits

https://github.com/cdevadas approved this pull request.


https://github.com/llvm/llvm-project/pull/130064
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIPreEmitPeephole to NPM (PR #130065)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=si-pre-emit-peephole 
-verify-machineinstrs  %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole 
-verify-machineinstrs  %s -o - | FileCheck %s

cdevadas wrote:

```suggestion
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=si-pre-emit-peephole %s -o - | 
FileCheck %s
```

https://github.com/llvm/llvm-project/pull/130065
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PostRAHazardRecognizer to NPM (PR #130066)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -1,6 +1,8 @@
 # RUN: llc -mtriple=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass  
post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s
 # RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs 
-run-pass  post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s
 
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-xnack -verify-machineinstrs 
-passes post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN %s

cdevadas wrote:

```suggestion
# RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-xnack -passes post-RA-hazard-rec 
%s -o - | FileCheck -check-prefixes=GCN %s
```

https://github.com/llvm/llvm-project/pull/130066
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PostRAHazardRecognizer to NPM (PR #130066)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
 # RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs 
-run-pass=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs 
-passes=post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN %s

cdevadas wrote:

```suggestion
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -passes=post-RA-hazard-rec %s -o - | 
FileCheck -check-prefix=GCN %s
```

https://github.com/llvm/llvm-project/pull/130066
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-03-10 Thread Sergio Afonso via llvm-branch-commits


@@ -0,0 +1,45 @@
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
+! RUN:   | FileCheck %s
+
+program main
+implicit none
+
+call foo(10)
+
+contains
+subroutine foo(n)
+implicit none
+integer :: n
+integer :: i
+integer, dimension(n) :: a
+
+do concurrent(i=1:n)
+a(i) = i
+end do
+end subroutine
+
+end program main
+
+! CHECK: %[[N_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} 
{uniq_name = "_QFFfooEn"}
+
+! CHECK: fir.load
+
+! CHECK: %[[LB:.*]] = fir.convert %{{c1_.*}} : (i32) -> index
+! CHECK: %[[N_VAL:.*]] = fir.load %[[N_DECL]]#0 : !fir.ref<i32>
+! CHECK: %[[UB:.*]] = fir.convert %[[N_VAL]] : (i32) -> index
+! CHECK: %[[C1:.*]] = arith.constant 1 : index
+
+! CHECK: omp.parallel {
+
+
+! Verify that we restort to using the outside value for the upper bound since 
it

skatrak wrote:

```suggestion
! Verify that we resort to using the outside value for the upper bound since it
```

https://github.com/llvm/llvm-project/pull/127633
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-03-10 Thread Sergio Afonso via llvm-branch-commits


@@ -152,26 +199,140 @@ class DoConcurrentConversion : public 
mlir::OpConversionPattern<fir::DoLoopOp> {
 public:
   using mlir::OpConversionPattern::OpConversionPattern;
 
-  DoConcurrentConversion(mlir::MLIRContext *context, bool mapToDevice)
-  : OpConversionPattern(context), mapToDevice(mapToDevice) {}
+  DoConcurrentConversion(mlir::MLIRContext *context, bool mapToDevice,
+ llvm::DenseSet &concurrentLoopsToSkip)
+  : OpConversionPattern(context), mapToDevice(mapToDevice),
+concurrentLoopsToSkip(concurrentLoopsToSkip) {}
 
   mlir::LogicalResult
   matchAndRewrite(fir::DoLoopOp doLoop, OpAdaptor adaptor,
   mlir::ConversionPatternRewriter &rewriter) const override {
-looputils::LoopNest loopNest;
+if (mapToDevice)
+  return doLoop.emitError(
+  "not yet implemented: Mapping `do concurrent` loops to device");
+
+looputils::LoopNestToIndVarMap loopNest;
 bool hasRemainingNestedLoops =
 failed(looputils::collectLoopNest(doLoop, loopNest));
 if (hasRemainingNestedLoops)
   mlir::emitWarning(doLoop.getLoc(),
 "Some `do concurent` loops are not perfectly-nested. "
 "These will be serialized.");
 
-// TODO This will be filled in with the next PRs that upstreams the rest of
-// the ROCm implementaion.
+mlir::IRMapping mapper;
+genParallelOp(doLoop.getLoc(), rewriter, loopNest, mapper);
+mlir::omp::LoopNestOperands loopNestClauseOps;
+genLoopNestClauseOps(doLoop.getLoc(), rewriter, loopNest, mapper,
+ loopNestClauseOps);
+
+mlir::omp::LoopNestOp ompLoopNest =
+genWsLoopOp(rewriter, loopNest.back().first, mapper, loopNestClauseOps,
+/*isComposite=*/mapToDevice);
+
+rewriter.eraseOp(doLoop);
+
+// Mark `unordered` loops that are not perfectly nested to be skipped from
+// the legality check of the `ConversionTarget` since we are not interested
+// in mapping them to OpenMP.
+ompLoopNest->walk([&](fir::DoLoopOp doLoop) {
+  if (doLoop.getUnordered()) {
+concurrentLoopsToSkip.insert(doLoop);
+  }
+});
+
 return mlir::success();
   }
 
+private:
+  mlir::omp::ParallelOp genParallelOp(mlir::Location loc,
+  mlir::ConversionPatternRewriter 
&rewriter,
+  looputils::LoopNestToIndVarMap &loopNest,
+  mlir::IRMapping &mapper) const {
+    auto parallelOp = rewriter.create<mlir::omp::ParallelOp>(loc);
+    rewriter.createBlock(&parallelOp.getRegion());
+rewriter.setInsertionPoint(rewriter.create(loc));
+
+genLoopNestIndVarAllocs(rewriter, loopNest, mapper);
+return parallelOp;
+  }
+
+  void genLoopNestIndVarAllocs(mlir::ConversionPatternRewriter &rewriter,
+   looputils::LoopNestToIndVarMap &loopNest,
+   mlir::IRMapping &mapper) const {
+
+for (auto &[_, indVarInfo] : loopNest)
+  genInductionVariableAlloc(rewriter, indVarInfo.iterVarMemDef, mapper);
+  }
+
+  mlir::Operation *
+  genInductionVariableAlloc(mlir::ConversionPatternRewriter &rewriter,
+mlir::Operation *indVarMemDef,
+mlir::IRMapping &mapper) const {
+assert(
+indVarMemDef != nullptr &&
+"Induction variable memdef is expected to have a defining operation.");
+
+llvm::SmallSetVector indVarDeclareAndAlloc;
+for (auto operand : indVarMemDef->getOperands())
+  indVarDeclareAndAlloc.insert(operand.getDefiningOp());
+indVarDeclareAndAlloc.insert(indVarMemDef);
+
+mlir::Operation *result;
+for (mlir::Operation *opToClone : indVarDeclareAndAlloc)
+  result = rewriter.clone(*opToClone, mapper);
+
+return result;
+  }
+
+  void genLoopNestClauseOps(
+  mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
+  looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper,
+  mlir::omp::LoopNestOperands &loopNestClauseOps) const {
+assert(loopNestClauseOps.loopLowerBounds.empty() &&
+   "Loop nest bounds were already emitted!");
+
+auto populateBounds = [&](mlir::Value var,

skatrak wrote:

Nit: No need to capture anything here.
```suggestion
auto populateBounds = [](mlir::Value var,
```

https://github.com/llvm/llvm-project/pull/127633
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port BranchRelaxation to NPM (PR #130067)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -744,7 +753,15 @@ bool BranchRelaxation::relaxBranchInstructions() {
   return Changed;
 }
 
-bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
+PreservedAnalyses
+BranchRelaxationPass::run(MachineFunction &MF,
+  MachineFunctionAnalysisManager &MFAM) {
+  if (!BranchRelaxation().run(MF))
+return PreservedAnalyses::all();
+  return getMachineFunctionPassPreservedAnalyses();

cdevadas wrote:

```suggestion
return PreservedAnalyses::all();

  return getMachineFunctionPassPreservedAnalyses();
```

https://github.com/llvm/llvm-project/pull/130067
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port BranchRelaxation to NPM (PR #130067)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits

https://github.com/cdevadas approved this pull request.


https://github.com/llvm/llvm-project/pull/130067
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-03-10 Thread Sergio Afonso via llvm-branch-commits


@@ -24,7 +25,50 @@ namespace flangomp {
 
 namespace {
 namespace looputils {
-using LoopNest = llvm::SetVector;
+/// Stores info needed about the induction/iteration variable for each `do
+/// concurrent` in a loop nest.
+struct InductionVariableInfo {
+  /// The operation allocating memory for the iteration variable.
+  mlir::Operation *iterVarMemDef;
+};
+
+using LoopNestToIndVarMap =
+llvm::MapVector;
+
+/// For the \p doLoop parameter, find the operation that declares its iteration
+/// variable or allocates memory for it.
+///
+/// For example, given the following loop:
+/// ```
+///   ...
+///   %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
+///   ...
+///   fir.do_loop %ind_var = %lb to %ub step %s unordered {
+/// %ind_var_conv = fir.convert %ind_var : (index) -> i32
+/// fir.store %ind_var_conv to %i#1 : !fir.ref
+/// ...
+///   }
+/// ```
+///
+/// This function returns the `hlfir.declare` op for `%i`.
+///
+/// Note: The current implementation is dependent on how flang emits loop
+/// bodies, which is sufficient for the current simple test/use cases. If this
+/// proves to be insufficient, this should be made more generic.
+mlir::Operation *findLoopIterationVarMemDecl(fir::DoLoopOp doLoop) {
+  mlir::Value result = nullptr;
+  for (mlir::Operation &op : doLoop) {
+// The first `fir.store` op we come across should be the op that updates 
the
+// loop's iteration variable.
+if (auto storeOp = mlir::dyn_cast(op)) {

skatrak wrote:

Could you also check that `storeOp.getValue()` is defined by a `fir.convert` 
whose argument is the `fir.do_loop`-defined induction variable? That way it's 
less likely to return a wrong value if for some reason other `fir.store` 
operations are introduced before.

https://github.com/llvm/llvm-project/pull/127633
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port RemoveLoadsIntoFakeUses to NPM (PR #130068)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits

https://github.com/cdevadas approved this pull request.


https://github.com/llvm/llvm-project/pull/130068
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port MachineSanitizerBinaryMetadata to NPM (PR #130069)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits

https://github.com/cdevadas approved this pull request.


https://github.com/llvm/llvm-project/pull/130069
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-10 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff c29d8202c06488a9466aea49dda4cf2b4663236e 
296a9db2611c31497fb40b5b01a027440dcda2a5 --extensions h,cpp -- 
llvm/include/llvm/CodeGen/SelectionDAGISel.h 
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp 
llvm/lib/Target/AMDGPU/SIISelLowering.cpp 
llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
```





View the diff from clang-format here.


``diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index cbbf9dcd82..4e16a70bd5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1246,7 +1246,8 @@ bool AMDGPUCallLowering::lowerTailCall(
 
   // On GFX12, we can only change the VGPR allocation for wave32.
   if (!ST.isWave32()) {
-LLVM_DEBUG(dbgs() << "Dynamic VGPR mode is only supported for 
wave32\n");
+LLVM_DEBUG(
+dbgs() << "Dynamic VGPR mode is only supported for wave32\n");
 return false;
   }
 
diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp 
b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index f1ecd25392..9100e07ea4 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -215,7 +215,7 @@ bool 
SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
 
   case AMDGPU::SI_CS_CHAIN_TC_W32:
   case AMDGPU::SI_CS_CHAIN_TC_W64:
-expandChainCall(MI, ST, /*DynamicVGPR=*/ false);
+expandChainCall(MI, ST, /*DynamicVGPR=*/false);
 MadeChange = true;
 break;
   case AMDGPU::SI_CS_CHAIN_TC_W32_DVGPR:

``




https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port PostRAHazardRecognizer to NPM (PR #130066)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -40,30 +41,45 @@ using namespace llvm;
 STATISTIC(NumNoops, "Number of noops inserted");
 
 namespace {
-  class PostRAHazardRecognizer : public MachineFunctionPass {
+struct PostRAHazardRecognizer {
+  bool run(MachineFunction &MF);
+};
 
-  public:
-static char ID;
-PostRAHazardRecognizer() : MachineFunctionPass(ID) {}
+class PostRAHazardRecognizerLegacy : public MachineFunctionPass {
 
-void getAnalysisUsage(AnalysisUsage &AU) const override {
-  AU.setPreservesCFG();
-  MachineFunctionPass::getAnalysisUsage(AU);
-}
+public:
+  static char ID;
+  PostRAHazardRecognizerLegacy() : MachineFunctionPass(ID) {}
 
-bool runOnMachineFunction(MachineFunction &Fn) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+AU.setPreservesCFG();
+MachineFunctionPass::getAnalysisUsage(AU);
+  }
 
-  };
-  char PostRAHazardRecognizer::ID = 0;
+  bool runOnMachineFunction(MachineFunction &Fn) override {
+return PostRAHazardRecognizer().run(Fn);
+  }
+};
+char PostRAHazardRecognizerLegacy::ID = 0;
 
-}
+} // namespace
 
-char &llvm::PostRAHazardRecognizerID = PostRAHazardRecognizer::ID;
+char &llvm::PostRAHazardRecognizerID = PostRAHazardRecognizerLegacy::ID;
 
-INITIALIZE_PASS(PostRAHazardRecognizer, DEBUG_TYPE,
+INITIALIZE_PASS(PostRAHazardRecognizerLegacy, DEBUG_TYPE,
 "Post RA hazard recognizer", false, false)
 
-bool PostRAHazardRecognizer::runOnMachineFunction(MachineFunction &Fn) {
+PreservedAnalyses
+llvm::PostRAHazardRecognizerPass::run(MachineFunction &MF,
+  MachineFunctionAnalysisManager &MFAM) {
+  if (!PostRAHazardRecognizer().run(MF))
+return PreservedAnalyses::all();
+  auto PA = getMachineFunctionPassPreservedAnalyses();

cdevadas wrote:

```suggestion
return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
```

https://github.com/llvm/llvm-project/pull/130066
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-10 Thread Diana Picus via llvm-branch-commits

https://github.com/rovka updated 
https://github.com/llvm/llvm-project/pull/130094

>From aff1e132263dba730999eb017b7548a5d2f46b6f Mon Sep 17 00:00:00 2001
From: Diana Picus 
Date: Tue, 10 Oct 2023 11:06:23 +0200
Subject: [PATCH 1/4] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain

The llvm.amdgcn.cs.chain intrinsic has a 'flags' operand which may
indicate that we want to reallocate the VGPRs before performing the
call.

A call with the following arguments:
```
llvm.amdgcn.cs.chain %callee, %exec, %sgpr_args, %vgpr_args,
  /*flags*/0x1, %num_vgprs, %fallback_exec, %fallback_callee
```
is supposed to do the following:
- copy the SGPR and VGPR args into their respective registers
- try to change the VGPR allocation
- if the allocation has succeeded, set EXEC to %exec and jump to
  %callee, otherwise set EXEC to %fallback_exec and jump to
  %fallback_callee

This patch implements the dynamic VGPR behaviour by generating an
S_ALLOC_VGPR followed by S_CSELECT_B32/64 instructions for the EXEC and
callee. The rest of the call sequence is left undisturbed (i.e.
identical to the case where the flags are 0 and we don't use dynamic
VGPRs). We achieve this by introducing some new pseudos
(SI_CS_CHAIN_TC_Wn_DVGPR) which are expanded in the SILateBranchLowering
pass, just like the simpler SI_CS_CHAIN_TC_Wn pseudos. The main reason
is so that we don't risk other passes (particularly the PostRA
scheduler) introducing instructions between the S_ALLOC_VGPR and the
jump. Such instructions might end up using VGPRs that have been
deallocated, or the wrong EXEC mask. Once the whole backend treats
S_ALLOC_VGPR and changes to EXEC as barriers for instructions that use
VGPRs, we could in principle move the expansion earlier (but in the
absence of a good reason for that my personal preference is to keep
it later in order to make debugging easier).

Since the expansion happens after register allocation, we're
careful to select constants to immediate operands instead of letting
ISel generate S_MOVs which could interfere with register allocation
(i.e. make it look like we need more registers than we actually do).

For GFX12, S_ALLOC_VGPR only works in wave32 mode, so we bail out
during ISel in wave64 mode. However, we can define the pseudos for
wave64 too so it's easy to handle if future generations support it.

Co-authored-by: Ana Mihajlovic 
---
 llvm/include/llvm/CodeGen/SelectionDAGISel.h  |  29 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp  |  13 +-
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 126 +--
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  88 +++--
 llvm/lib/Target/AMDGPU/SIInstructions.td  |  76 +++--
 .../Target/AMDGPU/SILateBranchLowering.cpp|  62 +++-
 .../amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll |  97 ++
 .../isel-amdgcn-cs-chain-intrinsic-w32.ll |  36 +-
 .../isel-amdgcn-cs-chain-intrinsic-w64.ll |  36 +-
 ...-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll | 315 ++
 .../CodeGen/AMDGPU/remove-register-flags.mir  |  19 ++
 11 files changed, 746 insertions(+), 151 deletions(-)
 create mode 100644 
llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll
 create mode 100644 
llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/remove-register-flags.mir

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h 
b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index e9452a6dc6233..55f8f19d437a0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -328,20 +328,21 @@ class SelectionDAGISel {
   };
 
   enum {
-OPFL_None   = 0,  // Node has no chain or glue input and isn't 
variadic.
-OPFL_Chain  = 1, // Node has a chain input.
-OPFL_GlueInput  = 2, // Node has a glue input.
-OPFL_GlueOutput = 4, // Node has a glue output.
-OPFL_MemRefs= 8, // Node gets accumulated MemRefs.
-OPFL_Variadic0  = 1<<4,  // Node is variadic, root has 0 fixed inputs.
-OPFL_Variadic1  = 2<<4,  // Node is variadic, root has 1 fixed inputs.
-OPFL_Variadic2  = 3<<4,  // Node is variadic, root has 2 fixed inputs.
-OPFL_Variadic3  = 4<<4,  // Node is variadic, root has 3 fixed inputs.
-OPFL_Variadic4  = 5<<4,  // Node is variadic, root has 4 fixed inputs.
-OPFL_Variadic5  = 6<<4,  // Node is variadic, root has 5 fixed inputs.
-OPFL_Variadic6  = 7<<4,  // Node is variadic, root has 6 fixed inputs.
-
-OPFL_VariadicInfo = OPFL_Variadic6
+OPFL_None = 0,   // Node has no chain or glue input and isn't variadic.
+OPFL_Chain = 1,  // Node has a chain input.
+OPFL_GlueInput = 2,  // Node has a glue input.
+OPFL_GlueOutput = 4, // Node has a glue output.
+OPFL_MemRefs = 8,// Node gets accumulated MemRefs.
+OPFL_Variadic0 = 1 << 4, // Node is variadic, root has 0 fixed inputs.
+OPFL_Variadic1 = 2 << 4, // Node is variadic, root has 1 fixed inputs.
+OPFL_Variad

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIInsertWaitcnts to NPM (PR #130061)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -742,6 +730,36 @@ class SIInsertWaitcnts : public MachineFunctionPass {
 WaitcntBrackets &ScoreBrackets);
 };
 
+class SIInsertWaitcntsLegacy : public MachineFunctionPass {
+public:
+  static char ID;
+  SIInsertWaitcntsLegacy() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override {

cdevadas wrote:

Better put this definition outside.

https://github.com/llvm/llvm-project/pull/130061
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port RemoveLoadsIntoFakeUses to NPM (PR #130068)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -66,21 +69,44 @@ class RemoveLoadsIntoFakeUses : public MachineFunctionPass {
   bool runOnMachineFunction(MachineFunction &MF) override;
 };
 
-char RemoveLoadsIntoFakeUses::ID = 0;
-char &llvm::RemoveLoadsIntoFakeUsesID = RemoveLoadsIntoFakeUses::ID;
+struct RemoveLoadsIntoFakeUses {
+  bool run(MachineFunction &MF);
+};
+
+char RemoveLoadsIntoFakeUsesLegacy::ID = 0;
+char &llvm::RemoveLoadsIntoFakeUsesID = RemoveLoadsIntoFakeUsesLegacy::ID;
 
-INITIALIZE_PASS_BEGIN(RemoveLoadsIntoFakeUses, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(RemoveLoadsIntoFakeUsesLegacy, DEBUG_TYPE,
   "Remove Loads Into Fake Uses", false, false)
-INITIALIZE_PASS_END(RemoveLoadsIntoFakeUses, DEBUG_TYPE,
+INITIALIZE_PASS_END(RemoveLoadsIntoFakeUsesLegacy, DEBUG_TYPE,
 "Remove Loads Into Fake Uses", false, false)
 
-bool RemoveLoadsIntoFakeUses::runOnMachineFunction(MachineFunction &MF) {
+bool RemoveLoadsIntoFakeUsesLegacy::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+return false;
+  return RemoveLoadsIntoFakeUses().run(MF);

cdevadas wrote:

```suggestion
return false;

  return RemoveLoadsIntoFakeUses().run(MF);
```

https://github.com/llvm/llvm-project/pull/130068
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [Driver] Temporarily probe aarch64-linux-gnu GCC installation (PR #102039)

2025-03-10 Thread Fangrui Song via llvm-branch-commits

MaskRay wrote:

> We're facing it again, with LLVM20 this time

... which indicates that the distro and boost should really fix the problem and 
not rely on the clang driver hacks.

https://github.com/llvm/llvm-project/pull/102039
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Updating DXContainer documentation to add Root Descriptors (PR #129759)

2025-03-10 Thread Finn Plummer via llvm-branch-commits


@@ -497,3 +497,49 @@ signature and passed to the shader without requiring a 
constant buffer resource:
 #. **Num32BitValues**: The number of 32-bit values included in this constant 
buffer.
 
 Root constants provide a fast way to pass small amounts of data directly to 
the shader without the overhead of creating and binding a constant buffer 
resource.
+
+Root Descriptor
+~~~~~~~~~~~~~~~
+
+Root descriptors provide a direct mechanism for binding individual resources 
to shader stages in the Direct3D 12 
+rendering pipeline. They represent a critical interface for efficient resource 
management, allowing applications 
+to specify how shader stages access specific GPU resources.
+
+.. code-block:: cpp
+
+   enum RootDescriptorFlags {
+  None = 0,
+  DataVolatile = 0x2,
+  DataStaticWhileSetAtExecute = 0x4,
+  DataStatic = 0x8,
+   }
+
+   // Version 1.0 Root Descriptor
+   struct RootDescriptor_V1_0 {
+  uint32_t ShaderRegister;
+  uint32_t RegisterSpace;
+   };
+   
+   // Version 1.1 Root Descriptor
+   struct RootDescriptor_V1_1 {
+  uint32_t ShaderRegister;
+  uint32_t RegisterSpace;  
+  // Bitfield of flags from the Flags enum
+  uint32_t Flags;
+   };
+
+Version 1.1 of Root Descriptors has introduced some flags that can hint the 
drivers into
+performing further code optimizations. For details about it, check `Direct X 
documentation 
`_.

inbelic wrote:

```suggestion
performing further code optimizations. For details, check
`Direct X documentation 
`_.
```

https://github.com/llvm/llvm-project/pull/129759
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-03-10 Thread Kareem Ergawy via llvm-branch-commits


@@ -0,0 +1,45 @@
+! Tests that if `do concurrent` is not perfectly nested in its parent loop, we
+! skip converting the not-perfectly nested `do concurrent` loop.
+
+! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \
+! RUN:   | FileCheck %s
+
+program main
+   integer, parameter :: n = 10
+   integer, parameter :: m = 20
+   integer, parameter :: l = 30
+   integer x;
+   integer :: a(n, m, l)
+
+   do concurrent(i=1:n)
+ x = 10
+ do concurrent(j=1:m, k=1:l)
+   a(i,j,k) = i * j + k
+ end do
+   end do
+end
+
+! CHECK: %[[ORIG_K_ALLOC:.*]] = fir.alloca i32 {bindc_name = "k"}
+! CHECK: %[[ORIG_K_DECL:.*]]:2 = hlfir.declare %[[ORIG_K_ALLOC]]
+
+! CHECK: %[[ORIG_J_ALLOC:.*]] = fir.alloca i32 {bindc_name = "j"}
+! CHECK: %[[ORIG_J_DECL:.*]]:2 = hlfir.declare %[[ORIG_J_ALLOC]]
+
+! CHECK: omp.parallel {
+
+! CHECK: omp.wsloop {
+! CHECK: omp.loop_nest ({{[^[:space:]]+}}) {{.*}} {
+! CHECK:   fir.do_loop %[[J_IV:.*]] = {{.*}} {

ergawy wrote:

I would like to leave it this way to give a better idea of how the whole nested 
construct was translated.

https://github.com/llvm/llvm-project/pull/127633
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][OpenMP] Map simple `do concurrent` loops to OpenMP host constructs (PR #127633)

2025-03-10 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy updated 
https://github.com/llvm/llvm-project/pull/127633

>From 2e6bf4c394db115bd4a369473742b7411a03334c Mon Sep 17 00:00:00 2001
From: ergawy 
Date: Tue, 18 Feb 2025 02:50:46 -0600
Subject: [PATCH 1/3] [flang][OpenMP] Map simple `do concurrent` loops to
 OpenMP host constructs

Upstreams one more part of the ROCm `do concurrent` to OpenMP mapping
pass. This PR adds support for converting simple loops to the equivalent
OpenMP constructs on the host: `omp parallel do`. Towards that end, we
have to collect more information about loop nests, for which we add new
utils in the `looputils` namespace.
---
 flang/docs/DoConcurrentConversionToOpenMP.md  |  47 
 .../OpenMP/DoConcurrentConversion.cpp | 211 +-
 .../Transforms/DoConcurrent/basic_host.f90|  14 +-
 .../Transforms/DoConcurrent/basic_host.mlir   |  62 +
 .../DoConcurrent/non_const_bounds.f90 |  45 
 .../DoConcurrent/not_perfectly_nested.f90 |  45 
 6 files changed, 405 insertions(+), 19 deletions(-)
 create mode 100644 flang/test/Transforms/DoConcurrent/basic_host.mlir
 create mode 100644 flang/test/Transforms/DoConcurrent/non_const_bounds.f90
 create mode 100644 flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90

diff --git a/flang/docs/DoConcurrentConversionToOpenMP.md 
b/flang/docs/DoConcurrentConversionToOpenMP.md
index 7b49af742f242..19611615ee9d6 100644
--- a/flang/docs/DoConcurrentConversionToOpenMP.md
+++ b/flang/docs/DoConcurrentConversionToOpenMP.md
@@ -126,6 +126,53 @@ see the "Data environment" section below.
 See `flang/test/Transforms/DoConcurrent/loop_nest_test.f90` for more examples
 of what is and is not detected as a perfect loop nest.
 
+### Single-range loops
+
+Given the following loop:
+```fortran
+  do concurrent(i=1:n)
+a(i) = i * i
+  end do
+```
+
+#### Mapping to `host`
+
+Mapping this loop to the `host` generates MLIR operations of the following
+structure:
+
+```
+%4 = fir.address_of(@_QFEa) ...
+%6:2 = hlfir.declare %4 ...
+
+omp.parallel {
+  // Allocate private copy for `i`.
+  // TODO Use delayed privatization.
+  %19 = fir.alloca i32 {bindc_name = "i"}
+  %20:2 = hlfir.declare %19 {uniq_name = "_QFEi"} ...
+
+  omp.wsloop {
+omp.loop_nest (%arg0) : index = (%21) to (%22) inclusive step (%c1_2) {
+  %23 = fir.convert %arg0 : (index) -> i32
+  // Use the privatized version of `i`.
+  fir.store %23 to %20#1 : !fir.ref
+  ...
+
+  // Use "shared" SSA value of `a`.
+  %42 = hlfir.designate %6#0
+  hlfir.assign %35 to %42
+  ...
+  omp.yield
+}
+omp.terminator
+  }
+  omp.terminator
+}
+```
+
+#### Mapping to `device`
+
+
+
 

[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-10 Thread Snehasish Kumar via llvm-branch-commits


@@ -2769,6 +2769,23 @@ namespace {
 
 } // end anonymous namespace
 
+StringRef AsmPrinter::getConstantSectionSuffix(const Constant *C) const {
+  SmallString<8> SectionNameSuffix;
+  if (TM.Options.EnableStaticDataPartitioning) {
+if (C && SDPI && PSI) {
+  auto Count = SDPI->getConstantProfileCount(C);
+  if (Count) {
+if (PSI->isHotCount(*Count)) {
+  SectionNameSuffix.append("hot");
+} else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) {
+  SectionNameSuffix.append("unlikely");
+}
+  }
+}
+  }
+  return SectionNameSuffix.str();

snehasish wrote:

This is returning a StringRef whose underlying memory is stack allocated. I 
don't think you need a SmallString here, just return std::string or c-string 
and convert it at the callsite?

https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][StaticDataSplitter]Support constant pool partitioning (PR #129781)

2025-03-10 Thread Snehasish Kumar via llvm-branch-commits


@@ -2769,6 +2769,23 @@ namespace {
 
 } // end anonymous namespace
 
+StringRef AsmPrinter::getConstantSectionSuffix(const Constant *C) const {

snehasish wrote:

Consider refactoring a bit to reduce nesting --

```
if(!TM.Options.EnableStaticDataPartitioning || C == nullptr || SDPI == nullptr 
|| PSI == nullptr) return "";
auto Count = SDPI->getConstantProfileCount(C);
if(!Count.has_value()) return "";

if (PSI->isHotCount(*Count)) {
 return "hot";
} else if (PSI->isColdCount(*Count) && !SDPI->hasUnknownCount(C)) {
  return "unlikely";
}
return "";
```

https://github.com/llvm/llvm-project/pull/129781
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang-tools-extra] [clang-tidy] support to detect conversion in `make_optional` for `bugprone-optional-value-conversion` (PR #130417)

2025-03-10 Thread Congcong Cai via llvm-branch-commits

https://github.com/HerrCai0907 updated 
https://github.com/llvm/llvm-project/pull/130417

>From 1706e3fc5819602febf9dfa554e98eb1e1fea365 Mon Sep 17 00:00:00 2001
From: Congcong Cai 
Date: Sat, 8 Mar 2025 21:50:19 +0800
Subject: [PATCH] [clang-tidy] support to detect conversion in `make_optional`
 for `bugprone-optional-value-conversion`

Fixes: #119554
---
 .../bugprone/OptionalValueConversionCheck.cpp  | 14 ++
 clang-tools-extra/docs/ReleaseNotes.rst|  4 
 ...ptional-value-conversion-construct-from-std.cpp | 13 +
 3 files changed, 31 insertions(+)

diff --git 
a/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp 
b/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp
index 33e823ac07490..cb5a1c7bea801 100644
--- a/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp
+++ b/clang-tools-extra/clang-tidy/bugprone/OptionalValueConversionCheck.cpp
@@ -12,6 +12,7 @@
 #include "../utils/OptionsUtils.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/ASTMatchers/ASTMatchers.h"
 #include 
 
 using namespace clang::ast_matchers;
@@ -31,6 +32,7 @@ constexpr std::array MakeSmartPtrList{
 "::std::make_unique",
 "::std::make_shared",
 };
+constexpr StringRef MakeOptional = "::std::make_optional";
 
 } // namespace
 
@@ -86,6 +88,18 @@ void 
OptionalValueConversionCheck::registerMatchers(MatchFinder *Finder) {
callee(functionDecl(
matchers::matchesAnyListedName(MakeSmartPtrList),
hasTemplateArgument(0, 
refersToType(BindOptionalType,
+   hasArgument(0, OptionalDerefMatcher)),
+   callExpr(
+   // match first std::make_optional by limit argument count 
(1)
+   // and template count (1).
+   // 1. template< class T > constexpr
+   //std::optional> make_optional(T&& value);
+   // 2. template< class T, class... Args > constexpr
+   //std::optional make_optional(Args&&... args);
+   argumentCountIs(1),
+   callee(functionDecl(templateArgumentCountIs(1),
+   hasName(MakeOptional),
+   returns(BindOptionalType))),
hasArgument(0, OptionalDerefMatcher))),
unless(anyOf(hasAncestor(typeLoc()),
 hasAncestor(expr(matchers::hasUnevaluatedContext())
diff --git a/clang-tools-extra/docs/ReleaseNotes.rst 
b/clang-tools-extra/docs/ReleaseNotes.rst
index fa68b6fabd549..5e2d87e0c2fa1 100644
--- a/clang-tools-extra/docs/ReleaseNotes.rst
+++ b/clang-tools-extra/docs/ReleaseNotes.rst
@@ -124,6 +124,10 @@ Changes in existing checks
   no longer be needed and will be removed. Also fixing false positive from
   const reference accessors to objects containing optional member.
 
+- Improved :doc:`bugprone-optional-value-conversion
+  <clang-tidy/checks/bugprone/optional-value-conversion>` check to detect
+  conversion in argument of ``std::make_optional``.
+
 - Improved :doc:`bugprone-unsafe-functions
   ` check to allow specifying
   additional C++ member functions to match.
diff --git 
a/clang-tools-extra/test/clang-tidy/checkers/bugprone/optional-value-conversion-construct-from-std.cpp
 
b/clang-tools-extra/test/clang-tidy/checkers/bugprone/optional-value-conversion-construct-from-std.cpp
index 768ab1ce014ce..305fd6890710d 100644
--- 
a/clang-tools-extra/test/clang-tidy/checkers/bugprone/optional-value-conversion-construct-from-std.cpp
+++ 
b/clang-tools-extra/test/clang-tidy/checkers/bugprone/optional-value-conversion-construct-from-std.cpp
@@ -27,9 +27,19 @@ class unique_ptr {};
 template 
 class shared_ptr {};
 
+template 
+class initializer_list {};
+
 template  unique_ptr make_unique(Args &&...args);
 template  shared_ptr make_shared(Args &&...args);
 
+template 
+constexpr std::optional<__decay(T)> make_optional(T &&value);
+template 
+constexpr std::optional make_optional(Args &&...args);
+template 
+constexpr std::optional make_optional(std::initializer_list il, Args 
&&...args);
+
 } // namespace std
 
 struct A {
@@ -45,9 +55,12 @@ void invalid() {
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: conversion from 
'std::optional' into 'int' and back into 'std::optional', remove 
potentially error-prone optional dereference 
[bugprone-optional-value-conversion]
   std::make_shared>(opt.value());
   // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: conversion from 
'std::optional' into 'int' and back into 'std::optional', remove 
potentially error-prone optional dereference 
[bugprone-optional-value-conversion]
+  std::make_optional(opt.value());
+  // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: conversion from 
'std::optional' into 'int' and back into 'std::optional', remove 
potentially error-prone optional dereference 
[bugprone-optional-value-conversion]
 }
 
 void valid() {
   st

[llvm-branch-commits] [clang] release/20.x: [clang] Reject constexpr-unknown values as constant expressions more consistently (PR #130658)

2025-03-10 Thread A. Jiang via llvm-branch-commits

https://github.com/frederick-vs-ja approved this pull request.


https://github.com/llvm/llvm-project/pull/130658
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [AstMatcher]`templateArgumentCountIs` support `FunctionDecl` (PR #130416)

2025-03-10 Thread Piotr Zegar via llvm-branch-commits

https://github.com/PiotrZSL requested changes to this pull request.

You may need to update documentation for this matcher in 
clang/docs/LibASTMatchersReference.html

https://github.com/llvm/llvm-project/pull/130416
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [clang-format] Don't remove parentheses separated from ellipsis by comma (#130471) (PR #130702)

2025-03-10 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-format

Author: None (llvmbot)


Changes

Backport 7d4d8509cbec7eecd8aaf2510015b54bc5c173e1

Requested by: @owenca

---
Full diff: https://github.com/llvm/llvm-project/pull/130702.diff


2 Files Affected:

- (modified) clang/lib/Format/UnwrappedLineParser.cpp (+51-35) 
- (modified) clang/unittests/Format/FormatTest.cpp (+4) 


``diff
diff --git a/clang/lib/Format/UnwrappedLineParser.cpp 
b/clang/lib/Format/UnwrappedLineParser.cpp
index 9b4257fdd8c8f..9a03e9409fcbc 100644
--- a/clang/lib/Format/UnwrappedLineParser.cpp
+++ b/clang/lib/Format/UnwrappedLineParser.cpp
@@ -2562,12 +2562,12 @@ bool UnwrappedLineParser::parseBracedList(bool 
IsAngleBracket, bool IsEnum) {
 /// Returns whether there is a `=` token between the parentheses.
 bool UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
   assert(FormatTok->is(tok::l_paren) && "'(' expected.");
-  auto *LeftParen = FormatTok;
+  auto *LParen = FormatTok;
   bool SeenComma = false;
   bool SeenEqual = false;
   bool MightBeFoldExpr = false;
-  const bool MightBeStmtExpr = Tokens->peekNextToken()->is(tok::l_brace);
   nextToken();
+  const bool MightBeStmtExpr = FormatTok->is(tok::l_brace);
   do {
 switch (FormatTok->Tok.getKind()) {
 case tok::l_paren:
@@ -2577,44 +2577,60 @@ bool UnwrappedLineParser::parseParens(TokenType 
AmpAmpTokenType) {
 parseChildBlock();
   break;
 case tok::r_paren: {
-  auto *Prev = LeftParen->Previous;
-  if (!MightBeStmtExpr && !MightBeFoldExpr && !Line->InMacroBody &&
-  Style.RemoveParentheses > FormatStyle::RPS_Leave) {
-const auto *Next = Tokens->peekNextToken();
-const bool DoubleParens =
-Prev && Prev->is(tok::l_paren) && Next && Next->is(tok::r_paren);
-const bool CommaSeparated =
-!DoubleParens && Prev && Prev->isOneOf(tok::l_paren, tok::comma) &&
-Next && Next->isOneOf(tok::comma, tok::r_paren);
-const auto *PrevPrev = Prev ? Prev->getPreviousNonComment() : nullptr;
-const bool Excluded =
-PrevPrev &&
-(PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
- SeenComma ||
- (SeenEqual &&
-  (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
-   PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if;
-const bool ReturnParens =
-Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
-((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
- (!NestedLambdas.empty() && !NestedLambdas.back())) &&
-Prev && Prev->isOneOf(tok::kw_return, tok::kw_co_return) && Next &&
-Next->is(tok::semi);
-if ((DoubleParens && !Excluded) || (CommaSeparated && !SeenComma) ||
-ReturnParens) {
-  LeftParen->Optional = true;
-  FormatTok->Optional = true;
-}
-  }
+  auto *Prev = LParen->Previous;
+  auto *RParen = FormatTok;
+  nextToken();
   if (Prev) {
+auto OptionalParens = [&] {
+  if (MightBeStmtExpr || MightBeFoldExpr || Line->InMacroBody ||
+  SeenComma || Style.RemoveParentheses == FormatStyle::RPS_Leave) {
+return false;
+  }
+  const bool DoubleParens =
+  Prev->is(tok::l_paren) && FormatTok->is(tok::r_paren);
+  if (DoubleParens) {
+const auto *PrevPrev = Prev->getPreviousNonComment();
+const bool Excluded =
+PrevPrev &&
+(PrevPrev->isOneOf(tok::kw___attribute, tok::kw_decltype) ||
+ (SeenEqual &&
+  (PrevPrev->isOneOf(tok::kw_if, tok::kw_while) ||
+   PrevPrev->endsSequence(tok::kw_constexpr, tok::kw_if;
+if (!Excluded)
+  return true;
+  } else {
+const bool CommaSeparated =
+Prev->isOneOf(tok::l_paren, tok::comma) &&
+FormatTok->isOneOf(tok::comma, tok::r_paren);
+if (CommaSeparated &&
+// LParen is not preceded by ellipsis, comma.
+!Prev->endsSequence(tok::comma, tok::ellipsis) &&
+// RParen is not followed by comma, ellipsis.
+!(FormatTok->is(tok::comma) &&
+  Tokens->peekNextToken()->is(tok::ellipsis))) {
+  return true;
+}
+const bool ReturnParens =
+Style.RemoveParentheses == FormatStyle::RPS_ReturnStatement &&
+((NestedLambdas.empty() && !IsDecltypeAutoFunction) ||
+ (!NestedLambdas.empty() && !NestedLambdas.back())) &&
+Prev->isOneOf(tok::kw_return, tok::kw_co_return) &&
+FormatTok->is(tok::semi);
+if (ReturnParens)
+  return true;
+  }
+  return false;
+};
 if (Prev->is(TT_TypenameMacro)) {
-  LeftParen->setFinali

[llvm-branch-commits] [clang] release/20.x: [clang-format] Don't remove parentheses separated from ellipsis by comma (#130471) (PR #130702)

2025-03-10 Thread via llvm-branch-commits

llvmbot wrote:

@HazardyKnusperkeks What do you think about merging this PR to the release 
branch?

https://github.com/llvm/llvm-project/pull/130702
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/20.x: [clang-format] Don't remove parentheses separated from ellipsis by comma (#130471) (PR #130702)

2025-03-10 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/130702
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-10 Thread Matt Arsenault via llvm-branch-commits


@@ -172,7 +215,12 @@ bool 
SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
 
   case AMDGPU::SI_CS_CHAIN_TC_W32:
   case AMDGPU::SI_CS_CHAIN_TC_W64:
-expandChainCall(MI);
+expandChainCall(MI, ST, /*DynamicVGPR*/ false);
+MadeChange = true;
+break;
+  case AMDGPU::SI_CS_CHAIN_TC_W32_DVGPR:
+  case AMDGPU::SI_CS_CHAIN_TC_W64_DVGPR:
+expandChainCall(MI, ST, /*DynamicVGPR*/ true);

arsenm wrote:

```suggestion
expandChainCall(MI, ST, /*DynamicVGPR=*/ true);
```

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-10 Thread Matt Arsenault via llvm-branch-commits


@@ -116,14 +117,56 @@ static void splitBlock(MachineBasicBlock &MBB, 
MachineInstr &MI,
   MDT->applyUpdates(DTUpdates);
 }
 
-void SILateBranchLowering::expandChainCall(MachineInstr &MI) {
+static void addRegOrCopyOp(MachineInstrBuilder &MIB, MachineOperand &Op) {
+  if (Op.isReg())
+MIB.addReg(Op.getReg());
+  else
+MIB->addOperand(Op);
+}
+
+void SILateBranchLowering::expandChainCall(MachineInstr &MI,
+   const GCNSubtarget &ST,
+   bool DynamicVGPR) {
   // This is a tail call that needs to be expanded into at least
   // 2 instructions, one for setting EXEC and one for the actual tail call.
-  constexpr unsigned ExecIdx = 3;
+  unsigned ExecIdx =
+  AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::exec);
+  if (DynamicVGPR) {
+// We have 3 extra operands and we need to:
+// * Try to change the VGPR allocation
+// * Select the callee based on the result of the reallocation attempt
+// * Select the EXEC mask based on the result of the reallocation attempt
+auto AllocMI = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+   TII->get(AMDGPU::S_ALLOC_VGPR));
+addRegOrCopyOp(AllocMI,
+   *TII->getNamedOperand(MI, AMDGPU::OpName::numvgprs));
+
+auto SelectCallee =
+BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+TII->get(AMDGPU::S_CSELECT_B64))
+.addDef(TII->getNamedOperand(MI, AMDGPU::OpName::src0)->getReg());
+addRegOrCopyOp(SelectCallee,
+   *TII->getNamedOperand(MI, AMDGPU::OpName::src0));
+addRegOrCopyOp(SelectCallee,
+   *TII->getNamedOperand(MI, AMDGPU::OpName::fbcallee));
+
+auto SelectExec = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
+  TII->get(ST.isWave32() ? AMDGPU::S_CSELECT_B32
+ : AMDGPU::S_CSELECT_B64))
+  .addDef(ExecReg);
+
+addRegOrCopyOp(SelectExec, *TII->getNamedOperand(MI, 
AMDGPU::OpName::exec));
+addRegOrCopyOp(SelectExec,
+   *TII->getNamedOperand(MI, AMDGPU::OpName::fbexec));
+  } else {
+auto SetExec = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),

arsenm wrote:

Use a variable instead of all the repeated MI.getDebugLoc() calls.

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-10 Thread Matt Arsenault via llvm-branch-commits


@@ -1200,34 +1225,78 @@ bool AMDGPUCallLowering::lowerTailCall(
   if (!IsSibCall)
 CallSeqStart = MIRBuilder.buildInstr(AMDGPU::ADJCALLSTACKUP);
 
-  unsigned Opc =
-  getCallOpcode(MF, Info.Callee.isReg(), true, ST.isWave32(), CalleeCC);
+  bool IsChainCall = AMDGPU::isChainCC(Info.CallConv);
+  bool IsDynamicVGPRChainCall = false;
+
+  if (IsChainCall) {
+ArgInfo FlagsArg = Info.OrigArgs[ChainCallArgIdx::Flags];
+const APInt &FlagsValue = 
cast<ConstantInt>(FlagsArg.OrigValue)->getValue();
+if (FlagsValue.isZero()) {
+  if (Info.OrigArgs.size() != 5) {
+LLVM_DEBUG(dbgs() << "No additional args allowed if flags == 0");
+return false;
+  }
+} else if (FlagsValue.isOneBitSet(0)) {
+  IsDynamicVGPRChainCall = true;
+
+  if (Info.OrigArgs.size() != 8) {
+LLVM_DEBUG(dbgs() << "Expected 3 additional args");
+return false;
+  }
+
+  // On GFX12, we can only change the VGPR allocation for wave32.
+  if (!ST.isWave32()) {
+LLVM_DEBUG(dbgs() << "Dynamic VGPR mode is only supported for wave32");
+return false;
+  }
+
+  ArgInfo FallbackExecArg = Info.OrigArgs[ChainCallArgIdx::FallbackExec];
+  assert(FallbackExecArg.Regs.size() == 1 &&
+ "Expected single register for fallback EXEC");
+  if (!FallbackExecArg.Ty->isIntegerTy(ST.getWavefrontSize())) {
+LLVM_DEBUG(dbgs() << "Bad type for fallback EXEC");
+return false;
+  }

arsenm wrote:

For consistency with other contexts, should this accept a wave64 signature when 
executing wave32, with implicit truncate to i32?

https://github.com/llvm/llvm-project/pull/130094
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU] Support image_bvh8_intersect_ray instruction and intrinsic. (PR #130041)

2025-03-10 Thread Mariusz Sikora via llvm-branch-commits

mariusz-sikora-at-amd wrote:

ping

https://github.com/llvm/llvm-project/pull/130041
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Do not restrict the computed gotos (#114990) (PR #130585)

2025-03-10 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/130585

Backport dd21aacd76e36d4db157a5d7a7b5370d456426e6

Requested by: @DianQK

>From 5449b176ebe63b354ac63e177aff9215c4726060 Mon Sep 17 00:00:00 2001
From: DianQK 
Date: Mon, 10 Mar 2025 19:34:07 +0800
Subject: [PATCH] [TailDuplicator] Do not restrict the computed gotos (#114990)

Fixes #106846.

This is what I learned from GCC. I found that GCC does not duplicate the
BB that has indirect jumps with the jump table. I believe GCC has
provided a clear explanation here:

> Duplicate the blocks containing computed gotos. This basically
unfactors computed gotos that were factored early on in the compilation
process to speed up edge based data flow. We used to not unfactor them
again, which can seriously pessimize code with many computed jumps in
the source code, such as interpreters.

(cherry picked from commit dd21aacd76e36d4db157a5d7a7b5370d456426e6)
---
 llvm/include/llvm/CodeGen/MachineInstr.h  |  13 +-
 llvm/lib/CodeGen/TailDuplicator.cpp   |  12 +-
 .../CodeGen/X86/tail-dup-computed-goto.mir| 255 ++
 3 files changed, 276 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/tail-dup-computed-goto.mir

diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h 
b/llvm/include/llvm/CodeGen/MachineInstr.h
index 102b1eb07358e..b26cabe801ee8 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -994,8 +994,17 @@ class MachineInstr
 
   /// Return true if this is an indirect branch, such as a
   /// branch through a register.
-  bool isIndirectBranch(QueryType Type = AnyInBundle) const {
-return hasProperty(MCID::IndirectBranch, Type);
+  bool isIndirectBranch(QueryType Type = AnyInBundle,
+bool IncludeJumpTable = true) const {
+return hasProperty(MCID::IndirectBranch, Type) &&
+   (IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) {
+  return Op.isJTI();
+}));
+  }
+
+  bool isComputedGoto(QueryType Type = AnyInBundle) const {
+// Jump tables are not considered computed gotos.
+return isIndirectBranch(Type, /*IncludeJumpTable=*/false);
   }
 
   /// Return true if this is a branch which may fall
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp 
b/llvm/lib/CodeGen/TailDuplicator.cpp
index 6c6d38462484a..21f75458c90f3 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -601,8 +601,11 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   // that rearrange the predecessors of the indirect branch.
 
   bool HasIndirectbr = false;
-  if (!TailBB.empty())
+  bool HasComputedGoto = false;
+  if (!TailBB.empty()) {
 HasIndirectbr = TailBB.back().isIndirectBranch();
+HasComputedGoto = TailBB.back().isComputedGoto();
+  }
 
   if (HasIndirectbr && PreRegAlloc)
 MaxDuplicateCount = TailDupIndirectBranchSize;
@@ -660,7 +663,12 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   // Duplicating a BB which has both multiple predecessors and successors will
   // may cause huge amount of PHI nodes. If we want to remove this limitation,
   // we have to address https://github.com/llvm/llvm-project/issues/78578.
-  if (TailBB.pred_size() > TailDupPredSize &&
+  // NB. This basically unfactors computed gotos that were factored early on in
+  // the compilation process to speed up edge based data flow. If we do not
+  // unfactor them again, it can seriously pessimize code with many computed
+  // jumps in the source code, such as interpreters. Therefore we do not
+  // restrict the computed gotos.
+  if (!HasComputedGoto && TailBB.pred_size() > TailDupPredSize &&
   TailBB.succ_size() > TailDupSuccSize) {
 // If TailBB or any of its successors contains a phi, we may have to add a
 // large number of additional phis with additional incoming values.
diff --git a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir 
b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
new file mode 100644
index 0..a472dc67d8d51
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
@@ -0,0 +1,255 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication 
-tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s
+# Check that only the computed goto is not be restrict by tail-dup-pred-size 
and tail-dup-succ-size.
+--- |
+  declare i64 @f0()
+  declare i64 @f1()
+  declare i64 @f2()
+  declare i64 @f3()
+  declare i64 @f4()
+  declare i64 @f5()
+  @computed_goto.dispatch = external global [5 x ptr]
+  define void @computed_goto() { ret void }
+  define void @jump_table() { ret void }
+...
+---
+name:computed_goto
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: computed_goto
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %b

[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Do not restrict the computed gotos (#114990) (PR #130585)

2025-03-10 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: None (llvmbot)


Changes

Backport dd21aacd76e36d4db157a5d7a7b5370d456426e6

Requested by: @DianQK

---
Full diff: https://github.com/llvm/llvm-project/pull/130585.diff


3 Files Affected:

- (modified) llvm/include/llvm/CodeGen/MachineInstr.h (+11-2) 
- (modified) llvm/lib/CodeGen/TailDuplicator.cpp (+10-2) 
- (added) llvm/test/CodeGen/X86/tail-dup-computed-goto.mir (+255) 


``diff
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h 
b/llvm/include/llvm/CodeGen/MachineInstr.h
index 102b1eb07358e..b26cabe801ee8 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -994,8 +994,17 @@ class MachineInstr
 
   /// Return true if this is an indirect branch, such as a
   /// branch through a register.
-  bool isIndirectBranch(QueryType Type = AnyInBundle) const {
-return hasProperty(MCID::IndirectBranch, Type);
+  bool isIndirectBranch(QueryType Type = AnyInBundle,
+bool IncludeJumpTable = true) const {
+return hasProperty(MCID::IndirectBranch, Type) &&
+   (IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) {
+  return Op.isJTI();
+}));
+  }
+
+  bool isComputedGoto(QueryType Type = AnyInBundle) const {
+// Jump tables are not considered computed gotos.
+return isIndirectBranch(Type, /*IncludeJumpTable=*/false);
   }
 
   /// Return true if this is a branch which may fall
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp 
b/llvm/lib/CodeGen/TailDuplicator.cpp
index 6c6d38462484a..21f75458c90f3 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -601,8 +601,11 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   // that rearrange the predecessors of the indirect branch.
 
   bool HasIndirectbr = false;
-  if (!TailBB.empty())
+  bool HasComputedGoto = false;
+  if (!TailBB.empty()) {
 HasIndirectbr = TailBB.back().isIndirectBranch();
+HasComputedGoto = TailBB.back().isComputedGoto();
+  }
 
   if (HasIndirectbr && PreRegAlloc)
 MaxDuplicateCount = TailDupIndirectBranchSize;
@@ -660,7 +663,12 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   // Duplicating a BB which has both multiple predecessors and successors will
   // may cause huge amount of PHI nodes. If we want to remove this limitation,
   // we have to address https://github.com/llvm/llvm-project/issues/78578.
-  if (TailBB.pred_size() > TailDupPredSize &&
+  // NB. This basically unfactors computed gotos that were factored early on in
+  // the compilation process to speed up edge based data flow. If we do not
+  // unfactor them again, it can seriously pessimize code with many computed
+  // jumps in the source code, such as interpreters. Therefore we do not
+  // restrict the computed gotos.
+  if (!HasComputedGoto && TailBB.pred_size() > TailDupPredSize &&
   TailBB.succ_size() > TailDupSuccSize) {
 // If TailBB or any of its successors contains a phi, we may have to add a
 // large number of additional phis with additional incoming values.
diff --git a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir 
b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
new file mode 100644
index 0..a472dc67d8d51
--- /dev/null
+++ b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
@@ -0,0 +1,255 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 5
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication 
-tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s
+# Check that only the computed goto is not be restrict by tail-dup-pred-size 
and tail-dup-succ-size.
+--- |
+  declare i64 @f0()
+  declare i64 @f1()
+  declare i64 @f2()
+  declare i64 @f3()
+  declare i64 @f4()
+  declare i64 @f5()
+  @computed_goto.dispatch = external global [5 x ptr]
+  define void @computed_goto() { ret void }
+  define void @jump_table() { ret void }
+...
+---
+name:computed_goto
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: computed_goto
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x2000), %bb.2(0x2000), 
%bb.3(0x2000), %bb.4(0x2000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, 
implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK-NEXT:   CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit 
$rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+  ; CHECK-NEXT:   ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def 
dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gr64 = COPY $rax
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY]]
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+  ; CHECK-NEXT:   JMP64m $noreg, 8, [[COPY1]], @computed_goto.

[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIMemoryLegalizer to NPM (PR #130060)

2025-03-10 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.


https://github.com/llvm/llvm-project/pull/130060
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port VirtRegRewriter to NPM (PR #130564)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -276,16 +288,49 @@ void VirtRegRewriter::getAnalysisUsage(AnalysisUsage &AU) 
const {
   MachineFunctionPass::getAnalysisUsage(AU);
 }
 
-bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
+bool VirtRegRewriterLegacy::runOnMachineFunction(MachineFunction &MF) {
+  VirtRegMap &VRM = getAnalysis().getVRM();
+  LiveIntervals &LIS = getAnalysis().getLIS();
+  LiveRegMatrix &LRM = getAnalysis().getLRM();
+  SlotIndexes &Indexes = getAnalysis().getSI();
+  LiveDebugVariables &DebugVars =
+  getAnalysis().getLDV();
+
+  VirtRegRewriter R(ClearVirtRegs, &Indexes, &LIS, &LRM, &VRM, &DebugVars);
+  return R.run(MF);
+}
+
+PreservedAnalyses
+VirtRegRewriterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+  VirtRegMap &VRM = MFAM.getResult(MF);
+  LiveIntervals &LIS = MFAM.getResult(MF);
+  LiveRegMatrix &LRM = MFAM.getResult(MF);
+  SlotIndexes &Indexes = MFAM.getResult(MF);
+  LiveDebugVariables &DebugVars =
+  MFAM.getResult(MF);
+
+  VirtRegRewriter R(ClearVirtRegs, &Indexes, &LIS, &LRM, &VRM, &DebugVars);
+  if (!R.run(MF))
+return PreservedAnalyses::all();
+  auto PA = getMachineFunctionPassPreservedAnalyses();

cdevadas wrote:

```suggestion
return PreservedAnalyses::all();

  auto PA = getMachineFunctionPassPreservedAnalyses();
```

https://github.com/llvm/llvm-project/pull/130564
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] AMDGPU: Implement bitcode autoupgrade for old style enqueue blocks (PR #128520)

2025-03-10 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/128520

>From 652675b203aebda23fcccdd1b411c2aa51181e15 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 17 Nov 2023 14:21:52 +0900
Subject: [PATCH] AMDGPU: Implement bitcode autoupgrade for old style enqueue
 blocks

Introduces circular dependency in build for appendToUsed, and I'm not
sure it's worth the trouble to fix it. We can most likely get away
without upgrading this. We could move appendToUsed / appendToCompilerUsed
directly to be in Module.
---
 llvm/lib/IR/AutoUpgrade.cpp   |  49 +++
 llvm/lib/IR/CMakeLists.txt|   1 +
 .../amdgpu-autoupgrade-enqueued-block.ll  | 138 ++
 3 files changed, 188 insertions(+)
 create mode 100644 llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index cb4ecc60aa473..0e4e135e90972 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
 #include 
 #include 
 #include 
@@ -5518,6 +5519,51 @@ struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
 };
 } // namespace
 
+static StructType *getAMDGPURuntimeHandleType(LLVMContext &C,
+  Type *KernelDescriptorPtrTy) {
+  Type *Int32 = Type::getInt32Ty(C);
+  return StructType::create(C, {KernelDescriptorPtrTy, Int32, Int32},
+"block.runtime.handle.t");
+}
+
+/// Rewrite to new scheme for enqueued block lowering
+static void upgradeAMDGPUKernelEnqueuedBlock(Function &F) {
+  if (F.isMaterializable()) {
+// A verifier error is produced if we add metadata to the function during
+// linking.
+return;
+  }
+
+  const StringLiteral EnqueuedBlockName("enqueued-block");
+  if (!F.hasFnAttribute(EnqueuedBlockName))
+return;
+
+  F.removeFnAttr(EnqueuedBlockName);
+
+  Module *M = F.getParent();
+  LLVMContext &Ctx = M->getContext();
+  const DataLayout &DL = M->getDataLayout();
+
+  StructType *HandleTy = getAMDGPURuntimeHandleType(
+  Ctx, PointerType::get(Ctx, DL.getDefaultGlobalsAddressSpace()));
+
+  Twine RuntimeHandleName = F.getName() + ".runtime.handle";
+
+  auto *RuntimeHandle = new GlobalVariable(
+  *M, HandleTy,
+  /*isConstant=*/true, F.getLinkage(),
+  /*Initializer=*/ConstantAggregateZero::get(HandleTy), RuntimeHandleName,
+  /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
+  DL.getDefaultGlobalsAddressSpace(),
+  /*isExternallyInitialized=*/true);
+  RuntimeHandle->setSection(".amdgpu.kernel.runtime.handle");
+
+  MDNode *HandleAsMD = MDNode::get(Ctx, ValueAsMetadata::get(RuntimeHandle));
+  F.setMetadata(LLVMContext::MD_associated, HandleAsMD);
+
+  appendToUsed(*M, {&F, RuntimeHandle});
+}
+
 void llvm::UpgradeFunctionAttributes(Function &F) {
   // If a function definition doesn't have the strictfp attribute,
   // convert any callsite strictfp attributes to nobuiltin.
@@ -5558,6 +5604,9 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
   F.removeFnAttr("amdgpu-unsafe-fp-atomics");
 }
   }
+
+  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+upgradeAMDGPUKernelEnqueuedBlock(F);
 }
 
 static bool isOldLoopArgument(Metadata *MD) {
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt
index eb00829fd8c70..a78c58c807f6a 100644
--- a/llvm/lib/IR/CMakeLists.txt
+++ b/llvm/lib/IR/CMakeLists.txt
@@ -92,6 +92,7 @@ add_llvm_component_library(LLVMCore
   LINK_COMPONENTS
   BinaryFormat
   Demangle
+  TransformUtils
   Remarks
   Support
   TargetParser
diff --git a/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll 
b/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll
new file mode 100644
index 0..41521c1f2025d
--- /dev/null
+++ b/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll
@@ -0,0 +1,138 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+%struct.ndrange_t = type { i32 }
+%opencl.queue_t = type opaque
+
+; CHECK: %block.runtime.handle.t = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.0 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.1 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.2 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.3 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.4 = type { ptr, i32, i32 }
+
+
+; CHECK: @kernel_address_user = global [1 x ptr] [ptr 
@block_has_used_kernel_address]
+; CHECK: @__test_block_invoke_kernel.runtime.handle = internal 
externally_initialized constant %block.runtime.handle.t zeroinitializer, 
section ".amdgpu.kernel.runtime.handle"
+; CHECK: @__test_block_invoke_2_kernel.runtime.handle = internal 
externally_initialized constant %block.runtime.handle.t.0 zeroinitializer, 
section ".amdgpu.kernel.runtime.handle"
+; CHECK: @block_has_used_k

[llvm-branch-commits] [llvm] AMDGPU: Implement bitcode autoupgrade for old style enqueue blocks (PR #128520)

2025-03-10 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/128520

>From 652675b203aebda23fcccdd1b411c2aa51181e15 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 17 Nov 2023 14:21:52 +0900
Subject: [PATCH] AMDGPU: Implement bitcode autoupgrade for old style enqueue
 blocks

Introduces circular dependency in build for appendToUsed, and I'm not
sure it's worth the trouble to fix it. We can most likely get away
without upgrading this. We could move appendToUsed / appendToCompilerUsed
directly to be in Module.
---
 llvm/lib/IR/AutoUpgrade.cpp   |  49 +++
 llvm/lib/IR/CMakeLists.txt|   1 +
 .../amdgpu-autoupgrade-enqueued-block.ll  | 138 ++
 3 files changed, 188 insertions(+)
 create mode 100644 llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index cb4ecc60aa473..0e4e135e90972 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -48,6 +48,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/TargetParser/Triple.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
 #include 
 #include 
 #include 
@@ -5518,6 +5519,51 @@ struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
 };
 } // namespace
 
+static StructType *getAMDGPURuntimeHandleType(LLVMContext &C,
+  Type *KernelDescriptorPtrTy) {
+  Type *Int32 = Type::getInt32Ty(C);
+  return StructType::create(C, {KernelDescriptorPtrTy, Int32, Int32},
+"block.runtime.handle.t");
+}
+
+/// Rewrite to new scheme for enqueued block lowering
+static void upgradeAMDGPUKernelEnqueuedBlock(Function &F) {
+  if (F.isMaterializable()) {
+// A verifier error is produced if we add metadata to the function during
+// linking.
+return;
+  }
+
+  const StringLiteral EnqueuedBlockName("enqueued-block");
+  if (!F.hasFnAttribute(EnqueuedBlockName))
+return;
+
+  F.removeFnAttr(EnqueuedBlockName);
+
+  Module *M = F.getParent();
+  LLVMContext &Ctx = M->getContext();
+  const DataLayout &DL = M->getDataLayout();
+
+  StructType *HandleTy = getAMDGPURuntimeHandleType(
+  Ctx, PointerType::get(Ctx, DL.getDefaultGlobalsAddressSpace()));
+
+  Twine RuntimeHandleName = F.getName() + ".runtime.handle";
+
+  auto *RuntimeHandle = new GlobalVariable(
+  *M, HandleTy,
+  /*isConstant=*/true, F.getLinkage(),
+  /*Initializer=*/ConstantAggregateZero::get(HandleTy), RuntimeHandleName,
+  /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
+  DL.getDefaultGlobalsAddressSpace(),
+  /*isExternallyInitialized=*/true);
+  RuntimeHandle->setSection(".amdgpu.kernel.runtime.handle");
+
+  MDNode *HandleAsMD = MDNode::get(Ctx, ValueAsMetadata::get(RuntimeHandle));
+  F.setMetadata(LLVMContext::MD_associated, HandleAsMD);
+
+  appendToUsed(*M, {&F, RuntimeHandle});
+}
+
 void llvm::UpgradeFunctionAttributes(Function &F) {
   // If a function definition doesn't have the strictfp attribute,
   // convert any callsite strictfp attributes to nobuiltin.
@@ -5558,6 +5604,9 @@ void llvm::UpgradeFunctionAttributes(Function &F) {
   F.removeFnAttr("amdgpu-unsafe-fp-atomics");
 }
   }
+
+  if (F.getCallingConv() == CallingConv::AMDGPU_KERNEL)
+upgradeAMDGPUKernelEnqueuedBlock(F);
 }
 
 static bool isOldLoopArgument(Metadata *MD) {
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt
index eb00829fd8c70..a78c58c807f6a 100644
--- a/llvm/lib/IR/CMakeLists.txt
+++ b/llvm/lib/IR/CMakeLists.txt
@@ -92,6 +92,7 @@ add_llvm_component_library(LLVMCore
   LINK_COMPONENTS
   BinaryFormat
   Demangle
+  TransformUtils
   Remarks
   Support
   TargetParser
diff --git a/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll 
b/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll
new file mode 100644
index 0..41521c1f2025d
--- /dev/null
+++ b/llvm/test/Bitcode/amdgpu-autoupgrade-enqueued-block.ll
@@ -0,0 +1,138 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+%struct.ndrange_t = type { i32 }
+%opencl.queue_t = type opaque
+
+; CHECK: %block.runtime.handle.t = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.0 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.1 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.2 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.3 = type { ptr, i32, i32 }
+; CHECK: %block.runtime.handle.t.4 = type { ptr, i32, i32 }
+
+
+; CHECK: @kernel_address_user = global [1 x ptr] [ptr 
@block_has_used_kernel_address]
+; CHECK: @__test_block_invoke_kernel.runtime.handle = internal 
externally_initialized constant %block.runtime.handle.t zeroinitializer, 
section ".amdgpu.kernel.runtime.handle"
+; CHECK: @__test_block_invoke_2_kernel.runtime.handle = internal 
externally_initialized constant %block.runtime.handle.t.0 zeroinitializer, 
section ".amdgpu.kernel.runtime.handle"
+; CHECK: @block_has_used_k

[llvm-branch-commits] [llvm] WIP: [CodeGen][NPM] Port VirtRegRewriter to NPM (PR #130564)

2025-03-10 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff f20b44edd8de0001efd809147599e48966e51606 
061150df047480e1bcf44460b0ff745e5d6a4b65 --extensions h,cpp -- 
llvm/include/llvm/CodeGen/VirtRegMap.h llvm/include/llvm/InitializePasses.h 
llvm/include/llvm/Passes/CodeGenPassBuilder.h llvm/lib/CodeGen/CodeGen.cpp 
llvm/lib/CodeGen/VirtRegMap.cpp llvm/lib/Passes/PassBuilder.cpp 
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
```





View the diff from clang-format here.


```diff
diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h 
b/llvm/include/llvm/CodeGen/VirtRegMap.h
index e74131545f..f5fba0d654 100644
--- a/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -238,8 +238,10 @@ public:
 
 class VirtRegRewriterPass : public PassInfoMixin {
   bool ClearVirtRegs = true;
+
 public:
-  VirtRegRewriterPass(bool ClearVirtRegs = true) : 
ClearVirtRegs(ClearVirtRegs) {}
+  VirtRegRewriterPass(bool ClearVirtRegs = true)
+  : ClearVirtRegs(ClearVirtRegs) {}
   PreservedAnalyses run(MachineFunction &MF,
 MachineFunctionAnalysisManager &MFAM);
 
diff --git a/llvm/lib/CodeGen/VirtRegMap.cpp b/llvm/lib/CodeGen/VirtRegMap.cpp
index 218b52ab7a..bf10fbe48b 100644
--- a/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -230,16 +230,15 @@ public:
   : Indexes(Indexes), LIS(LIS), LRM(LRM), VRM(VRM), DebugVars(DebugVars),
 ClearVirtRegs(ClearVirtRegs) {}
 
-  bool run(MachineFunction&);
-
+  bool run(MachineFunction &);
 };
 
 class VirtRegRewriterLegacy : public MachineFunctionPass {
 public:
   static char ID;
   bool ClearVirtRegs;
-  VirtRegRewriterLegacy(bool ClearVirtRegs = true) :
-MachineFunctionPass(ID), ClearVirtRegs(ClearVirtRegs) {}
+  VirtRegRewriterLegacy(bool ClearVirtRegs = true)
+  : MachineFunctionPass(ID), ClearVirtRegs(ClearVirtRegs) {}
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
@@ -302,13 +301,15 @@ bool 
VirtRegRewriterLegacy::runOnMachineFunction(MachineFunction &MF) {
   return R.run(MF);
 }
 
-PreservedAnalyses VirtRegRewriterPass::run(MachineFunction &MF,
-  MachineFunctionAnalysisManager &MFAM) {
+PreservedAnalyses
+VirtRegRewriterPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
   VirtRegMap &VRM = MFAM.getResult(MF);
   LiveIntervals &LIS = MFAM.getResult(MF);
   LiveRegMatrix &LRM = MFAM.getResult(MF);
   SlotIndexes &Indexes = MFAM.getResult(MF);
-  LiveDebugVariables &DebugVars = 
MFAM.getResult(MF);
+  LiveDebugVariables &DebugVars =
+  MFAM.getResult(MF);
 
   VirtRegRewriter R(ClearVirtRegs, &Indexes, &LIS, &LRM, &VRM, &DebugVars);
   if (!R.run(MF))
@@ -771,7 +772,8 @@ void VirtRegRewriter::rewrite() {
   RewriteRegs.clear();
 }
 
-void VirtRegRewriterPass::printPipeline(raw_ostream &OS, 
function_ref) const {
+void VirtRegRewriterPass::printPipeline(
+raw_ostream &OS, function_ref) const {
   OS << "virt-reg-rewriter<";
   if (!ClearVirtRegs)
 OS << "no-";

```




https://github.com/llvm/llvm-project/pull/130564
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] WIP: [CodeGen][NPM] Port VirtRegRewriter to NPM (PR #130564)

2025-03-10 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130564

>From 0f09f5616d4b681df5349333991748070e07182c Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Mon, 10 Mar 2025 06:14:27 +
Subject: [PATCH] [CodeGen][NPM] Port VirtRegRewriter to NPM

Not sure why this is squished into VirtRegMap.h
---
 llvm/include/llvm/CodeGen/VirtRegMap.h| 13 +++
 llvm/include/llvm/InitializePasses.h  |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  1 +
 .../llvm/Passes/MachinePassRegistry.def   |  7 +-
 llvm/lib/CodeGen/CodeGen.cpp  |  2 +-
 llvm/lib/CodeGen/VirtRegMap.cpp   | 84 +++
 llvm/lib/Passes/PassBuilder.cpp   | 13 +++
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  6 +-
 .../AMDGPU/alloc-aligned-tuples-gfx90a.mir|  2 +
 .../CodeGen/AMDGPU/fold-restore-undef-use.mir |  2 +
 .../greedy-remark-crash-unassigned-reg.mir|  4 +
 llvm/test/CodeGen/X86/pr30821.mir |  1 +
 12 files changed, 115 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h 
b/llvm/include/llvm/CodeGen/VirtRegMap.h
index c9e405e1981d9..e74131545f277 100644
--- a/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -235,6 +235,19 @@ class VirtRegMapPrinterPass : public 
PassInfoMixin {
 MachineFunctionAnalysisManager &MFAM);
   static bool isRequired() { return true; }
 };
+
+class VirtRegRewriterPass : public PassInfoMixin {
+  bool ClearVirtRegs = true;
+public:
+  VirtRegRewriterPass(bool ClearVirtRegs = true) : 
ClearVirtRegs(ClearVirtRegs) {}
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+
+  static bool isRequired() { return true; }
+
+  void printPipeline(raw_ostream &OS, function_ref) 
const;
+};
+
 } // end llvm namespace
 
 #endif // LLVM_CODEGEN_VIRTREGMAP_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 9afca6c0dab70..b8b0d09b917fb 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -316,7 +316,7 @@ void 
initializeUnreachableBlockElimLegacyPassPass(PassRegistry &);
 void initializeUnreachableMachineBlockElimPass(PassRegistry &);
 void initializeVerifierLegacyPassPass(PassRegistry &);
 void initializeVirtRegMapWrapperLegacyPass(PassRegistry &);
-void initializeVirtRegRewriterPass(PassRegistry &);
+void initializeVirtRegRewriterLegacyPass(PassRegistry &);
 void initializeWasmEHPreparePass(PassRegistry &);
 void initializeWinEHPreparePass(PassRegistry &);
 void initializeWriteBitcodePassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 9ec9836c15eb5..db227ddaa0e2b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -81,6 +81,7 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TwoAddressInstructionPass.h"
 #include "llvm/CodeGen/UnreachableBlockElim.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/CodeGen/WasmEHPrepare.h"
 #include "llvm/CodeGen/WinEHPrepare.h"
 #include "llvm/IR/PassManager.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 87253ebc8b789..eab6a6f6cd494 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -215,6 +215,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
   return parseRegAllocGreedyFilterFunc(*PB, Params);
 }, "reg-filter"
 )
+
+MACHINE_FUNCTION_PASS_WITH_PARAMS(
+"virt-reg-rewriter", "VirtRegRewriterPass",
+[](bool ClearVirtRegs) { return VirtRegRewriterPass(ClearVirtRegs); },
+parseVirtRegRewriterPassOptions, "no-clear-vregs;clear-vregs")
+
 #undef MACHINE_FUNCTION_PASS_WITH_PARAMS
 
 // After a pass is converted to new pass manager, its entry should be moved 
from
@@ -287,6 +293,5 @@ DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass)
 DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass)
 DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass)
 DUMMY_MACHINE_FUNCTION_PASS("unpack-mi-bundles", UnpackMachineBundlesPass)
-DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass)
 DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass)
 #undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 3169a109aa174..b3ec59889b8b7 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -141,7 +141,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeUnreachableBlockElimLegacyPassPass(Registry);
   initializeUnreachableMachineBlockElimPass(Registry);
   initializeVirtRegMapWrapperLegacyPass(Registry);
-  initializeVirtRegRewriterPass(Registry);
+  init

[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-10 Thread Diana Picus via llvm-branch-commits

https://github.com/rovka updated 
https://github.com/llvm/llvm-project/pull/130094

>From aff1e132263dba730999eb017b7548a5d2f46b6f Mon Sep 17 00:00:00 2001
From: Diana Picus 
Date: Tue, 10 Oct 2023 11:06:23 +0200
Subject: [PATCH 1/2] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain

The llvm.amdgcn.cs.chain intrinsic has a 'flags' operand which may
indicate that we want to reallocate the VGPRs before performing the
call.

A call with the following arguments:
```
llvm.amdgcn.cs.chain %callee, %exec, %sgpr_args, %vgpr_args,
  /*flags*/0x1, %num_vgprs, %fallback_exec, %fallback_callee
```
is supposed to do the following:
- copy the SGPR and VGPR args into their respective registers
- try to change the VGPR allocation
- if the allocation has succeeded, set EXEC to %exec and jump to
  %callee, otherwise set EXEC to %fallback_exec and jump to
  %fallback_callee

This patch implements the dynamic VGPR behaviour by generating an
S_ALLOC_VGPR followed by S_CSELECT_B32/64 instructions for the EXEC and
callee. The rest of the call sequence is left undisturbed (i.e.
identical to the case where the flags are 0 and we don't use dynamic
VGPRs). We achieve this by introducing some new pseudos
(SI_CS_CHAIN_TC_Wn_DVGPR) which are expanded in the SILateBranchLowering
pass, just like the simpler SI_CS_CHAIN_TC_Wn pseudos. The main reason
is so that we don't risk other passes (particularly the PostRA
scheduler) introducing instructions between the S_ALLOC_VGPR and the
jump. Such instructions might end up using VGPRs that have been
deallocated, or the wrong EXEC mask. Once the whole backend treats
S_ALLOC_VGPR and changes to EXEC as barriers for instructions that use
VGPRs, we could in principle move the expansion earlier (but in the
absence of a good reason for that my personal preference is to keep
it later in order to make debugging easier).

Since the expansion happens after register allocation, we're
careful to select constants to immediate operands instead of letting
ISel generate S_MOVs which could interfere with register allocation
(i.e. make it look like we need more registers than we actually do).

For GFX12, S_ALLOC_VGPR only works in wave32 mode, so we bail out
during ISel in wave64 mode. However, we can define the pseudos for
wave64 too so it's easy to handle if future generations support it.

Co-authored-by: Ana Mihajlovic 
---
 llvm/include/llvm/CodeGen/SelectionDAGISel.h  |  29 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp  |  13 +-
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 126 +--
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  88 +++--
 llvm/lib/Target/AMDGPU/SIInstructions.td  |  76 +++--
 .../Target/AMDGPU/SILateBranchLowering.cpp|  62 +++-
 .../amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll |  97 ++
 .../isel-amdgcn-cs-chain-intrinsic-w32.ll |  36 +-
 .../isel-amdgcn-cs-chain-intrinsic-w64.ll |  36 +-
 ...-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll | 315 ++
 .../CodeGen/AMDGPU/remove-register-flags.mir  |  19 ++
 11 files changed, 746 insertions(+), 151 deletions(-)
 create mode 100644 
llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll
 create mode 100644 
llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/remove-register-flags.mir

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h 
b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index e9452a6dc6233..55f8f19d437a0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -328,20 +328,21 @@ class SelectionDAGISel {
   };
 
   enum {
-OPFL_None   = 0,  // Node has no chain or glue input and isn't 
variadic.
-OPFL_Chain  = 1, // Node has a chain input.
-OPFL_GlueInput  = 2, // Node has a glue input.
-OPFL_GlueOutput = 4, // Node has a glue output.
-OPFL_MemRefs= 8, // Node gets accumulated MemRefs.
-OPFL_Variadic0  = 1<<4,  // Node is variadic, root has 0 fixed inputs.
-OPFL_Variadic1  = 2<<4,  // Node is variadic, root has 1 fixed inputs.
-OPFL_Variadic2  = 3<<4,  // Node is variadic, root has 2 fixed inputs.
-OPFL_Variadic3  = 4<<4,  // Node is variadic, root has 3 fixed inputs.
-OPFL_Variadic4  = 5<<4,  // Node is variadic, root has 4 fixed inputs.
-OPFL_Variadic5  = 6<<4,  // Node is variadic, root has 5 fixed inputs.
-OPFL_Variadic6  = 7<<4,  // Node is variadic, root has 6 fixed inputs.
-
-OPFL_VariadicInfo = OPFL_Variadic6
+OPFL_None = 0,   // Node has no chain or glue input and isn't variadic.
+OPFL_Chain = 1,  // Node has a chain input.
+OPFL_GlueInput = 2,  // Node has a glue input.
+OPFL_GlueOutput = 4, // Node has a glue output.
+OPFL_MemRefs = 8,// Node gets accumulated MemRefs.
+OPFL_Variadic0 = 1 << 4, // Node is variadic, root has 0 fixed inputs.
+OPFL_Variadic1 = 2 << 4, // Node is variadic, root has 1 fixed inputs.
+OPFL_Variad

[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-10 Thread Diana Picus via llvm-branch-commits

https://github.com/rovka updated 
https://github.com/llvm/llvm-project/pull/130094

>From aff1e132263dba730999eb017b7548a5d2f46b6f Mon Sep 17 00:00:00 2001
From: Diana Picus 
Date: Tue, 10 Oct 2023 11:06:23 +0200
Subject: [PATCH 1/5] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain

The llvm.amdgcn.cs.chain intrinsic has a 'flags' operand which may
indicate that we want to reallocate the VGPRs before performing the
call.

A call with the following arguments:
```
llvm.amdgcn.cs.chain %callee, %exec, %sgpr_args, %vgpr_args,
  /*flags*/0x1, %num_vgprs, %fallback_exec, %fallback_callee
```
is supposed to do the following:
- copy the SGPR and VGPR args into their respective registers
- try to change the VGPR allocation
- if the allocation has succeeded, set EXEC to %exec and jump to
  %callee, otherwise set EXEC to %fallback_exec and jump to
  %fallback_callee

This patch implements the dynamic VGPR behaviour by generating an
S_ALLOC_VGPR followed by S_CSELECT_B32/64 instructions for the EXEC and
callee. The rest of the call sequence is left undisturbed (i.e.
identical to the case where the flags are 0 and we don't use dynamic
VGPRs). We achieve this by introducing some new pseudos
(SI_CS_CHAIN_TC_Wn_DVGPR) which are expanded in the SILateBranchLowering
pass, just like the simpler SI_CS_CHAIN_TC_Wn pseudos. The main reason
is so that we don't risk other passes (particularly the PostRA
scheduler) introducing instructions between the S_ALLOC_VGPR and the
jump. Such instructions might end up using VGPRs that have been
deallocated, or the wrong EXEC mask. Once the whole backend treats
S_ALLOC_VGPR and changes to EXEC as barriers for instructions that use
VGPRs, we could in principle move the expansion earlier (but in the
absence of a good reason for that my personal preference is to keep
it later in order to make debugging easier).

Since the expansion happens after register allocation, we're
careful to select constants to immediate operands instead of letting
ISel generate S_MOVs which could interfere with register allocation
(i.e. make it look like we need more registers than we actually do).

For GFX12, S_ALLOC_VGPR only works in wave32 mode, so we bail out
during ISel in wave64 mode. However, we can define the pseudos for
wave64 too so it's easy to handle if future generations support it.

Co-authored-by: Ana Mihajlovic 
---
 llvm/include/llvm/CodeGen/SelectionDAGISel.h  |  29 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp  |  13 +-
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 126 +--
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  88 +++--
 llvm/lib/Target/AMDGPU/SIInstructions.td  |  76 +++--
 .../Target/AMDGPU/SILateBranchLowering.cpp|  62 +++-
 .../amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll |  97 ++
 .../isel-amdgcn-cs-chain-intrinsic-w32.ll |  36 +-
 .../isel-amdgcn-cs-chain-intrinsic-w64.ll |  36 +-
 ...-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll | 315 ++
 .../CodeGen/AMDGPU/remove-register-flags.mir  |  19 ++
 11 files changed, 746 insertions(+), 151 deletions(-)
 create mode 100644 
llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll
 create mode 100644 
llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/remove-register-flags.mir

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h 
b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index e9452a6dc6233..55f8f19d437a0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -328,20 +328,21 @@ class SelectionDAGISel {
   };
 
   enum {
-OPFL_None   = 0,  // Node has no chain or glue input and isn't 
variadic.
-OPFL_Chain  = 1, // Node has a chain input.
-OPFL_GlueInput  = 2, // Node has a glue input.
-OPFL_GlueOutput = 4, // Node has a glue output.
-OPFL_MemRefs= 8, // Node gets accumulated MemRefs.
-OPFL_Variadic0  = 1<<4,  // Node is variadic, root has 0 fixed inputs.
-OPFL_Variadic1  = 2<<4,  // Node is variadic, root has 1 fixed inputs.
-OPFL_Variadic2  = 3<<4,  // Node is variadic, root has 2 fixed inputs.
-OPFL_Variadic3  = 4<<4,  // Node is variadic, root has 3 fixed inputs.
-OPFL_Variadic4  = 5<<4,  // Node is variadic, root has 4 fixed inputs.
-OPFL_Variadic5  = 6<<4,  // Node is variadic, root has 5 fixed inputs.
-OPFL_Variadic6  = 7<<4,  // Node is variadic, root has 6 fixed inputs.
-
-OPFL_VariadicInfo = OPFL_Variadic6
+OPFL_None = 0,   // Node has no chain or glue input and isn't variadic.
+OPFL_Chain = 1,  // Node has a chain input.
+OPFL_GlueInput = 2,  // Node has a glue input.
+OPFL_GlueOutput = 4, // Node has a glue output.
+OPFL_MemRefs = 8,// Node gets accumulated MemRefs.
+OPFL_Variadic0 = 1 << 4, // Node is variadic, root has 0 fixed inputs.
+OPFL_Variadic1 = 2 << 4, // Node is variadic, root has 1 fixed inputs.
+OPFL_Variad

[llvm-branch-commits] [llvm] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain (PR #130094)

2025-03-10 Thread Diana Picus via llvm-branch-commits

https://github.com/rovka updated 
https://github.com/llvm/llvm-project/pull/130094

>From aff1e132263dba730999eb017b7548a5d2f46b6f Mon Sep 17 00:00:00 2001
From: Diana Picus 
Date: Tue, 10 Oct 2023 11:06:23 +0200
Subject: [PATCH 1/3] [AMDGPU] Dynamic VGPR support for llvm.amdgcn.cs.chain

The llvm.amdgcn.cs.chain intrinsic has a 'flags' operand which may
indicate that we want to reallocate the VGPRs before performing the
call.

A call with the following arguments:
```
llvm.amdgcn.cs.chain %callee, %exec, %sgpr_args, %vgpr_args,
  /*flags*/0x1, %num_vgprs, %fallback_exec, %fallback_callee
```
is supposed to do the following:
- copy the SGPR and VGPR args into their respective registers
- try to change the VGPR allocation
- if the allocation has succeeded, set EXEC to %exec and jump to
  %callee, otherwise set EXEC to %fallback_exec and jump to
  %fallback_callee

This patch implements the dynamic VGPR behaviour by generating an
S_ALLOC_VGPR followed by S_CSELECT_B32/64 instructions for the EXEC and
callee. The rest of the call sequence is left undisturbed (i.e.
identical to the case where the flags are 0 and we don't use dynamic
VGPRs). We achieve this by introducing some new pseudos
(SI_CS_CHAIN_TC_Wn_DVGPR) which are expanded in the SILateBranchLowering
pass, just like the simpler SI_CS_CHAIN_TC_Wn pseudos. The main reason
is so that we don't risk other passes (particularly the PostRA
scheduler) introducing instructions between the S_ALLOC_VGPR and the
jump. Such instructions might end up using VGPRs that have been
deallocated, or the wrong EXEC mask. Once the whole backend treats
S_ALLOC_VGPR and changes to EXEC as barriers for instructions that use
VGPRs, we could in principle move the expansion earlier (but in the
absence of a good reason for that my personal preference is to keep
it later in order to make debugging easier).

Since the expansion happens after register allocation, we're
careful to select constants to immediate operands instead of letting
ISel generate S_MOVs which could interfere with register allocation
(i.e. make it look like we need more registers than we actually do).

For GFX12, S_ALLOC_VGPR only works in wave32 mode, so we bail out
during ISel in wave64 mode. However, we can define the pseudos for
wave64 too so it's easy to handle if future generations support it.

Co-authored-by: Ana Mihajlovic 
---
 llvm/include/llvm/CodeGen/SelectionDAGISel.h  |  29 +-
 .../SelectionDAG/SelectionDAGBuilder.cpp  |  13 +-
 llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp | 126 +--
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp |  88 +++--
 llvm/lib/Target/AMDGPU/SIInstructions.td  |  76 +++--
 .../Target/AMDGPU/SILateBranchLowering.cpp|  62 +++-
 .../amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll |  97 ++
 .../isel-amdgcn-cs-chain-intrinsic-w32.ll |  36 +-
 .../isel-amdgcn-cs-chain-intrinsic-w64.ll |  36 +-
 ...-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll | 315 ++
 .../CodeGen/AMDGPU/remove-register-flags.mir  |  19 ++
 11 files changed, 746 insertions(+), 151 deletions(-)
 create mode 100644 
llvm/test/CodeGen/AMDGPU/amdgcn-cs-chain-intrinsic-dyn-vgpr-w32.ll
 create mode 100644 
llvm/test/CodeGen/AMDGPU/isel-amdgpu-cs-chain-intrinsic-dyn-vgpr-w32.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/remove-register-flags.mir

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h 
b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
index e9452a6dc6233..55f8f19d437a0 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h
@@ -328,20 +328,21 @@ class SelectionDAGISel {
   };
 
   enum {
-OPFL_None   = 0,  // Node has no chain or glue input and isn't 
variadic.
-OPFL_Chain  = 1, // Node has a chain input.
-OPFL_GlueInput  = 2, // Node has a glue input.
-OPFL_GlueOutput = 4, // Node has a glue output.
-OPFL_MemRefs= 8, // Node gets accumulated MemRefs.
-OPFL_Variadic0  = 1<<4,  // Node is variadic, root has 0 fixed inputs.
-OPFL_Variadic1  = 2<<4,  // Node is variadic, root has 1 fixed inputs.
-OPFL_Variadic2  = 3<<4,  // Node is variadic, root has 2 fixed inputs.
-OPFL_Variadic3  = 4<<4,  // Node is variadic, root has 3 fixed inputs.
-OPFL_Variadic4  = 5<<4,  // Node is variadic, root has 4 fixed inputs.
-OPFL_Variadic5  = 6<<4,  // Node is variadic, root has 5 fixed inputs.
-OPFL_Variadic6  = 7<<4,  // Node is variadic, root has 6 fixed inputs.
-
-OPFL_VariadicInfo = OPFL_Variadic6
+OPFL_None = 0,   // Node has no chain or glue input and isn't variadic.
+OPFL_Chain = 1,  // Node has a chain input.
+OPFL_GlueInput = 2,  // Node has a glue input.
+OPFL_GlueOutput = 4, // Node has a glue output.
+OPFL_MemRefs = 8,// Node gets accumulated MemRefs.
+OPFL_Variadic0 = 1 << 4, // Node is variadic, root has 0 fixed inputs.
+OPFL_Variadic1 = 2 << 4, // Node is variadic, root has 1 fixed inputs.
+OPFL_Variad

[llvm-branch-commits] [llvm] Add initial support for SPE brstack format (PR #129231)

2025-03-10 Thread Paschalis Mpeis via llvm-branch-commits


@@ -11,4 +11,4 @@ CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
 RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
 RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | 
FileCheck %s --check-prefix=CHECK-SPE-LBR
 
-CHECK-SPE-LBR: PERF2BOLT-ERROR: Arm SPE mode is combined only with 
BasicAggregation.
+CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE branch events

paschalis-mpeis wrote:

This test was not failing on my machine (i.e. not needing `not perf2bolt`), which 
made me realize that these tests might be fragile on buildbots and local 
environments. So I suggest using `perf record` for both non-lbr and lbr testing, 
and overriding the exit value with a subshell like so:

```diff
-## Check that Arm SPE mode is available on AArch64 with BasicAggregation.
+## Check that Arm SPE mode is available on AArch64.
 
 REQUIRES: system-linux,perf,target=aarch64{{.*}}
 
 RUN: %clang %cflags %p/../../Inputs/asm_foo.s %p/../../Inputs/asm_main.c -o 
%t.exe
-RUN: touch %t.empty.perf.data
-RUN: perf2bolt -p %t.empty.perf.data -o %t.perf.boltdata --nl --spe --pa 
%t.exe 2>&1 | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
+RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe 2> /dev/null
 
-CHECK-SPE-NO-LBR: PERF2BOLT: Starting data aggregation job
+RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe --nl %t.exe 2> 
/dev/null; exit 0) | FileCheck %s --check-prefix=CHECK-SPE-NO-LBR
 
-RUN: perf record -e cycles -q -o %t.perf.data -- %t.exe
-RUN: not perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2>&1 | 
FileCheck %s --check-prefix=CHECK-SPE-LBR
+RUN: (perf2bolt -p %t.perf.data -o %t.perf.boltdata --spe %t.exe 2> /dev/null; 
exit 0) | FileCheck %s --check-prefix=CHECK-SPE-LBR
 
-CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE branch events
+CHECK-SPE-NO-LBR: PERF2BOLT: spawning perf job to read SPE branch events 
(non-lbr)
+CHECK-SPE-LBR: PERF2BOLT: spawning perf job to read SPE brstack events
```

(the diff covers the entire test; lbr/non-lbr)

https://github.com/llvm/llvm-project/pull/129231
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Add initial support for SPE brstack format (PR #129231)

2025-03-10 Thread Paschalis Mpeis via llvm-branch-commits


@@ -1034,7 +1034,11 @@ ErrorOr DataAggregator::parseLBREntry() {
   if (std::error_code EC = MispredStrRes.getError())
 return EC;
   StringRef MispredStr = MispredStrRes.get();
-  if (MispredStr.size() != 1 ||
+  // SPE brstack mispredicted flags might be two characters long: 'PN' or 'MN'.
+  bool ProperStrSize = (MispredStr.size() == 2 && opts::ArmSPE)
+   ? (MispredStr[1] == 'N')
+   : (MispredStr.size() == 1);
+  if (!ProperStrSize ||
   (MispredStr[0] != 'P' && MispredStr[0] != 'M' && MispredStr[0] != '-')) {
 reportError("expected single char for mispred bit");

paschalis-mpeis wrote:

Here you can show a relevant message for each case, e.g. the error might be 
specific to SPE's taken bit, or both mispred/taken parsing errors may occur.

You could extract the earlier `MispredStr[0]` checks into another boolean (say 
`PredictionBitErr`) and reuse it?
Also, maybe `ProperStrSize` could get specialized to something like 
`SpeTakenBitErr`.

https://github.com/llvm/llvm-project/pull/129231
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Add initial support for SPE brstack format (PR #129231)

2025-03-10 Thread Paschalis Mpeis via llvm-branch-commits

https://github.com/paschalis-mpeis edited 
https://github.com/llvm/llvm-project/pull/129231
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] Add initial support for SPE brstack format (PR #129231)

2025-03-10 Thread Paschalis Mpeis via llvm-branch-commits


@@ -88,6 +89,45 @@ struct PerfSpeEventsTestHelper : public testing::Test {
 
 return SampleSize == DA.BasicSamples.size();
   }
+
+  /// Compare LBREntries
+  bool checkLBREntry(const LBREntry &Lhs, const LBREntry &Rhs) {
+return Lhs.From == Rhs.From && Lhs.To == Rhs.To &&
+   Lhs.Mispred == Rhs.Mispred;
+  }
+
+  /// Parse and check SPE brstack as LBR
+  void parseAndCheckBrstackEvents(
+  uint64_t PID,
+  const std::vector> &ExpectedSamples) {
+int NumSamples = 0;
+
+DataAggregator DA("");
+DA.ParsingBuf = opts::ReadPerfEvents;
+DA.BC = BC.get();
+DataAggregator::MMapInfo MMap;
+DA.BinaryMMapInfo.insert(std::make_pair(PID, MMap));
+
+// Process buffer.
+while (DA.hasData()) {

paschalis-mpeis wrote:

Would it be possible to call `parseBranchEvents` here instead of having a loop 
and replicating logic?

And then check any relevant buffers (i.e. `DA.BranchLBRs` in a loop)?
(There is probably no need to check `FallthroughLBRs` at this point.)

https://github.com/llvm/llvm-project/pull/129231
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [IR] Fix assertion error in User new/delete edge case (#129914) (PR #130580)

2025-03-10 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/130580

Backport 8d38906d08f0189a7a7f865b267f47cab0a3790f

Requested by: @marcauberer

>From b3515aa07b42164268a835e3f5874f49056a2e22 Mon Sep 17 00:00:00 2001
From: Marc Auberer 
Date: Mon, 10 Mar 2025 11:53:45 +0100
Subject: [PATCH] [IR] Fix assertion error in User new/delete edge case
 (#129914)

Fixes #129900

If `operator delete` was called after an unsuccessful constructor call
after `operator new`, we ran into undefined behaviour.
This was discovered by our malfunction tests while preparing an upgrade
to LLVM 20, that explicitly check for such kind of bugs.

(cherry picked from commit 8d38906d08f0189a7a7f865b267f47cab0a3790f)
---
 llvm/lib/IR/User.cpp | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp
index b0aa785deb9af..ab44cb4b8a3f7 100644
--- a/llvm/lib/IR/User.cpp
+++ b/llvm/lib/IR/User.cpp
@@ -146,6 +146,9 @@ void *User::allocateFixedOperandUser(size_t Size, unsigned 
Us,
   Use *Start = reinterpret_cast(Storage + DescBytesToAllocate);
   Use *End = Start + Us;
   User *Obj = reinterpret_cast(End);
+  Obj->NumUserOperands = Us;
+  Obj->HasHungOffUses = false;
+  Obj->HasDescriptor = DescBytes != 0;
   for (; Start != End; Start++)
 new (Start) Use(Obj);
 
@@ -172,6 +175,9 @@ void *User::operator new(size_t Size, 
HungOffOperandsAllocMarker) {
   void *Storage = ::operator new(Size + sizeof(Use *));
   Use **HungOffOperandList = static_cast(Storage);
   User *Obj = reinterpret_cast(HungOffOperandList + 1);
+  Obj->NumUserOperands = 0;
+  Obj->HasHungOffUses = true;
+  Obj->HasDescriptor = false;
   *HungOffOperandList = nullptr;
   return Obj;
 }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [IR] Fix assertion error in User new/delete edge case (#129914) (PR #130580)

2025-03-10 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/130580
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [IR] Fix assertion error in User new/delete edge case (#129914) (PR #130580)

2025-03-10 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: None (llvmbot)


Changes

Backport 8d38906d08f0189a7a7f865b267f47cab0a3790f

Requested by: @marcauberer

---
Full diff: https://github.com/llvm/llvm-project/pull/130580.diff


1 Files Affected:

- (modified) llvm/lib/IR/User.cpp (+6) 


```diff
diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp
index b0aa785deb9af..ab44cb4b8a3f7 100644
--- a/llvm/lib/IR/User.cpp
+++ b/llvm/lib/IR/User.cpp
@@ -146,6 +146,9 @@ void *User::allocateFixedOperandUser(size_t Size, unsigned 
Us,
   Use *Start = reinterpret_cast(Storage + DescBytesToAllocate);
   Use *End = Start + Us;
   User *Obj = reinterpret_cast(End);
+  Obj->NumUserOperands = Us;
+  Obj->HasHungOffUses = false;
+  Obj->HasDescriptor = DescBytes != 0;
   for (; Start != End; Start++)
 new (Start) Use(Obj);
 
@@ -172,6 +175,9 @@ void *User::operator new(size_t Size, 
HungOffOperandsAllocMarker) {
   void *Storage = ::operator new(Size + sizeof(Use *));
   Use **HungOffOperandList = static_cast(Storage);
   User *Obj = reinterpret_cast(HungOffOperandList + 1);
+  Obj->NumUserOperands = 0;
+  Obj->HasHungOffUses = true;
+  Obj->HasDescriptor = false;
   *HungOffOperandList = nullptr;
   return Obj;
 }

```




https://github.com/llvm/llvm-project/pull/130580
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DirectX] Updating DXContainer documentation to add Root Descriptors (PR #129759)

2025-03-10 Thread Finn Plummer via llvm-branch-commits


@@ -497,3 +497,49 @@ signature and passed to the shader without requiring a 
constant buffer resource:
 #. **Num32BitValues**: The number of 32-bit values included in this constant 
buffer.
 
 Root constants provide a fast way to pass small amounts of data directly to 
the shader without the overhead of creating and binding a constant buffer 
resource.
+
+Root Descriptor
+~~~
+
+Root descriptors provide a direct mechanism for binding individual resources 
to shader stages in the Direct3D 12 
+rendering pipeline. They represent a critical interface for efficient resource 
management, allowing applications 
+to specify how shader stages access specific GPU resources.

inbelic wrote:

Imo, this strays away from the `what and how` tone throughout this document and 
into `why`. I think the previous pr did a good job of keeping that tone, which 
I presume is what we would like. 

https://github.com/llvm/llvm-project/pull/129759
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port FEntryInserter to NPM (PR #129857)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits

https://github.com/cdevadas approved this pull request.

No tests for this pass?

https://github.com/llvm/llvm-project/pull/129857
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIMemoryLegalizer to NPM (PR #130060)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits

https://github.com/cdevadas approved this pull request.


https://github.com/llvm/llvm-project/pull/130060
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIInsertHardClauses to NPM (PR #130062)

2025-03-10 Thread Christudasan Devadasan via llvm-branch-commits


@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -run-pass 
si-insert-hard-clauses %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -passes 
si-insert-hard-clauses %s -o - | FileCheck %s

cdevadas wrote:

Ditto.

https://github.com/llvm/llvm-project/pull/130062
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AMDGPU][NPM] Port SIMemoryLegalizer to NPM (PR #130060)

2025-03-10 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan updated 
https://github.com/llvm/llvm-project/pull/130060

>From b1402edb380ddf044af4810a9b7a88c4f874c0ed Mon Sep 17 00:00:00 2001
From: Akshat Oke 
Date: Wed, 5 Mar 2025 11:06:40 +
Subject: [PATCH 1/2] [AMDGPU][NPM] Port SIMemoryLegalizer to NPM

---
 llvm/lib/Target/AMDGPU/AMDGPU.h   |  9 +++-
 llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def |  2 +-
 .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp |  5 ++-
 llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp  | 43 ++-
 4 files changed, 45 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index f331f741e3993..4197a60e77014 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -364,6 +364,13 @@ class GCNCreateVOPDPass : public 
PassInfoMixin<GCNCreateVOPDPass> {
 MachineFunctionAnalysisManager &AM);
 };
 
+class SIMemoryLegalizerPass : public PassInfoMixin<SIMemoryLegalizerPass> {
+public:
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+};
+
 FunctionPass *createAMDGPUAnnotateUniformValuesLegacy();
 
 ModulePass *createAMDGPUPrintfRuntimeBinding();
@@ -428,7 +435,7 @@ class SIAnnotateControlFlowPass
 void initializeSIAnnotateControlFlowLegacyPass(PassRegistry &);
 extern char &SIAnnotateControlFlowLegacyPassID;
 
-void initializeSIMemoryLegalizerPass(PassRegistry&);
+void initializeSIMemoryLegalizerLegacyPass(PassRegistry &);
 extern char &SIMemoryLegalizerID;
 
 void initializeSIModeRegisterLegacyPass(PassRegistry &);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def 
b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index 0e3dcb4267ede..de959f8a2aa62 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -113,6 +113,7 @@ MACHINE_FUNCTION_PASS("si-load-store-opt", 
SILoadStoreOptimizerPass())
 MACHINE_FUNCTION_PASS("si-lower-control-flow", SILowerControlFlowPass())
 MACHINE_FUNCTION_PASS("si-lower-sgpr-spills", SILowerSGPRSpillsPass())
 MACHINE_FUNCTION_PASS("si-lower-wwm-copies", SILowerWWMCopiesPass())
+MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
 MACHINE_FUNCTION_PASS("si-mode-register", SIModeRegisterPass())
 MACHINE_FUNCTION_PASS("si-opt-vgpr-liverange", SIOptimizeVGPRLiveRangePass())
 MACHINE_FUNCTION_PASS("si-optimize-exec-masking", SIOptimizeExecMaskingPass())
@@ -132,7 +133,6 @@ DUMMY_MACHINE_FUNCTION_PASS("amdgpu-set-wave-priority", 
AMDGPUSetWavePriorityPas
 DUMMY_MACHINE_FUNCTION_PASS("si-insert-hard-clauses", 
SIInsertHardClausesPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-insert-waitcnts", SIInsertWaitcntsPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-late-branch-lowering", 
SILateBranchLoweringPass())
-DUMMY_MACHINE_FUNCTION_PASS("si-memory-legalizer", SIMemoryLegalizerPass())
 DUMMY_MACHINE_FUNCTION_PASS("si-pre-emit-peephole", SIPreEmitPeepholePass())
 // TODO: Move amdgpu-preload-kern-arg-prolog to MACHINE_FUNCTION_PASS since it
 // already exists.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 73ae9135eb319..dbe212ad0a216 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -541,7 +541,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void 
LLVMInitializeAMDGPUTarget() {
   initializeSILowerControlFlowLegacyPass(*PR);
   initializeSIPreEmitPeepholePass(*PR);
   initializeSILateBranchLoweringPass(*PR);
-  initializeSIMemoryLegalizerPass(*PR);
+  initializeSIMemoryLegalizerLegacyPass(*PR);
   initializeSIOptimizeExecMaskingLegacyPass(*PR);
   initializeSIPreAllocateWWMRegsLegacyPass(*PR);
   initializeSIFormMemoryClausesLegacyPass(*PR);
@@ -2151,7 +2151,8 @@ void 
AMDGPUCodeGenPassBuilder::addPreEmitPass(AddMachinePass &addPass) const {
   if (isPassEnabled(EnableVOPD, CodeGenOptLevel::Less)) {
 addPass(GCNCreateVOPDPass());
   }
-  // TODO: addPass(SIMemoryLegalizerPass());
+
+  addPass(SIMemoryLegalizerPass());
   // TODO: addPass(SIInsertWaitcntsPass());
 
   // TODO: addPass(SIModeRegisterPass());
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp 
b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 34953f9c08db7..1375ba201ec58 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -21,8 +21,10 @@
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/MemoryModelRelaxationAnnotations.h"
+#include "llvm/IR/PassManager.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/TargetParser/TargetParser.h"
 
@@ -625,9 +627,9 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
   }
 };
 
-class SIMemoryLegalizer final : public MachineFunctionPass {
+class SIM

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port VirtRegRewriter to NPM (PR #130564)

2025-03-10 Thread Akshat Oke via llvm-branch-commits

https://github.com/optimisan ready_for_review 
https://github.com/llvm/llvm-project/pull/130564
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [CodeGen][NPM] Port VirtRegRewriter to NPM (PR #130564)

2025-03-10 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: Akshat Oke (optimisan)


Changes

Not sure why this is squished into VirtRegMap.h

---
Full diff: https://github.com/llvm/llvm-project/pull/130564.diff


12 Files Affected:

- (modified) llvm/include/llvm/CodeGen/VirtRegMap.h (+15) 
- (modified) llvm/include/llvm/InitializePasses.h (+1-1) 
- (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+1) 
- (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+6-1) 
- (modified) llvm/lib/CodeGen/CodeGen.cpp (+1-1) 
- (modified) llvm/lib/CodeGen/VirtRegMap.cpp (+69-16) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+13) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir (+2) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir (+2) 
- (modified) llvm/test/CodeGen/AMDGPU/greedy-remark-crash-unassigned-reg.mir 
(+4) 
- (modified) llvm/test/CodeGen/X86/pr30821.mir (+1) 


``diff
diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h 
b/llvm/include/llvm/CodeGen/VirtRegMap.h
index c9e405e1981d9..f5fba0d65401e 100644
--- a/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -235,6 +235,21 @@ class VirtRegMapPrinterPass : public 
PassInfoMixin<VirtRegMapPrinterPass> {
 MachineFunctionAnalysisManager &MFAM);
   static bool isRequired() { return true; }
 };
+
+class VirtRegRewriterPass : public PassInfoMixin<VirtRegRewriterPass> {
+  bool ClearVirtRegs = true;
+
+public:
+  VirtRegRewriterPass(bool ClearVirtRegs = true)
+  : ClearVirtRegs(ClearVirtRegs) {}
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+
+  static bool isRequired() { return true; }
+
+  void printPipeline(raw_ostream &OS, function_ref<StringRef(StringRef)>) 
const;
+};
+
 } // end llvm namespace
 
 #endif // LLVM_CODEGEN_VIRTREGMAP_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 9afca6c0dab70..b8b0d09b917fb 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -316,7 +316,7 @@ void 
initializeUnreachableBlockElimLegacyPassPass(PassRegistry &);
 void initializeUnreachableMachineBlockElimPass(PassRegistry &);
 void initializeVerifierLegacyPassPass(PassRegistry &);
 void initializeVirtRegMapWrapperLegacyPass(PassRegistry &);
-void initializeVirtRegRewriterPass(PassRegistry &);
+void initializeVirtRegRewriterLegacyPass(PassRegistry &);
 void initializeWasmEHPreparePass(PassRegistry &);
 void initializeWinEHPreparePass(PassRegistry &);
 void initializeWriteBitcodePassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 9ec9836c15eb5..db227ddaa0e2b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -81,6 +81,7 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TwoAddressInstructionPass.h"
 #include "llvm/CodeGen/UnreachableBlockElim.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/CodeGen/WasmEHPrepare.h"
 #include "llvm/CodeGen/WinEHPrepare.h"
 #include "llvm/IR/PassManager.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 87253ebc8b789..eab6a6f6cd494 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -215,6 +215,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
   return parseRegAllocGreedyFilterFunc(*PB, Params);
 }, "reg-filter"
 )
+
+MACHINE_FUNCTION_PASS_WITH_PARAMS(
+"virt-reg-rewriter", "VirtRegRewriterPass",
+[](bool ClearVirtRegs) { return VirtRegRewriterPass(ClearVirtRegs); },
+parseVirtRegRewriterPassOptions, "no-clear-vregs;clear-vregs")
+
 #undef MACHINE_FUNCTION_PASS_WITH_PARAMS
 
 // After a pass is converted to new pass manager, its entry should be moved 
from
@@ -287,6 +293,5 @@ DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass)
 DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass)
 DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass)
 DUMMY_MACHINE_FUNCTION_PASS("unpack-mi-bundles", UnpackMachineBundlesPass)
-DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass)
 DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass)
 #undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 3169a109aa174..b3ec59889b8b7 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -141,7 +141,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeUnreachableBlockElimLegacyPassPass(Registry);
   initializeUnreachableMachineBlockElimPass(Registry);
   initializeVirtRegMapWrapperLegacyPass(Registry);
-  initializeVirtRegRewriterPass(Registry);
+  initializeVirtRegRewriterLegacyPass(Regis

[llvm-branch-commits] [llvm] [CodeGen][NPM] Port VirtRegRewriter to NPM (PR #130564)

2025-03-10 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Akshat Oke (optimisan)


Changes

Not sure why this is squished into VirtRegMap.h

---
Full diff: https://github.com/llvm/llvm-project/pull/130564.diff


12 Files Affected:

- (modified) llvm/include/llvm/CodeGen/VirtRegMap.h (+15) 
- (modified) llvm/include/llvm/InitializePasses.h (+1-1) 
- (modified) llvm/include/llvm/Passes/CodeGenPassBuilder.h (+1) 
- (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+6-1) 
- (modified) llvm/lib/CodeGen/CodeGen.cpp (+1-1) 
- (modified) llvm/lib/CodeGen/VirtRegMap.cpp (+69-16) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+13) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/alloc-aligned-tuples-gfx90a.mir (+2) 
- (modified) llvm/test/CodeGen/AMDGPU/fold-restore-undef-use.mir (+2) 
- (modified) llvm/test/CodeGen/AMDGPU/greedy-remark-crash-unassigned-reg.mir 
(+4) 
- (modified) llvm/test/CodeGen/X86/pr30821.mir (+1) 


``diff
diff --git a/llvm/include/llvm/CodeGen/VirtRegMap.h 
b/llvm/include/llvm/CodeGen/VirtRegMap.h
index c9e405e1981d9..f5fba0d65401e 100644
--- a/llvm/include/llvm/CodeGen/VirtRegMap.h
+++ b/llvm/include/llvm/CodeGen/VirtRegMap.h
@@ -235,6 +235,21 @@ class VirtRegMapPrinterPass : public 
PassInfoMixin<VirtRegMapPrinterPass> {
 MachineFunctionAnalysisManager &MFAM);
   static bool isRequired() { return true; }
 };
+
+class VirtRegRewriterPass : public PassInfoMixin<VirtRegRewriterPass> {
+  bool ClearVirtRegs = true;
+
+public:
+  VirtRegRewriterPass(bool ClearVirtRegs = true)
+  : ClearVirtRegs(ClearVirtRegs) {}
+  PreservedAnalyses run(MachineFunction &MF,
+MachineFunctionAnalysisManager &MFAM);
+
+  static bool isRequired() { return true; }
+
+  void printPipeline(raw_ostream &OS, function_ref<StringRef(StringRef)>) 
const;
+};
+
 } // end llvm namespace
 
 #endif // LLVM_CODEGEN_VIRTREGMAP_H
diff --git a/llvm/include/llvm/InitializePasses.h 
b/llvm/include/llvm/InitializePasses.h
index 9afca6c0dab70..b8b0d09b917fb 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -316,7 +316,7 @@ void 
initializeUnreachableBlockElimLegacyPassPass(PassRegistry &);
 void initializeUnreachableMachineBlockElimPass(PassRegistry &);
 void initializeVerifierLegacyPassPass(PassRegistry &);
 void initializeVirtRegMapWrapperLegacyPass(PassRegistry &);
-void initializeVirtRegRewriterPass(PassRegistry &);
+void initializeVirtRegRewriterLegacyPass(PassRegistry &);
 void initializeWasmEHPreparePass(PassRegistry &);
 void initializeWinEHPreparePass(PassRegistry &);
 void initializeWriteBitcodePassPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h 
b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 9ec9836c15eb5..db227ddaa0e2b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -81,6 +81,7 @@
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/CodeGen/TwoAddressInstructionPass.h"
 #include "llvm/CodeGen/UnreachableBlockElim.h"
+#include "llvm/CodeGen/VirtRegMap.h"
 #include "llvm/CodeGen/WasmEHPrepare.h"
 #include "llvm/CodeGen/WinEHPrepare.h"
 #include "llvm/IR/PassManager.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def 
b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 87253ebc8b789..eab6a6f6cd494 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -215,6 +215,12 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
   return parseRegAllocGreedyFilterFunc(*PB, Params);
 }, "reg-filter"
 )
+
+MACHINE_FUNCTION_PASS_WITH_PARAMS(
+"virt-reg-rewriter", "VirtRegRewriterPass",
+[](bool ClearVirtRegs) { return VirtRegRewriterPass(ClearVirtRegs); },
+parseVirtRegRewriterPassOptions, "no-clear-vregs;clear-vregs")
+
 #undef MACHINE_FUNCTION_PASS_WITH_PARAMS
 
 // After a pass is converted to new pass manager, its entry should be moved 
from
@@ -287,6 +293,5 @@ DUMMY_MACHINE_FUNCTION_PASS("shrink-wrap", ShrinkWrapPass)
 DUMMY_MACHINE_FUNCTION_PASS("stack-frame-layout", StackFrameLayoutAnalysisPass)
 DUMMY_MACHINE_FUNCTION_PASS("stackmap-liveness", StackMapLivenessPass)
 DUMMY_MACHINE_FUNCTION_PASS("unpack-mi-bundles", UnpackMachineBundlesPass)
-DUMMY_MACHINE_FUNCTION_PASS("virtregrewriter", VirtRegRewriterPass)
 DUMMY_MACHINE_FUNCTION_PASS("xray-instrumentation", XRayInstrumentationPass)
 #undef DUMMY_MACHINE_FUNCTION_PASS
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 3169a109aa174..b3ec59889b8b7 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -141,7 +141,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeUnreachableBlockElimLegacyPassPass(Registry);
   initializeUnreachableMachineBlockElimPass(Registry);
   initializeVirtRegMapWrapperLegacyPass(Registry);
-  initializeVirtRegRewriterPass(Registry);
+  initializeVirtRegRewriterLegacyPass(Re

[llvm-branch-commits] [clang] [Driver] Temporarily probe aarch64-linux-gnu GCC installation (PR #102039)

2025-03-10 Thread Paul Osmialowski via llvm-branch-commits

pawosm-arm wrote:

We're facing it again, with LLVM20 this time

https://github.com/llvm/llvm-project/pull/102039
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits