[llvm-branch-commits] [clang] [clang][OpenMP] Use DirectiveNameParser to parse directive names (PR #146779)

2025-07-03 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/146779

>From ab3f0cc9d240f12e07be452effa75b5c7d010d9b Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Wed, 2 Jul 2025 11:25:00 -0500
Subject: [PATCH] [clang][OpenMP] Use DirectiveNameParser to parse directive
 names

This simplifies the parsing code in clang quite a bit.
---
 clang/lib/Parse/ParseOpenMP.cpp | 181 
 1 file changed, 19 insertions(+), 162 deletions(-)

diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index f694ae1d0d112..c0a17d0e9537d 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -25,6 +25,7 @@
 #include "clang/Sema/SemaOpenMP.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Frontend/OpenMP/DirectiveNameParser.h"
 #include "llvm/Frontend/OpenMP/OMPAssume.h"
 #include "llvm/Frontend/OpenMP/OMPContext.h"
 #include 
@@ -37,48 +38,6 @@ using namespace llvm::omp;
 
//===--===//
 
 namespace {
-enum OpenMPDirectiveKindEx {
-  OMPD_cancellation = llvm::omp::Directive_enumSize + 1,
-  OMPD_data,
-  OMPD_declare,
-  OMPD_end,
-  OMPD_end_declare,
-  OMPD_enter,
-  OMPD_exit,
-  OMPD_point,
-  OMPD_reduction,
-  OMPD_target_enter,
-  OMPD_target_exit,
-  OMPD_update,
-  OMPD_distribute_parallel,
-  OMPD_teams_distribute_parallel,
-  OMPD_target_teams_distribute_parallel,
-  OMPD_mapper,
-  OMPD_variant,
-  OMPD_begin,
-  OMPD_begin_declare,
-};
-
-// Helper to unify the enum class OpenMPDirectiveKind with its extension
-// the OpenMPDirectiveKindEx enum which allows to use them together as if they
-// are unsigned values.
-struct OpenMPDirectiveKindExWrapper {
-  OpenMPDirectiveKindExWrapper(unsigned Value) : Value(Value) {}
-  OpenMPDirectiveKindExWrapper(OpenMPDirectiveKind DK) : Value(unsigned(DK)) {}
-  bool operator==(OpenMPDirectiveKindExWrapper V) const {
-return Value == V.Value;
-  }
-  bool operator!=(OpenMPDirectiveKindExWrapper V) const {
-return Value != V.Value;
-  }
-  bool operator==(OpenMPDirectiveKind V) const { return Value == unsigned(V); }
-  bool operator!=(OpenMPDirectiveKind V) const { return Value != unsigned(V); }
-  bool operator<(OpenMPDirectiveKind V) const { return Value < unsigned(V); }
-  operator unsigned() const { return Value; }
-  operator OpenMPDirectiveKind() const { return OpenMPDirectiveKind(Value); }
-  unsigned Value;
-};
-
 class DeclDirectiveListParserHelper final {
   SmallVector Identifiers;
   Parser *P;
@@ -97,130 +56,32 @@ class DeclDirectiveListParserHelper final {
 };
 } // namespace
 
-// Map token string to extended OMP token kind that are
-// OpenMPDirectiveKind + OpenMPDirectiveKindEx.
-static unsigned getOpenMPDirectiveKindEx(StringRef S) {
-  OpenMPDirectiveKindExWrapper DKind = getOpenMPDirectiveKind(S);
-  if (DKind != OMPD_unknown)
-return DKind;
-
-  return llvm::StringSwitch(S)
-  .Case("cancellation", OMPD_cancellation)
-  .Case("data", OMPD_data)
-  .Case("declare", OMPD_declare)
-  .Case("end", OMPD_end)
-  .Case("enter", OMPD_enter)
-  .Case("exit", OMPD_exit)
-  .Case("point", OMPD_point)
-  .Case("reduction", OMPD_reduction)
-  .Case("update", OMPD_update)
-  .Case("mapper", OMPD_mapper)
-  .Case("variant", OMPD_variant)
-  .Case("begin", OMPD_begin)
-  .Default(OMPD_unknown);
-}
+static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) {
+  static const DirectiveNameParser DNP;
+
+  const DirectiveNameParser::State *S = DNP.initial();
 
-static OpenMPDirectiveKindExWrapper parseOpenMPDirectiveKind(Parser &P) {
-  // Array of foldings: F[i][0] F[i][1] ===> F[i][2].
-  // E.g.: OMPD_for OMPD_simd ===> OMPD_for_simd
-  // TODO: add other combined directives in topological order.
-  static const OpenMPDirectiveKindExWrapper F[][3] = {
-  {OMPD_begin, OMPD_declare, OMPD_begin_declare},
-  {OMPD_begin, OMPD_assumes, OMPD_begin_assumes},
-  {OMPD_end, OMPD_declare, OMPD_end_declare},
-  {OMPD_end, OMPD_assumes, OMPD_end_assumes},
-  {OMPD_cancellation, OMPD_point, OMPD_cancellation_point},
-  {OMPD_declare, OMPD_reduction, OMPD_declare_reduction},
-  {OMPD_declare, OMPD_mapper, OMPD_declare_mapper},
-  {OMPD_declare, OMPD_simd, OMPD_declare_simd},
-  {OMPD_declare, OMPD_target, OMPD_declare_target},
-  {OMPD_declare, OMPD_variant, OMPD_declare_variant},
-  {OMPD_begin_declare, OMPD_target, OMPD_begin_declare_target},
-  {OMPD_begin_declare, OMPD_variant, OMPD_begin_declare_variant},
-  {OMPD_end_declare, OMPD_variant, OMPD_end_declare_variant},
-  {OMPD_distribute, OMPD_parallel, OMPD_distribute_parallel},
-  {OMPD_distribute_parallel, OMPD_for, OMPD_distribute_parallel_for},
-  {OMPD_distribute_parallel_for, OMPD_simd,
-   OMPD_distribute_parallel_for_simd},
-  {OMPD_distribute, OMPD_simd, OMPD_distrib

[llvm-branch-commits] [clang] [clang][OpenMP] Use DirectiveNameParser to parse directive names (PR #146779)

2025-07-03 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz updated 
https://github.com/llvm/llvm-project/pull/146779

>From ab3f0cc9d240f12e07be452effa75b5c7d010d9b Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Wed, 2 Jul 2025 11:25:00 -0500
Subject: [PATCH 1/2] [clang][OpenMP] Use DirectiveNameParser to parse
 directive names

This simplifies the parsing code in clang quite a bit.
---
 clang/lib/Parse/ParseOpenMP.cpp | 181 
 1 file changed, 19 insertions(+), 162 deletions(-)

diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index f694ae1d0d112..c0a17d0e9537d 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -25,6 +25,7 @@
 #include "clang/Sema/SemaOpenMP.h"
 #include "llvm/ADT/SmallBitVector.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Frontend/OpenMP/DirectiveNameParser.h"
 #include "llvm/Frontend/OpenMP/OMPAssume.h"
 #include "llvm/Frontend/OpenMP/OMPContext.h"
 #include 
@@ -37,48 +38,6 @@ using namespace llvm::omp;
 
//===--===//
 
 namespace {
-enum OpenMPDirectiveKindEx {
-  OMPD_cancellation = llvm::omp::Directive_enumSize + 1,
-  OMPD_data,
-  OMPD_declare,
-  OMPD_end,
-  OMPD_end_declare,
-  OMPD_enter,
-  OMPD_exit,
-  OMPD_point,
-  OMPD_reduction,
-  OMPD_target_enter,
-  OMPD_target_exit,
-  OMPD_update,
-  OMPD_distribute_parallel,
-  OMPD_teams_distribute_parallel,
-  OMPD_target_teams_distribute_parallel,
-  OMPD_mapper,
-  OMPD_variant,
-  OMPD_begin,
-  OMPD_begin_declare,
-};
-
-// Helper to unify the enum class OpenMPDirectiveKind with its extension
-// the OpenMPDirectiveKindEx enum which allows to use them together as if they
-// are unsigned values.
-struct OpenMPDirectiveKindExWrapper {
-  OpenMPDirectiveKindExWrapper(unsigned Value) : Value(Value) {}
-  OpenMPDirectiveKindExWrapper(OpenMPDirectiveKind DK) : Value(unsigned(DK)) {}
-  bool operator==(OpenMPDirectiveKindExWrapper V) const {
-return Value == V.Value;
-  }
-  bool operator!=(OpenMPDirectiveKindExWrapper V) const {
-return Value != V.Value;
-  }
-  bool operator==(OpenMPDirectiveKind V) const { return Value == unsigned(V); }
-  bool operator!=(OpenMPDirectiveKind V) const { return Value != unsigned(V); }
-  bool operator<(OpenMPDirectiveKind V) const { return Value < unsigned(V); }
-  operator unsigned() const { return Value; }
-  operator OpenMPDirectiveKind() const { return OpenMPDirectiveKind(Value); }
-  unsigned Value;
-};
-
 class DeclDirectiveListParserHelper final {
   SmallVector Identifiers;
   Parser *P;
@@ -97,130 +56,32 @@ class DeclDirectiveListParserHelper final {
 };
 } // namespace
 
-// Map token string to extended OMP token kind that are
-// OpenMPDirectiveKind + OpenMPDirectiveKindEx.
-static unsigned getOpenMPDirectiveKindEx(StringRef S) {
-  OpenMPDirectiveKindExWrapper DKind = getOpenMPDirectiveKind(S);
-  if (DKind != OMPD_unknown)
-return DKind;
-
-  return llvm::StringSwitch(S)
-  .Case("cancellation", OMPD_cancellation)
-  .Case("data", OMPD_data)
-  .Case("declare", OMPD_declare)
-  .Case("end", OMPD_end)
-  .Case("enter", OMPD_enter)
-  .Case("exit", OMPD_exit)
-  .Case("point", OMPD_point)
-  .Case("reduction", OMPD_reduction)
-  .Case("update", OMPD_update)
-  .Case("mapper", OMPD_mapper)
-  .Case("variant", OMPD_variant)
-  .Case("begin", OMPD_begin)
-  .Default(OMPD_unknown);
-}
+static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) {
+  static const DirectiveNameParser DNP;
+
+  const DirectiveNameParser::State *S = DNP.initial();
 
-static OpenMPDirectiveKindExWrapper parseOpenMPDirectiveKind(Parser &P) {
-  // Array of foldings: F[i][0] F[i][1] ===> F[i][2].
-  // E.g.: OMPD_for OMPD_simd ===> OMPD_for_simd
-  // TODO: add other combined directives in topological order.
-  static const OpenMPDirectiveKindExWrapper F[][3] = {
-  {OMPD_begin, OMPD_declare, OMPD_begin_declare},
-  {OMPD_begin, OMPD_assumes, OMPD_begin_assumes},
-  {OMPD_end, OMPD_declare, OMPD_end_declare},
-  {OMPD_end, OMPD_assumes, OMPD_end_assumes},
-  {OMPD_cancellation, OMPD_point, OMPD_cancellation_point},
-  {OMPD_declare, OMPD_reduction, OMPD_declare_reduction},
-  {OMPD_declare, OMPD_mapper, OMPD_declare_mapper},
-  {OMPD_declare, OMPD_simd, OMPD_declare_simd},
-  {OMPD_declare, OMPD_target, OMPD_declare_target},
-  {OMPD_declare, OMPD_variant, OMPD_declare_variant},
-  {OMPD_begin_declare, OMPD_target, OMPD_begin_declare_target},
-  {OMPD_begin_declare, OMPD_variant, OMPD_begin_declare_variant},
-  {OMPD_end_declare, OMPD_variant, OMPD_end_declare_variant},
-  {OMPD_distribute, OMPD_parallel, OMPD_distribute_parallel},
-  {OMPD_distribute_parallel, OMPD_for, OMPD_distribute_parallel_for},
-  {OMPD_distribute_parallel_for, OMPD_simd,
-   OMPD_distribute_parallel_for_simd},
-  {OMPD_distribute, OMPD_simd, OMPD_dis

[llvm-branch-commits] [llvm] [AArch64][PAC] Rework discriminator analysis in AUT and AUTPAC (PR #146489)

2025-07-03 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/146489

>From 6634cb01b163b2482ce57915e63e1e386308f218 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Sat, 28 Jun 2025 11:09:01 +0300
Subject: [PATCH] [AArch64][PAC] Rework discriminator analysis in AUT and
 AUTPAC

Post-process the discriminator components in the custom inserter hook
to eliminate the logic duplicated between DAGISel and GlobalISel and to
improve cross-BB analysis for DAGISel.
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp|  51 +---
 .../Target/AArch64/AArch64ISelLowering.cpp|  10 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   2 +
 .../GISel/AArch64InstructionSelector.cpp  |  27 +-
 llvm/test/CodeGen/AArch64/ptrauth-isel.ll | 235 +-
 5 files changed, 256 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index da617b7e19266..5d3fd48f448b5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1487,39 +1487,6 @@ void AArch64DAGToDAGISel::SelectTable(SDNode *N, 
unsigned NumVecs, unsigned Opc,
   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
 }
 
-static std::tuple
-extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
-  SDLoc DL(Disc);
-  SDValue AddrDisc;
-  SDValue ConstDisc;
-
-  // If this is a blend, remember the constant and address discriminators.
-  // Otherwise, it's either a constant discriminator, or a non-blended
-  // address discriminator.
-  if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
-  Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
-AddrDisc = Disc->getOperand(1);
-ConstDisc = Disc->getOperand(2);
-  } else {
-ConstDisc = Disc;
-  }
-
-  // If the constant discriminator (either the blend RHS, or the entire
-  // discriminator value) isn't a 16-bit constant, bail out, and let the
-  // discriminator be computed separately.
-  auto *ConstDiscN = dyn_cast(ConstDisc);
-  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
-return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
-
-  // If there's no address discriminator, use XZR directly.
-  if (!AddrDisc)
-AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
-
-  return std::make_tuple(
-  DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
-  AddrDisc);
-}
-
 void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
   SDLoc DL(N);
   // IntrinsicID is operand #0
@@ -1530,13 +1497,11 @@ void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
   unsigned AUTKeyC = cast(AUTKey)->getZExtValue();
   AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
 
-  SDValue AUTAddrDisc, AUTConstDisc;
-  std::tie(AUTConstDisc, AUTAddrDisc) =
-  extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
+  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
 
   SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
  AArch64::X16, Val, SDValue());
-  SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
+  SDValue Ops[] = {AUTKey, Zero, AUTDisc, X16Copy.getValue(1)};
 
   SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
   ReplaceNode(N, AUT);
@@ -1557,19 +1522,13 @@ void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode 
*N) {
   AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
   PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
 
-  SDValue AUTAddrDisc, AUTConstDisc;
-  std::tie(AUTConstDisc, AUTAddrDisc) =
-  extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
-
-  SDValue PACAddrDisc, PACConstDisc;
-  std::tie(PACConstDisc, PACAddrDisc) =
-  extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
+  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
 
   SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
  AArch64::X16, Val, SDValue());
 
-  SDValue Ops[] = {AUTKey,   AUTConstDisc, AUTAddrDisc,PACKey,
-   PACConstDisc, PACAddrDisc,  X16Copy.getValue(1)};
+  SDValue Ops[] = {
+  AUTKey, Zero, AUTDisc, PACKey, Zero, PACDisc, X16Copy.getValue(1)};
 
   SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
   ReplaceNode(N, AUTPAC);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c8eb9f3dd01ad..d7b835c8acba7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3241,10 +3241,20 @@ MachineBasicBlock 
*AArch64TargetLowering::EmitInstrWithCustomInserter(
   case AArch64::MOVT_TIZ_PSEUDO:
 return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true);
 
+  case AArch64::AUT:
+fixupBlendComponents(MI, BB, MI.getOperand(1), MI.getOperand(2),
+   

[llvm-branch-commits] [llvm] [AArch64][PAC] Introduce AArch64::PAC pseudo instruction (PR #146488)

2025-07-03 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/146488

>From ba9d8965de86e63cce18fc9c2d0fe9484f172e1f Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Sat, 28 Jun 2025 10:50:46 +0300
Subject: [PATCH] [AArch64][PAC] Introduce AArch64::PAC pseudo instruction

Introduce a pseudo instruction to be selected instead of a pair of
`MOVKXi` and `PAC[DI][AB]` carrying address and immediate modifiers
as separate operands. The new pseudo instruction is expanded in
AsmPrinter, so that MOVKXi is emitted immediately before `PAC[DI][AB]`.
This way, an attacker cannot control the immediate modifier used to sign
the value, even if the address modifier can be substituted.

To simplify instruction selection, select the AArch64::PAC pseudo using
a TableGen pattern and post-process its $AddrDisc operand in a custom
inserter hook - this eliminates the duplication of logic between DAGISel
and GlobalISel. Furthermore, this improves cross-BB analysis in the case
of DAGISel.
---
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  32 +++
 .../Target/AArch64/AArch64ISelLowering.cpp|  74 +++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |   7 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  21 +-
 llvm/test/CodeGen/AArch64/ptrauth-isel.ll | 205 ++
 5 files changed, 338 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/ptrauth-isel.ll

diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp 
b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index dd10050592190..f34217a3a8133 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -171,6 +171,9 @@ class AArch64AsmPrinter : public AsmPrinter {
   // Emit the sequence for AUT or AUTPAC.
   void emitPtrauthAuthResign(const MachineInstr *MI);
 
+  // Emit the sequence for PAC.
+  void emitPtrauthSign(const MachineInstr *MI);
+
   // Emit the sequence to compute the discriminator.
   //
   // ScratchReg should be x16/x17.
@@ -2173,6 +2176,31 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const 
MachineInstr *MI) {
 OutStreamer->emitLabel(EndSym);
 }
 
+void AArch64AsmPrinter::emitPtrauthSign(const MachineInstr *MI) {
+  Register Val = MI->getOperand(1).getReg();
+  auto Key = (AArch64PACKey::ID)MI->getOperand(2).getImm();
+  uint64_t Disc = MI->getOperand(3).getImm();
+  Register AddrDisc = MI->getOperand(4).getReg();
+  bool AddrDiscKilled = MI->getOperand(4).isKill();
+
+  // Compute pac discriminator into x17
+  assert(isUInt<16>(Disc));
+  Register DiscReg = emitPtrauthDiscriminator(
+  Disc, AddrDisc, AArch64::X17, /*MayUseAddrAsScratch=*/AddrDiscKilled);
+  bool IsZeroDisc = DiscReg == AArch64::XZR;
+  unsigned Opc = getPACOpcodeForKey(Key, IsZeroDisc);
+
+  //  paciza x16  ; if  IsZeroDisc
+  //  pacia x16, x17  ; if !IsZeroDisc
+  MCInst PACInst;
+  PACInst.setOpcode(Opc);
+  PACInst.addOperand(MCOperand::createReg(Val));
+  PACInst.addOperand(MCOperand::createReg(Val));
+  if (!IsZeroDisc)
+PACInst.addOperand(MCOperand::createReg(DiscReg));
+  EmitToStreamer(*OutStreamer, PACInst);
+}
+
 void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
   bool IsCall = MI->getOpcode() == AArch64::BLRA;
   unsigned BrTarget = MI->getOperand(0).getReg();
@@ -2867,6 +2895,10 @@ void AArch64AsmPrinter::emitInstruction(const 
MachineInstr *MI) {
 emitPtrauthAuthResign(MI);
 return;
 
+  case AArch64::PAC:
+emitPtrauthSign(MI);
+return;
+
   case AArch64::LOADauthptrstatic:
 LowerLOADauthptrstatic(*MI);
 return;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fb8bd81c033af..c8eb9f3dd01ad 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3073,6 +3073,75 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr 
&MI,
   return BB;
 }
 
+// Helper function to find the instruction that defined a virtual register.
+// If unable to find such instruction, returns nullptr.
+static MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI,
+ Register Reg) {
+  while (Reg.isVirtual()) {
+MachineInstr *DefMI = MRI.getVRegDef(Reg);
+assert(DefMI && "Virtual register definition not found");
+unsigned Opcode = DefMI->getOpcode();
+
+if (Opcode == AArch64::COPY) {
+  Reg = DefMI->getOperand(1).getReg();
+  // Vreg is defined by copying from physreg.
+  if (Reg.isPhysical())
+return DefMI;
+  continue;
+}
+if (Opcode == AArch64::SUBREG_TO_REG) {
+  Reg = DefMI->getOperand(2).getReg();
+  continue;
+}
+
+return DefMI;
+  }
+  return nullptr;
+}
+
+void AArch64TargetLowering::fixupBlendComponents(
+MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp,
+MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const {
+  const TargetInstrInfo *TII = Subtarge

[llvm-branch-commits] [llvm] [AArch64][PAC] Rework discriminator analysis in AUT and AUTPAC (PR #146489)

2025-07-03 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/146489

>From 6634cb01b163b2482ce57915e63e1e386308f218 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Sat, 28 Jun 2025 11:09:01 +0300
Subject: [PATCH] [AArch64][PAC] Rework discriminator analysis in AUT and
 AUTPAC

Post-process the discriminator components in the custom inserter hook
to eliminate the logic duplicated between DAGISel and GlobalISel and to
improve cross-BB analysis for DAGISel.
---
 .../Target/AArch64/AArch64ISelDAGToDAG.cpp|  51 +---
 .../Target/AArch64/AArch64ISelLowering.cpp|  10 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   2 +
 .../GISel/AArch64InstructionSelector.cpp  |  27 +-
 llvm/test/CodeGen/AArch64/ptrauth-isel.ll | 235 +-
 5 files changed, 256 insertions(+), 69 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index da617b7e19266..5d3fd48f448b5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -1487,39 +1487,6 @@ void AArch64DAGToDAGISel::SelectTable(SDNode *N, 
unsigned NumVecs, unsigned Opc,
   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
 }
 
-static std::tuple
-extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
-  SDLoc DL(Disc);
-  SDValue AddrDisc;
-  SDValue ConstDisc;
-
-  // If this is a blend, remember the constant and address discriminators.
-  // Otherwise, it's either a constant discriminator, or a non-blended
-  // address discriminator.
-  if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
-  Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
-AddrDisc = Disc->getOperand(1);
-ConstDisc = Disc->getOperand(2);
-  } else {
-ConstDisc = Disc;
-  }
-
-  // If the constant discriminator (either the blend RHS, or the entire
-  // discriminator value) isn't a 16-bit constant, bail out, and let the
-  // discriminator be computed separately.
-  auto *ConstDiscN = dyn_cast(ConstDisc);
-  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
-return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);
-
-  // If there's no address discriminator, use XZR directly.
-  if (!AddrDisc)
-AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);
-
-  return std::make_tuple(
-  DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
-  AddrDisc);
-}
-
 void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
   SDLoc DL(N);
   // IntrinsicID is operand #0
@@ -1530,13 +1497,11 @@ void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
   unsigned AUTKeyC = cast(AUTKey)->getZExtValue();
   AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
 
-  SDValue AUTAddrDisc, AUTConstDisc;
-  std::tie(AUTConstDisc, AUTAddrDisc) =
-  extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
+  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
 
   SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
  AArch64::X16, Val, SDValue());
-  SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};
+  SDValue Ops[] = {AUTKey, Zero, AUTDisc, X16Copy.getValue(1)};
 
   SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
   ReplaceNode(N, AUT);
@@ -1557,19 +1522,13 @@ void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode 
*N) {
   AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
   PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);
 
-  SDValue AUTAddrDisc, AUTConstDisc;
-  std::tie(AUTConstDisc, AUTAddrDisc) =
-  extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);
-
-  SDValue PACAddrDisc, PACConstDisc;
-  std::tie(PACConstDisc, PACAddrDisc) =
-  extractPtrauthBlendDiscriminators(PACDisc, CurDAG);
+  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64);
 
   SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
  AArch64::X16, Val, SDValue());
 
-  SDValue Ops[] = {AUTKey,   AUTConstDisc, AUTAddrDisc,PACKey,
-   PACConstDisc, PACAddrDisc,  X16Copy.getValue(1)};
+  SDValue Ops[] = {
+  AUTKey, Zero, AUTDisc, PACKey, Zero, PACDisc, X16Copy.getValue(1)};
 
   SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
   ReplaceNode(N, AUTPAC);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c8eb9f3dd01ad..d7b835c8acba7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3241,10 +3241,20 @@ MachineBasicBlock 
*AArch64TargetLowering::EmitInstrWithCustomInserter(
   case AArch64::MOVT_TIZ_PSEUDO:
 return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true);
 
+  case AArch64::AUT:
+fixupBlendComponents(MI, BB, MI.getOperand(1), MI.getOperand(2),
+   

[llvm-branch-commits] [llvm] [AArch64][PAC] Introduce AArch64::PAC pseudo instruction (PR #146488)

2025-07-03 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/146488

>From ba9d8965de86e63cce18fc9c2d0fe9484f172e1f Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Sat, 28 Jun 2025 10:50:46 +0300
Subject: [PATCH] [AArch64][PAC] Introduce AArch64::PAC pseudo instruction

Introduce a pseudo instruction to be selected instead of a pair of
`MOVKXi` and `PAC[DI][AB]` carrying address and immediate modifiers
as separate operands. The new pseudo instruction is expanded in
AsmPrinter, so that MOVKXi is emitted immediately before `PAC[DI][AB]`.
This way, an attacker cannot control the immediate modifier used to sign
the value, even if the address modifier can be substituted.

To simplify instruction selection, select the AArch64::PAC pseudo using
a TableGen pattern and post-process its $AddrDisc operand in a custom
inserter hook - this eliminates the duplication of logic between DAGISel
and GlobalISel. Furthermore, this improves cross-BB analysis in the case
of DAGISel.
---
 llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp |  32 +++
 .../Target/AArch64/AArch64ISelLowering.cpp|  74 +++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |   7 +
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  21 +-
 llvm/test/CodeGen/AArch64/ptrauth-isel.ll | 205 ++
 5 files changed, 338 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/ptrauth-isel.ll

diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp 
b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index dd10050592190..f34217a3a8133 100644
--- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -171,6 +171,9 @@ class AArch64AsmPrinter : public AsmPrinter {
   // Emit the sequence for AUT or AUTPAC.
   void emitPtrauthAuthResign(const MachineInstr *MI);
 
+  // Emit the sequence for PAC.
+  void emitPtrauthSign(const MachineInstr *MI);
+
   // Emit the sequence to compute the discriminator.
   //
   // ScratchReg should be x16/x17.
@@ -2173,6 +2176,31 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const 
MachineInstr *MI) {
 OutStreamer->emitLabel(EndSym);
 }
 
+void AArch64AsmPrinter::emitPtrauthSign(const MachineInstr *MI) {
+  Register Val = MI->getOperand(1).getReg();
+  auto Key = (AArch64PACKey::ID)MI->getOperand(2).getImm();
+  uint64_t Disc = MI->getOperand(3).getImm();
+  Register AddrDisc = MI->getOperand(4).getReg();
+  bool AddrDiscKilled = MI->getOperand(4).isKill();
+
+  // Compute pac discriminator into x17
+  assert(isUInt<16>(Disc));
+  Register DiscReg = emitPtrauthDiscriminator(
+  Disc, AddrDisc, AArch64::X17, /*MayUseAddrAsScratch=*/AddrDiscKilled);
+  bool IsZeroDisc = DiscReg == AArch64::XZR;
+  unsigned Opc = getPACOpcodeForKey(Key, IsZeroDisc);
+
+  //  paciza x16  ; if  IsZeroDisc
+  //  pacia x16, x17  ; if !IsZeroDisc
+  MCInst PACInst;
+  PACInst.setOpcode(Opc);
+  PACInst.addOperand(MCOperand::createReg(Val));
+  PACInst.addOperand(MCOperand::createReg(Val));
+  if (!IsZeroDisc)
+PACInst.addOperand(MCOperand::createReg(DiscReg));
+  EmitToStreamer(*OutStreamer, PACInst);
+}
+
 void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) {
   bool IsCall = MI->getOpcode() == AArch64::BLRA;
   unsigned BrTarget = MI->getOperand(0).getReg();
@@ -2867,6 +2895,10 @@ void AArch64AsmPrinter::emitInstruction(const 
MachineInstr *MI) {
 emitPtrauthAuthResign(MI);
 return;
 
+  case AArch64::PAC:
+emitPtrauthSign(MI);
+return;
+
   case AArch64::LOADauthptrstatic:
 LowerLOADauthptrstatic(*MI);
 return;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index fb8bd81c033af..c8eb9f3dd01ad 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3073,6 +3073,75 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr 
&MI,
   return BB;
 }
 
+// Helper function to find the instruction that defined a virtual register.
+// If unable to find such instruction, returns nullptr.
+static MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI,
+ Register Reg) {
+  while (Reg.isVirtual()) {
+MachineInstr *DefMI = MRI.getVRegDef(Reg);
+assert(DefMI && "Virtual register definition not found");
+unsigned Opcode = DefMI->getOpcode();
+
+if (Opcode == AArch64::COPY) {
+  Reg = DefMI->getOperand(1).getReg();
+  // Vreg is defined by copying from physreg.
+  if (Reg.isPhysical())
+return DefMI;
+  continue;
+}
+if (Opcode == AArch64::SUBREG_TO_REG) {
+  Reg = DefMI->getOperand(2).getReg();
+  continue;
+}
+
+return DefMI;
+  }
+  return nullptr;
+}
+
+void AArch64TargetLowering::fixupBlendComponents(
+MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp,
+MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const {
+  const TargetInstrInfo *TII = Subtarge

[llvm-branch-commits] [mlir] [mlir] NFC - refactor id builder and avoid leaking impl details (PR #146922)

2025-07-03 Thread via llvm-branch-commits

llvmbot wrote:



@llvm/pr-subscribers-mlir-gpu

@llvm/pr-subscribers-mlir

Author: Nicolas Vasilache (nicolasvasilache)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/146922.diff


3 Files Affected:

- (modified) mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h (+14-17) 
- (modified) mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp (+6-27) 
- (modified) mlir/lib/Dialect/GPU/TransformOps/Utils.cpp (+107-69) 


```diff
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h 
b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
index 111c67638efc8..de512ded59fec 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
@@ -28,27 +28,24 @@ namespace transform {
 namespace gpu {
 
 /// Helper type for functions that generate ids for the mapping of a 
scf.forall.
-/// Operates on both 1) an "original" basis that represents the individual
-/// thread and block ids and 2) a "scaled" basis that represents grouped ids
-/// (e.g. block clusters, warpgroups and warps).
-/// The mapping of ids is done in the "scaled" basis (i.e. when mapping to 
warps
-/// a division by 32 occurs).
-/// The predication is in the "original" basis using the "active" quantities
-/// (`activeMappingSizes`, `availableMappingSizes` and `activeIdOps`).
 struct IdBuilderResult {
-  // Ops used to replace the forall induction variables.
+  /// Error message, if not empty then building the ids failed.
+  std::string errorMsg;
+  /// Values used to replace the forall induction variables.
   SmallVector mappingIdOps;
-  // Available mapping sizes used to predicate the forall body when they are
-  // larger than the predicate mapping sizes.
-  SmallVector availableMappingSizes;
-  // Actual mapping sizes used to predicate the forall body when they are
-  // smaller than the available mapping sizes.
-  SmallVector activeMappingSizes;
-  // Ops used to predicate the forall body when activeMappingSizes is smaller
-  // than the available mapping sizes.
-  SmallVector activeIdOps;
+  /// Values used to predicate the forall body when activeMappingSizes is
+  /// smaller than the available mapping sizes.
+  SmallVector predicateOps;
 };
 
+inline raw_ostream &operator<<(raw_ostream &os, const IdBuilderResult &res) {
+  llvm::interleaveComma(res.mappingIdOps, os << "mappingIdOps: ");
+  os << "\n";
+  llvm::interleaveComma(res.predicateOps, os << "predicateOps: ");
+  os << "\n";
+  return os;
+}
+
 /// Common gpu id builder type, allows the configuration of lowering for 
various
 /// mapping schemes. Takes:
 ///   - A rewriter with insertion point set before the forall op to rewrite.
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp 
b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index 20d1c94409238..63f87d9b5877e 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -491,6 +491,10 @@ static DiagnosedSilenceableFailure 
rewriteOneForallCommonImpl(
 
   IdBuilderResult builderResult =
   gpuIdBuilder.idBuilder(rewriter, loc, forallMappingSizes, originalBasis);
+  if (!builderResult.errorMsg.empty())
+return definiteFailureHelper(transformOp, forallOp, 
builderResult.errorMsg);
+
+  LLVM_DEBUG(DBGS() << builderResult);
 
   // Step 4. Map the induction variables to the mappingIdOps, this may involve
   // a permutation.
@@ -501,7 +505,7 @@ static DiagnosedSilenceableFailure 
rewriteOneForallCommonImpl(
forallMappingAttrs.getArrayRef().take_front(forallOp.getRank( {
 auto mappingAttr = cast(dim);
 Value peIdOp = mappingIdOps[mappingAttr.getRelativeIndex()];
-LDBG("map: " << iv << " to" << peIdOp);
+LDBG("map: " << iv << " to " << peIdOp);
 bvm.map(iv, peIdOp);
   }
 
@@ -510,32 +514,7 @@ static DiagnosedSilenceableFailure 
rewriteOneForallCommonImpl(
   // originalBasis and no predication occurs.
   Value predicate;
   if (originalBasisWasProvided) {
-SmallVector activeMappingSizes = builderResult.activeMappingSizes;
-SmallVector availableMappingSizes =
-builderResult.availableMappingSizes;
-SmallVector activeIdOps = builderResult.activeIdOps;
-LDBG("activeMappingSizes: " << llvm::interleaved(activeMappingSizes));
-LDBG("availableMappingSizes: "
- << llvm::interleaved(availableMappingSizes));
-LDBG("activeIdOps: " << llvm::interleaved(activeIdOps));
-for (auto [activeId, activeMappingSize, availableMappingSize] :
- llvm::zip_equal(activeIdOps, activeMappingSizes,
- availableMappingSizes)) {
-  if (activeMappingSize > availableMappingSize) {
-return definiteFailureHelper(
-transformOp, forallOp,
-"Trying to map to fewer GPU threads than loop iterations but "
-"overprovisioning is not yet supported. "
-"Try additional tiling of the before mapping 

[llvm-branch-commits] [mlir] [mlir] NFC - refactor id builder and avoid leaking impl details (PR #146922)

2025-07-03 Thread Nicolas Vasilache via llvm-branch-commits

https://github.com/nicolasvasilache created 
https://github.com/llvm/llvm-project/pull/146922

None

>From c88aee740d5d944364e79600bf3c01493a1c3fee Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache 
Date: Thu, 3 Jul 2025 18:32:59 +0200
Subject: [PATCH] [mlir] NFC - refactor id builder and avoid leaking impl
 details

---
 .../mlir/Dialect/GPU/TransformOps/Utils.h |  31 ++-
 .../GPU/TransformOps/GPUTransformOps.cpp  |  33 +---
 mlir/lib/Dialect/GPU/TransformOps/Utils.cpp   | 176 +++---
 3 files changed, 127 insertions(+), 113 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h 
b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
index 111c67638efc8..de512ded59fec 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
@@ -28,27 +28,24 @@ namespace transform {
 namespace gpu {
 
 /// Helper type for functions that generate ids for the mapping of a 
scf.forall.
-/// Operates on both 1) an "original" basis that represents the individual
-/// thread and block ids and 2) a "scaled" basis that represents grouped ids
-/// (e.g. block clusters, warpgroups and warps).
-/// The mapping of ids is done in the "scaled" basis (i.e. when mapping to 
warps
-/// a division by 32 occurs).
-/// The predication is in the "original" basis using the "active" quantities
-/// (`activeMappingSizes`, `availableMappingSizes` and `activeIdOps`).
 struct IdBuilderResult {
-  // Ops used to replace the forall induction variables.
+  /// Error message, if not empty then building the ids failed.
+  std::string errorMsg;
+  /// Values used to replace the forall induction variables.
   SmallVector mappingIdOps;
-  // Available mapping sizes used to predicate the forall body when they are
-  // larger than the predicate mapping sizes.
-  SmallVector availableMappingSizes;
-  // Actual mapping sizes used to predicate the forall body when they are
-  // smaller than the available mapping sizes.
-  SmallVector activeMappingSizes;
-  // Ops used to predicate the forall body when activeMappingSizes is smaller
-  // than the available mapping sizes.
-  SmallVector activeIdOps;
+  /// Values used to predicate the forall body when activeMappingSizes is
+  /// smaller than the available mapping sizes.
+  SmallVector predicateOps;
 };
 
+inline raw_ostream &operator<<(raw_ostream &os, const IdBuilderResult &res) {
+  llvm::interleaveComma(res.mappingIdOps, os << "mappingIdOps: ");
+  os << "\n";
+  llvm::interleaveComma(res.predicateOps, os << "predicateOps: ");
+  os << "\n";
+  return os;
+}
+
 /// Common gpu id builder type, allows the configuration of lowering for 
various
 /// mapping schemes. Takes:
 ///   - A rewriter with insertion point set before the forall op to rewrite.
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp 
b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index 20d1c94409238..63f87d9b5877e 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -491,6 +491,10 @@ static DiagnosedSilenceableFailure 
rewriteOneForallCommonImpl(
 
   IdBuilderResult builderResult =
   gpuIdBuilder.idBuilder(rewriter, loc, forallMappingSizes, originalBasis);
+  if (!builderResult.errorMsg.empty())
+return definiteFailureHelper(transformOp, forallOp, 
builderResult.errorMsg);
+
+  LLVM_DEBUG(DBGS() << builderResult);
 
   // Step 4. Map the induction variables to the mappingIdOps, this may involve
   // a permutation.
@@ -501,7 +505,7 @@ static DiagnosedSilenceableFailure 
rewriteOneForallCommonImpl(
forallMappingAttrs.getArrayRef().take_front(forallOp.getRank( {
 auto mappingAttr = cast(dim);
 Value peIdOp = mappingIdOps[mappingAttr.getRelativeIndex()];
-LDBG("map: " << iv << " to" << peIdOp);
+LDBG("map: " << iv << " to " << peIdOp);
 bvm.map(iv, peIdOp);
   }
 
@@ -510,32 +514,7 @@ static DiagnosedSilenceableFailure 
rewriteOneForallCommonImpl(
   // originalBasis and no predication occurs.
   Value predicate;
   if (originalBasisWasProvided) {
-SmallVector activeMappingSizes = builderResult.activeMappingSizes;
-SmallVector availableMappingSizes =
-builderResult.availableMappingSizes;
-SmallVector activeIdOps = builderResult.activeIdOps;
-LDBG("activeMappingSizes: " << llvm::interleaved(activeMappingSizes));
-LDBG("availableMappingSizes: "
- << llvm::interleaved(availableMappingSizes));
-LDBG("activeIdOps: " << llvm::interleaved(activeIdOps));
-for (auto [activeId, activeMappingSize, availableMappingSize] :
- llvm::zip_equal(activeIdOps, activeMappingSizes,
- availableMappingSizes)) {
-  if (activeMappingSize > availableMappingSize) {
-return definiteFailureHelper(
-transformOp, forallOp,
-"Trying to map to fewer GPU threads than loop iterations but "
-   

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)

2025-07-03 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner edited 
https://github.com/llvm/llvm-project/pull/145828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)

2025-07-03 Thread Justin Bogner via llvm-branch-commits

https://github.com/bogner approved this pull request.

Some minor comments but this looks good once they're accounted for.

https://github.com/llvm/llvm-project/pull/145828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)

2025-07-03 Thread Justin Bogner via llvm-branch-commits


@@ -29,6 +29,9 @@ using namespace llvm::hlsl::rootsig;
 
 namespace {
 
+static const llvm::dxbc::RootSignatureVersion DefVersion =
+llvm::dxbc::RootSignatureVersion::V1_1;

bogner wrote:

I don't think this global makes the tests clearer. Maybe throw in a `using 
llvm::dxbc::RootSignatureVersion::V1_1` and just pass `V1_1` to the Parser 
constructors if you want to be concise?

https://github.com/llvm/llvm-project/pull/145828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)

2025-07-03 Thread Justin Bogner via llvm-branch-commits


@@ -605,13 +608,159 @@ TEST_F(ParseHLSLRootSignatureTest, 
ValidTrailingCommaTest) {
 
   hlsl::RootSignatureLexer Lexer(Source, TokLoc);
   SmallVector Elements;
-  hlsl::RootSignatureParser Parser(Elements, Lexer, *PP);
+  hlsl::RootSignatureParser Parser(DefVersion, Elements, Lexer, *PP);
+
+  // Test no diagnostics produced
+  Consumer->setNoDiag();
+
+  ASSERT_FALSE(Parser.parse());
+
+  ASSERT_TRUE(Consumer->isSatisfied());
+}
+
+TEST_F(ParseHLSLRootSignatureTest, ValidVersion10Test) {
+  // This test checks that the default values are set correctly
+  // when parsing with root signature version 1.0
+  const llvm::StringLiteral Source = R"cc(
+CBV(b0),
+SRV(t0),
+UAV(u0),
+DescriptorTable(
+  CBV(b1),
+  SRV(t1),
+  UAV(u1),
+  Sampler(s1),
+)
+  )cc";
+
+  TrivialModuleLoader ModLoader;
+  auto PP = createPP(Source, ModLoader);
+  auto TokLoc = SourceLocation();
+
+  hlsl::RootSignatureLexer Lexer(Source, TokLoc);
+  SmallVector Elements;
+  auto Version = llvm::dxbc::RootSignatureVersion::V1_0;
+  hlsl::RootSignatureParser Parser(Version, Elements, Lexer, *PP);

bogner wrote:

Similarly, I think it's clearer to just pass the enum directly to the 
constructor here and below.

https://github.com/llvm/llvm-project/pull/145828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)

2025-07-03 Thread Justin Bogner via llvm-branch-commits


@@ -55,7 +59,9 @@
 // CHECK-SAME:   numClauses = 3, visibility = All
 // CHECK-SAME: ),
 // CHECK-SAME: Sampler(
-// CHECK-SAME:   s0, numDescriptors = 4, space = 1, offset = 
DescriptorTableOffsetAppend, flags = None
+// CHECK-SAME:   s0, numDescriptors = 4, space = 1, offset = 
DescriptorTableOffsetAppend,
+// CHECK-V1_1-SAME:  flags = DescriptorsVolatile
+// CHECK-V1_1-SAME:  flags = None

bogner wrote:

This looks like a typo... are these both supposed to be V1_1? Does this test 
currently pass?

https://github.com/llvm/llvm-project/pull/145828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/20.x: Backport [LLD][COFF] Disallow importing DllMain from import libraries (#146610) (PR #146699)

2025-07-03 Thread Nikita Popov via llvm-branch-commits


@@ -313,6 +313,7 @@ struct Configuration {
   bool warnDebugInfoUnusable = true;
   bool warnLongSectionNames = true;
   bool warnStdcallFixup = true;
+  bool warnExportedDllMain = true;

nikic wrote:

This is an ABI break.

https://github.com/llvm/llvm-project/pull/146699
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/20.x: [RelLookupTableConverter] Drop unnamed_addr for GVs in entries to avoid generating GOTPCREL relocations (#146068) (PR #146191)

2025-07-03 Thread Nikita Popov via llvm-branch-commits

https://github.com/nikic milestoned 
https://github.com/llvm/llvm-project/pull/146191
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 692a679 - Revert "[mlgo][regalloc] Fix after PR #131837 (#146297)"

2025-07-03 Thread via llvm-branch-commits

Author: Mircea Trofin
Date: 2025-07-03T07:10:55-07:00
New Revision: 692a6797f50ced40e0e7585dab2e9676cc3bba1b

URL: 
https://github.com/llvm/llvm-project/commit/692a6797f50ced40e0e7585dab2e9676cc3bba1b
DIFF: 
https://github.com/llvm/llvm-project/commit/692a6797f50ced40e0e7585dab2e9676cc3bba1b.diff

LOG: Revert "[mlgo][regalloc] Fix after PR #131837 (#146297)"

This reverts commit 9a6e0688b04f1122012548b5f7d627ed347acfba.

Added: 


Modified: 
llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt
llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt
llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll

Removed: 




diff  --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt 
b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt
index b6639b844c888..231f632403d48 100644
--- a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt
+++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt
@@ -16,8 +16,8 @@ hint_weights_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7265065908432007,0.0,
 start_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.333432674408,0.333432674408,0.333432674408,0.333432674408,0.166716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333432674408
 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2714630176778883e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2714630176778883e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948
 hottest_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23831403255462646,0.07943800836801529,0.07943800836801529,0.07943800836801529,0.9912577867507935,0.07069581001996994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
-liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9646825194358826,0.7932539582252502,0.7900793552398682,0.7392857074737549,0.9170634746551514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7428571581840515
-use_def_density: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051188573241233826,0.01760609820485115,0.014214384369552135,0.014272669330239296,1.0,0.07243786007165909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.42433658242225647
+liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7939082384109497,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251
+use_def_density: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.014218696393072605,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.777910232544
@@ -40,8 +40,8 @@ hint_weights_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
 start_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.333432674408,0.0,0.333432674408,0.333432674408,0.166716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333432674408
 end_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2714630176778883e-10,0.0,0.9760092496871948,0.9760092496871948,2.2714630176778883e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948
 hottest_bb_freq_by_max: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2404157966375351,0.08013860136270523,0.0,0.08013860136270523,1.0,0.07131929695606232,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08013860136270523
-liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9646825194358826,0.0,0.7900793552398682,0.7392857074737549,0.9170634746551514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7932539582252502
-use_def_density: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051188573241233826,0.01760609820485115,0.0,0.014272669330239296,1.0,0.07243786007165909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014214384369552135
+liverange_size: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.0,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7939082384109497
+use_def_density: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.0,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014218696393072605
 max_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 min_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
 progress: 0.777910232544
@@ -64,8 +64,8 @@ hint_weigh

[llvm-branch-commits] [llvm] [AArch64][PAC] Combine signing with address materialization (PR #130809)

2025-07-03 Thread Anatoly Trosinenko via llvm-branch-commits

https://github.com/atrosinenko updated 
https://github.com/llvm/llvm-project/pull/130809

>From a6f9665a83b9002250d5d7c59915d92d173a21e2 Mon Sep 17 00:00:00 2001
From: Anatoly Trosinenko 
Date: Mon, 10 Mar 2025 15:14:55 +0300
Subject: [PATCH 1/2] [AArch64][PAC] Precommit tests on merging
 MOVaddr/LOADgotAUTH with PAC*

---
 .../GlobalISel/ptrauth-constant-in-code.ll| 76 +++
 .../AArch64/ptrauth-constant-in-code.ll   | 71 +
 2 files changed, 147 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-constant-in-code.ll 
b/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-constant-in-code.ll
index 12a3448111fcb..140e29f942a79 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-constant-in-code.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-constant-in-code.ll
@@ -78,6 +78,82 @@ define ptr @foo() {
   ret ptr ptrauth (ptr @g, i32 0)
 }
 
+;--- finalize-isel.ll
+
+; RUN: llc < finalize-isel.ll -mtriple aarch64-elf -mattr=+pauth 
-global-isel=1 \
+; RUN:   -verify-machineinstrs -global-isel-abort=1 -stop-after=finalize-isel 
| \
+; RUN:   FileCheck --check-prefixes=ISEL,ISEL-ELF %s
+; RUN: llc < finalize-isel.ll -mtriple arm64-apple-ios -mattr=+pauth 
-global-isel=1 \
+; RUN:   -verify-machineinstrs -global-isel-abort=1 -stop-after=finalize-isel 
| \
+; RUN:   FileCheck --check-prefixes=ISEL %s
+
+@const_table_local = dso_local constant [3 x ptr] [ptr null, ptr null, ptr 
null]
+@const_table_got = constant [3 x ptr] [ptr null, ptr null, ptr null]
+
+define void @store_signed_const_local(ptr %dest) {
+; ISEL-LABEL: name: store_signed_const_local
+; ISEL:   body:
+; ISEL: %0:gpr64common = COPY $x0
+; ISEL-NEXT:%10:gpr64common = MOVaddr target-flags(aarch64-page) 
@const_table_local + 8, target-flags(aarch64-pageoff, aarch64-nc) 
@const_table_local + 8
+; ISEL-NEXT:%2:gpr64noip = MOVKXi %0, 1234
+; ISEL-NEXT:%15:gpr64noip = COPY %0
+; ISEL-NEXT:%4:gpr64 = PAC %10, 2, 1234, %15, implicit-def dead $x17
+; ISEL-NEXT:%14:gpr64 = COPY %4
+; ISEL-NEXT:STRXui %14, %0, 0 :: (store (p0) into %ir.dest)
+; ISEL-NEXT:RET_ReallyLR
+  %dest.i = ptrtoint ptr %dest to i64
+  %discr = call i64 @llvm.ptrauth.blend(i64 %dest.i, i64 1234)
+  %signed.i = call i64 @llvm.ptrauth.sign(i64 ptrtoint (ptr getelementptr ([2 
x ptr], ptr @const_table_local, i32 0, i32 1) to i64), i32 2, i64 %discr)
+  %signed.ptr = inttoptr i64 %signed.i to ptr
+  store ptr %signed.ptr, ptr %dest
+  ret void
+}
+
+define void @store_signed_const_got(ptr %dest) {
+; ISEL-ELF-LABEL: name: store_signed_const_got
+; ISEL-ELF:   body:
+; ISEL-ELF: %0:gpr64common = COPY $x0
+; ISEL-ELF-NEXT:%7:gpr64common = LOADgotAUTH target-flags(aarch64-got) 
@const_table_got
+; ISEL-ELF-NEXT:%6:gpr64common = ADDXri %7, 8, 0
+; ISEL-ELF-NEXT:%2:gpr64noip = MOVKXi %0, 1234
+; ISEL-ELF-NEXT:%12:gpr64noip = COPY %0
+; ISEL-ELF-NEXT:%4:gpr64 = PAC %6, 2, 1234, %12, implicit-def dead $x17
+; ISEL-ELF-NEXT:%10:gpr64 = COPY %4
+; ISEL-ELF-NEXT:STRXui %10, %0, 0 :: (store (p0) into %ir.dest)
+; ISEL-ELF-NEXT:RET_ReallyLR
+  %dest.i = ptrtoint ptr %dest to i64
+  %discr = call i64 @llvm.ptrauth.blend(i64 %dest.i, i64 1234)
+  %signed.i = call i64 @llvm.ptrauth.sign(i64 ptrtoint (ptr getelementptr ([2 
x ptr], ptr @const_table_got, i32 0, i32 1) to i64), i32 2, i64 %discr)
+  %signed.ptr = inttoptr i64 %signed.i to ptr
+  store ptr %signed.ptr, ptr %dest
+  ret void
+}
+
+define void @store_signed_arg(ptr %dest, ptr %p) {
+; ISEL-LABEL: name: store_signed_arg
+; ISEL:   body:
+; ISEL: %0:gpr64common = COPY $x0
+; ISEL-NEXT:%1:gpr64common = COPY $x1
+; ISEL-NEXT:%3:gpr64noip = MOVKXi %0, 1234
+; ISEL-NEXT:%6:gpr64common = ADDXri %1, 8, 0
+; ISEL-NEXT:%12:gpr64noip = COPY %0
+; ISEL-NEXT:%8:gpr64 = PAC %6, 2, 1234, %12, implicit-def dead $x17
+; ISEL-NEXT:%10:gpr64 = COPY %8
+; ISEL-NEXT:STRXui %10, %0, 0 :: (store (p0) into %ir.dest)
+; ISEL-NEXT:RET_ReallyLR
+  %dest.i = ptrtoint ptr %dest to i64
+  %discr = call i64 @llvm.ptrauth.blend(i64 %dest.i, i64 1234)
+  %p.offset = getelementptr [2 x ptr], ptr %p, i32 0, i32 1
+  %p.offset.i = ptrtoint ptr %p.offset to i64
+  %signed.i = call i64 @llvm.ptrauth.sign(i64 %p.offset.i, i32 2, i64 %discr)
+  %signed.ptr = inttoptr i64 %signed.i to ptr
+  store ptr %signed.ptr, ptr %dest
+  ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 8, !"ptrauth-elf-got", i32 1}
+
 ;--- ok.ll
 
 ; RUN: llc < ok.ll -mtriple aarch64-elf -mattr=+pauth -global-isel=1 \
diff --git a/llvm/test/CodeGen/AArch64/ptrauth-constant-in-code.ll 
b/llvm/test/CodeGen/AArch64/ptrauth-constant-in-code.ll
index 76339a7cc5791..429ff6e5489aa 100644
--- a/llvm/test/CodeGen/AArch64/ptrauth-constant-in-code.ll
+++ b/llvm/test/CodeGen/AArch64/ptrauth-constant-in-code.ll
@@ -69,6 +69,77 @@ define ptr @foo() {
   ret ptr ptrauth (ptr @g, i32 0)
 }
 
+;--- finalize-isel.ll
+
+; RUN: llc < final

[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)

2025-07-03 Thread Finn Plummer via llvm-branch-commits


@@ -55,7 +59,9 @@
 // CHECK-SAME:   numClauses = 3, visibility = All
 // CHECK-SAME: ),
 // CHECK-SAME: Sampler(
-// CHECK-SAME:   s0, numDescriptors = 4, space = 1, offset = 
DescriptorTableOffsetAppend, flags = None
+// CHECK-SAME:   s0, numDescriptors = 4, space = 1, offset = 
DescriptorTableOffsetAppend,
+// CHECK-V1_1-SAME:  flags = DescriptorsVolatile
+// CHECK-V1_1-SAME:  flags = None

inbelic wrote:

Yes, it was a typo. The test was passing; however, this was because I had a bad 
`check-prefixes` in the command line. So none of the `CHECK:` lines were 
actually being tested now.

https://github.com/llvm/llvm-project/pull/145828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)

2025-07-03 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic edited 
https://github.com/llvm/llvm-project/pull/145828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)

2025-07-03 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic edited 
https://github.com/llvm/llvm-project/pull/145828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [NFC][HLSL][RootSignature] Split up `HLSLRootSignatureUtils` (PR #146124)

2025-07-03 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic edited 
https://github.com/llvm/llvm-project/pull/146124
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)

2025-07-03 Thread Finn Plummer via llvm-branch-commits

https://github.com/inbelic updated 
https://github.com/llvm/llvm-project/pull/145828

>From 471a4a556ad0653792e39c99da2423d5e3ed933f Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Fri, 27 Jun 2025 16:39:13 +
Subject: [PATCH 01/10] update `setDefaultFlags`

---
 .../llvm/Frontend/HLSL/HLSLRootSignature.h| 20 +--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h 
b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h
index f552040ab31cc..0579c1b5f9c25 100644
--- a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h
+++ b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h
@@ -50,7 +50,14 @@ struct RootDescriptor {
   dxbc::ShaderVisibility Visibility = dxbc::ShaderVisibility::All;
   dxbc::RootDescriptorFlags Flags;
 
-  void setDefaultFlags() {
+  void setDefaultFlags(dxbc::RootSignatureVersion Version) {
+if (Version == dxbc::RootSignatureVersion::V1_0) {
+  Flags = dxbc::RootDescriptorFlags::DataVolatile;
+  return;
+}
+
+assert(Version == llvm::dxbc::RootSignatureVersion::V1_1 &&
+   "Specified an invalid root signature version");
 switch (Type) {
 case DescriptorType::CBuffer:
 case DescriptorType::SRV:
@@ -83,7 +90,16 @@ struct DescriptorTableClause {
   uint32_t Offset = DescriptorTableOffsetAppend;
   dxbc::DescriptorRangeFlags Flags;
 
-  void setDefaultFlags() {
+  void setDefaultFlags(dxbc::RootSignatureVersion Version) {
+if (Version == dxbc::RootSignatureVersion::V1_0) {
+  Flags = dxbc::DescriptorRangeFlags::DescriptorsVolatile;
+  if (Type != ClauseType::Sampler)
+Flags |= dxbc::DescriptorRangeFlags::DataVolatile;
+  return;
+}
+
+assert(Version == dxbc::RootSignatureVersion::V1_1 &&
+   "Specified an invalid root signature version");
 switch (Type) {
 case ClauseType::CBuffer:
 case ClauseType::SRV:

>From af70ea275d057f15b80223c11eb11174764da0ff Mon Sep 17 00:00:00 2001
From: Finn Plummer 
Date: Fri, 27 Jun 2025 16:39:43 +
Subject: [PATCH 02/10] update unit testing

---
 .../Frontend/HLSLRootSignatureDumpTest.cpp| 72 ++-
 1 file changed, 70 insertions(+), 2 deletions(-)

diff --git a/llvm/unittests/Frontend/HLSLRootSignatureDumpTest.cpp 
b/llvm/unittests/Frontend/HLSLRootSignatureDumpTest.cpp
index e090f6bae470f..76ac285735d05 100644
--- a/llvm/unittests/Frontend/HLSLRootSignatureDumpTest.cpp
+++ b/llvm/unittests/Frontend/HLSLRootSignatureDumpTest.cpp
@@ -17,7 +17,7 @@ TEST(HLSLRootSignatureTest, DescriptorCBVClauseDump) {
   DescriptorTableClause Clause;
   Clause.Type = ClauseType::CBuffer;
   Clause.Reg = {RegisterType::BReg, 0};
-  Clause.setDefaultFlags();
+  Clause.setDefaultFlags(llvm::dxbc::RootSignatureVersion::V1_1);
 
   std::string Out;
   llvm::raw_string_ostream OS(Out);
@@ -93,6 +93,40 @@ TEST(HLSLRootSignatureTest, DescriptorSamplerClauseDump) {
   EXPECT_EQ(Out, Expected);
 }
 
+TEST(HLSLRootSignatureTest, DescriptorCBVV10ClauseDump) {
+  DescriptorTableClause Clause;
+  Clause.Type = ClauseType::CBuffer;
+  Clause.Reg = {RegisterType::BReg, 0};
+  Clause.setDefaultFlags(llvm::dxbc::RootSignatureVersion::V1_0);
+
+  std::string Out;
+  llvm::raw_string_ostream OS(Out);
+  OS << Clause;
+  OS.flush();
+
+  std::string Expected = "CBV(b0, numDescriptors = 1, space = 0, "
+ "offset = DescriptorTableOffsetAppend, "
+ "flags = DescriptorsVolatile | DataVolatile)";
+  EXPECT_EQ(Out, Expected);
+}
+
+TEST(HLSLRootSignatureTest, DescriptorSamplerV10ClauseDump) {
+  DescriptorTableClause Clause;
+  Clause.Type = ClauseType::Sampler;
+  Clause.Reg = {RegisterType::SReg, 0};
+  Clause.setDefaultFlags(llvm::dxbc::RootSignatureVersion::V1_0);
+
+  std::string Out;
+  llvm::raw_string_ostream OS(Out);
+  OS << Clause;
+  OS.flush();
+
+  std::string Expected = "Sampler(s0, numDescriptors = 1, space = 0, offset = "
+ "DescriptorTableOffsetAppend, "
+ "flags = DescriptorsVolatile)";
+  EXPECT_EQ(Out, Expected);
+}
+
 TEST(HLSLRootSignatureTest, DescriptorTableDump) {
   DescriptorTable Table;
   Table.NumClauses = 4;
@@ -112,7 +146,7 @@ TEST(HLSLRootSignatureTest, RootCBVDump) {
   RootDescriptor Descriptor;
   Descriptor.Type = DescriptorType::CBuffer;
   Descriptor.Reg = {RegisterType::BReg, 0};
-  Descriptor.setDefaultFlags();
+  Descriptor.setDefaultFlags(llvm::dxbc::RootSignatureVersion::V1_1);
 
   std::string Out;
   llvm::raw_string_ostream OS(Out);
@@ -125,6 +159,40 @@ TEST(HLSLRootSignatureTest, RootCBVDump) {
   EXPECT_EQ(Out, Expected);
 }
 
+TEST(HLSLRootSignatureTest, RootSRV10Dump) {
+  RootDescriptor Descriptor;
+  Descriptor.Type = DescriptorType::SRV;
+  Descriptor.Reg = {RegisterType::TReg, 0};
+  Descriptor.setDefaultFlags(llvm::dxbc::RootSignatureVersion::V1_0);
+
+  std::string Out;
+  llvm::raw_string_ostream OS(Out);
+  OS << Descriptor;
+  OS.flush();
+
+  std::stri

[llvm-branch-commits] [llvm] [BOLT] Improve file handling in NFC-Mode (PR #146513)

2025-07-03 Thread Paschalis Mpeis via llvm-branch-commits

https://github.com/paschalis-mpeis updated 
https://github.com/llvm/llvm-project/pull/146513

>From 625f9ee79af68a121afd92e06d9b4f91007a9c38 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis 
Date: Tue, 1 Jul 2025 12:37:31 +0100
Subject: [PATCH 1/4] [BOLT] Improve file handling in NFC-Mode

This patch introduces the following improvements:
- Catch an exception when the CMakeCache.txt is not present
- Bail out gracefully when llvm-bolt did not build successfully for the
  current or previous revision.
---
 bolt/utils/nfc-check-setup.py | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/bolt/utils/nfc-check-setup.py b/bolt/utils/nfc-check-setup.py
index 7d634d7a88b83..2ff27e5c40b63 100755
--- a/bolt/utils/nfc-check-setup.py
+++ b/bolt/utils/nfc-check-setup.py
@@ -91,18 +91,26 @@ def main():
 
 source_dir = None
 # find the repo directory
-with open(f"{args.build_dir}/CMakeCache.txt") as f:
-for line in f:
-m = re.match(r"LLVM_SOURCE_DIR:STATIC=(.*)", line)
-if m:
-source_dir = m.groups()[0]
-if not source_dir:
-sys.exit("Source directory is not found")
+try:
+CMCacheFilename=f"{args.build_dir}/CMakeCache.txt"
+with open(CMCacheFilename) as f:
+for line in f:
+m = re.match(r"LLVM_SOURCE_DIR:STATIC=(.*)", line)
+if m:
+source_dir = m.groups()[0]
+if not source_dir:
+raise Exception(f"Source directory not found: '{CMCacheFilename}'")
+except Exception as e:
+sys.exit(e)
 
 # build the current commit
 subprocess.run(
 shlex.split("cmake --build . --target llvm-bolt"), cwd=args.build_dir
 )
+
+if not os.path.exists(bolt_path):
+sys.exit(f"Failed to build the current revision: '{bolt_path}'")
+
 # rename llvm-bolt
 os.replace(bolt_path, f"{bolt_path}.new")
 # memorize the old hash for logging
@@ -133,11 +141,15 @@ def main():
 subprocess.run(shlex.split(f"git checkout -f {args.cmp_rev}"), 
cwd=source_dir)
 # get the parent commit hash for logging
 new_ref = get_git_ref_or_rev(source_dir)
+
 # build the previous commit
 subprocess.run(
 shlex.split("cmake --build . --target llvm-bolt"), cwd=args.build_dir
 )
+
 # rename llvm-bolt
+if not os.path.exists(bolt_path):
+sys.exit(f"Failed to build the previous revision: '{bolt_path}'")
 os.replace(bolt_path, f"{bolt_path}.old")
 
 # symlink llvm-bolt-wrapper

>From 26e7b9f05f8a365f117f14a0975a232e1ec74202 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis 
Date: Tue, 1 Jul 2025 12:50:08 +0100
Subject: [PATCH 2/4] python formatter and nits

---
 bolt/utils/nfc-check-setup.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bolt/utils/nfc-check-setup.py b/bolt/utils/nfc-check-setup.py
index 2ff27e5c40b63..22e8cc646a1c5 100755
--- a/bolt/utils/nfc-check-setup.py
+++ b/bolt/utils/nfc-check-setup.py
@@ -92,7 +92,7 @@ def main():
 source_dir = None
 # find the repo directory
 try:
-CMCacheFilename=f"{args.build_dir}/CMakeCache.txt"
+CMCacheFilename = f"{args.build_dir}/CMakeCache.txt"
 with open(CMCacheFilename) as f:
 for line in f:
 m = re.match(r"LLVM_SOURCE_DIR:STATIC=(.*)", line)
@@ -104,6 +104,7 @@ def main():
 sys.exit(e)
 
 # build the current commit
+print ("NFC-Setup: Building current revision..")
 subprocess.run(
 shlex.split("cmake --build . --target llvm-bolt"), cwd=args.build_dir
 )
@@ -143,6 +144,7 @@ def main():
 new_ref = get_git_ref_or_rev(source_dir)
 
 # build the previous commit
+print ("NFC-Setup: Building previous revision..")
 subprocess.run(
 shlex.split("cmake --build . --target llvm-bolt"), cwd=args.build_dir
 )

>From ca36aa02effc6c5e5da140940a5c55d4183e0422 Mon Sep 17 00:00:00 2001
From: Paschalis Mpeis 
Date: Tue, 1 Jul 2025 12:55:46 +0100
Subject: [PATCH 3/4] code formatter (2)

---
 bolt/utils/nfc-check-setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/utils/nfc-check-setup.py b/bolt/utils/nfc-check-setup.py
index 22e8cc646a1c5..d3248050f16e3 100755
--- a/bolt/utils/nfc-check-setup.py
+++ b/bolt/utils/nfc-check-setup.py
@@ -104,7 +104,7 @@ def main():
 sys.exit(e)
 
 # build the current commit
-print ("NFC-Setup: Building current revision..")
+print("NFC-Setup: Building current revision..")
 subprocess.run(
 shlex.split("cmake --build . --target llvm-bolt"), cwd=args.build_dir
 )
@@ -144,7 +144,7 @@ def main():
 new_ref = get_git_ref_or_rev(source_dir)
 
 # build the previous commit
-print ("NFC-Setup: Building previous revision..")
+print("NFC-Setup: Building previous revision..")
 subprocess.run(
 shlex.split("cmake --build . --target llvm-bolt"), cwd=args.build_dir
 )

>From 09363a

[llvm-branch-commits] [llvm] [DirectX] Move the scalarizer pass to before dxil-flatten-arrays (PR #146800)

2025-07-03 Thread Sarah Spall via llvm-branch-commits

https://github.com/spall approved this pull request.

Nit on the PR description: you didn't move the scalarizerPass; you moved the 
DXILFlattenArrays pass to be immediately after the scalarizerPass.

https://github.com/llvm/llvm-project/pull/146800
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/20.x: Backport [LLD][COFF] Disallow importing DllMain from import libraries (#146610) (PR #146699)

2025-07-03 Thread Martin Storsjö via llvm-branch-commits

mstorsjo wrote:

> @rnk @mstorsjo is it ok if we integrate this into the release?

Sorry I'm a bit late here, but I have a couple of follow-up comments to the 
original PR, including potentially changing the public interface (the option 
name). Plus @nikic's potential ABI concern (which I think might not apply here, 
but let's sort that out).

> @tstellar will there be a 20.1.8?

AFAIK there isn't one directly planned, unless very pressing issues are found.

https://github.com/llvm/llvm-project/pull/146699
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [mlir] [mlir][GPU][transform] Add gpu_to_rocdl conversion pattern to transfo… (PR #146962)

2025-07-03 Thread Nicolas Vasilache via llvm-branch-commits

https://github.com/nicolasvasilache created 
https://github.com/llvm/llvm-project/pull/146962

…rm dialect

Authored-by: Son Tuan Vu 

>From d8730eb667660782ec1dce6e9cdea020c5821300 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache 
Date: Thu, 3 Jul 2025 23:09:00 +0200
Subject: [PATCH] [mlir][GPU][transform] Add gpu_to_rocdl conversion pattern to
 transform dialect

Authored-by: Son Tuan Vu 
---
 .../GPU/TransformOps/GPUTransformOps.td   | 14 +++
 .../Dialect/GPU/TransformOps/CMakeLists.txt   |  1 +
 .../GPU/TransformOps/GPUTransformOps.cpp  | 38 +++
 .../llvm-project-overlay/mlir/BUILD.bazel |  2 +
 4 files changed, 55 insertions(+)

diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td 
b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
index 36b579485fc04..87423c639945f 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
@@ -54,6 +54,20 @@ def ApplyGPUSubgroupReduceToNVVMConversionPatternsOp : 
Op]> {
+  let description = [{
+Collects patterns that convert GPU dialect ops to ROCDL dialect ops. These
+patterns require an "LLVMTypeConverter".
+  }];
+  let arguments = (ins StrAttr:$chipset);
+  let assemblyFormat = [{
+`chipset` `=` $chipset attr-dict
+  }];
+}
+
 
//===--===//
 // Apply...PatternsOp
 
//===--===//
diff --git a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt 
b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
index b26788f675ce5..e5cc0254f1ffe 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
@@ -24,4 +24,5 @@ add_mlir_dialect_library(MLIRGPUTransformOps
   # ConversionPatterns
   MLIRNVGPUToNVVM
   MLIRGPUToNVVMTransforms
+  MLIRGPUToROCDLTransforms
   )  
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp 
b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index a86fc47947130..b764a72529f8f 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -10,6 +10,7 @@
 
 #include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
 #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
+#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
@@ -42,6 +43,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InterleavedRange.h"
+#include "llvm/Support/LogicalResult.h"
 #include 
 
 using namespace mlir;
@@ -129,6 +131,42 @@ LogicalResult 
transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp::
   return success();
 }
 
+void transform::ApplyGPUToROCDLConversionPatternsOp::populatePatterns(
+TypeConverter &typeConverter, RewritePatternSet &patterns) {
+  auto &llvmTypeConverter = static_cast(typeConverter);
+  populateGpuMemorySpaceAttributeConversions(
+  llvmTypeConverter, [](AddressSpace space) {
+switch (space) {
+case AddressSpace::Global:
+  return 1;
+case AddressSpace::Workgroup:
+  return 3;
+case AddressSpace::Private:
+  return 5;
+}
+llvm_unreachable("unknown address space enum value");
+return 0;
+  });
+  FailureOr maybeChipset =
+  amdgpu::Chipset::parse(getChipset());
+  assert(llvm::succeeded(maybeChipset) && "expected valid chipset");
+  populateGpuToROCDLConversionPatterns(
+  llvmTypeConverter, patterns, mlir::gpu::amd::Runtime::HIP, 
*maybeChipset);
+}
+
+LogicalResult
+transform::ApplyGPUToROCDLConversionPatternsOp::verifyTypeConverter(
+transform::TypeConverterBuilderOpInterface builder) {
+  FailureOr maybeChipset =
+  amdgpu::Chipset::parse(getChipset());
+  if (failed(maybeChipset)) {
+return emitOpError("Invalid chipset name: " + getChipset());
+  }
+  if (builder.getTypeConverterType() != "LLVMTypeConverter")
+return emitOpError("expected LLVMTypeConverter");
+  return success();
+}
+
 
//===--===//
 // Apply...PatternsOp
 
//===--===//s
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index cc266c2fe3a77..79f2cd5ea71db 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5502,6 +5502,7 @@ cc_library(
 ":GPUDialect",
 ":GPUToGPURuntimeTransforms",
 ":GPUToNVVMTransforms",
+":GPUToROCDLTransforms",
 ":GPUTransformOpsIncGen",
 ":GPUTransforms",
 ":IR",
@@ -5509,6 +5510,7 @@ cc_libra

[llvm-branch-commits] [llvm] [mlir] [mlir][GPU][transform] Add gpu_to_rocdl conversion pattern to transfo… (PR #146962)

2025-07-03 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir-gpu

Author: Nicolas Vasilache (nicolasvasilache)


Changes

…rm dialect

Authored-by: Son Tuan Vu 

---
Full diff: https://github.com/llvm/llvm-project/pull/146962.diff


4 Files Affected:

- (modified) mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td 
(+14) 
- (modified) mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt (+1) 
- (modified) mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp (+38) 
- (modified) utils/bazel/llvm-project-overlay/mlir/BUILD.bazel (+2) 


``diff
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td 
b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
index 36b579485fc04..87423c639945f 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
@@ -54,6 +54,20 @@ def ApplyGPUSubgroupReduceToNVVMConversionPatternsOp : 
Op]> {
+  let description = [{
+Collects patterns that convert GPU dialect ops to ROCDL dialect ops. These
+patterns require an "LLVMTypeConverter".
+  }];
+  let arguments = (ins StrAttr:$chipset);
+  let assemblyFormat = [{
+`chipset` `=` $chipset attr-dict
+  }];
+}
+
 
//===--===//
 // Apply...PatternsOp
 
//===--===//
diff --git a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt 
b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
index b26788f675ce5..e5cc0254f1ffe 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
@@ -24,4 +24,5 @@ add_mlir_dialect_library(MLIRGPUTransformOps
   # ConversionPatterns
   MLIRNVGPUToNVVM
   MLIRGPUToNVVMTransforms
+  MLIRGPUToROCDLTransforms
   )  
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp 
b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index a86fc47947130..b764a72529f8f 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -10,6 +10,7 @@
 
 #include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
 #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
+#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
@@ -42,6 +43,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InterleavedRange.h"
+#include "llvm/Support/LogicalResult.h"
 #include 
 
 using namespace mlir;
@@ -129,6 +131,42 @@ LogicalResult 
transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp::
   return success();
 }
 
+void transform::ApplyGPUToROCDLConversionPatternsOp::populatePatterns(
+TypeConverter &typeConverter, RewritePatternSet &patterns) {
+  auto &llvmTypeConverter = static_cast(typeConverter);
+  populateGpuMemorySpaceAttributeConversions(
+  llvmTypeConverter, [](AddressSpace space) {
+switch (space) {
+case AddressSpace::Global:
+  return 1;
+case AddressSpace::Workgroup:
+  return 3;
+case AddressSpace::Private:
+  return 5;
+}
+llvm_unreachable("unknown address space enum value");
+return 0;
+  });
+  FailureOr maybeChipset =
+  amdgpu::Chipset::parse(getChipset());
+  assert(llvm::succeeded(maybeChipset) && "expected valid chipset");
+  populateGpuToROCDLConversionPatterns(
+  llvmTypeConverter, patterns, mlir::gpu::amd::Runtime::HIP, 
*maybeChipset);
+}
+
+LogicalResult
+transform::ApplyGPUToROCDLConversionPatternsOp::verifyTypeConverter(
+transform::TypeConverterBuilderOpInterface builder) {
+  FailureOr maybeChipset =
+  amdgpu::Chipset::parse(getChipset());
+  if (failed(maybeChipset)) {
+return emitOpError("Invalid chipset name: " + getChipset());
+  }
+  if (builder.getTypeConverterType() != "LLVMTypeConverter")
+return emitOpError("expected LLVMTypeConverter");
+  return success();
+}
+
 
//===--===//
 // Apply...PatternsOp
 
//===--===//s
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index cc266c2fe3a77..79f2cd5ea71db 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5502,6 +5502,7 @@ cc_library(
 ":GPUDialect",
 ":GPUToGPURuntimeTransforms",
 ":GPUToNVVMTransforms",
+":GPUToROCDLTransforms",
 ":GPUTransformOpsIncGen",
 ":GPUTransforms",
 ":IR",
@@ -5509,6 +5510,7 @@ cc_library(
 ":MemRefDialect",
 ":NVGPUDialect",
 ":NVVMDialect",
+":ROCDLDialect",
 

[llvm-branch-commits] [llvm] [mlir] [mlir][GPU][transform] Add gpu_to_rocdl conversion pattern to transfo… (PR #146962)

2025-07-03 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir

Author: Nicolas Vasilache (nicolasvasilache)


Changes

…rm dialect

Authored-by: Son Tuan Vu 

---
Full diff: https://github.com/llvm/llvm-project/pull/146962.diff


4 Files Affected:

- (modified) mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td 
(+14) 
- (modified) mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt (+1) 
- (modified) mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp (+38) 
- (modified) utils/bazel/llvm-project-overlay/mlir/BUILD.bazel (+2) 


``diff
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td 
b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
index 36b579485fc04..87423c639945f 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
@@ -54,6 +54,20 @@ def ApplyGPUSubgroupReduceToNVVMConversionPatternsOp : 
Op]> {
+  let description = [{
+Collects patterns that convert GPU dialect ops to ROCDL dialect ops. These
+patterns require an "LLVMTypeConverter".
+  }];
+  let arguments = (ins StrAttr:$chipset);
+  let assemblyFormat = [{
+`chipset` `=` $chipset attr-dict
+  }];
+}
+
 
//===--===//
 // Apply...PatternsOp
 
//===--===//
diff --git a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt 
b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
index b26788f675ce5..e5cc0254f1ffe 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
@@ -24,4 +24,5 @@ add_mlir_dialect_library(MLIRGPUTransformOps
   # ConversionPatterns
   MLIRNVGPUToNVVM
   MLIRGPUToNVVMTransforms
+  MLIRGPUToROCDLTransforms
   )  
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp 
b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index a86fc47947130..b764a72529f8f 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -10,6 +10,7 @@
 
 #include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
 #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
+#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h"
 #include "mlir/Conversion/LLVMCommon/TypeConverter.h"
 #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h"
 #include "mlir/Dialect/Affine/IR/AffineOps.h"
@@ -42,6 +43,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/InterleavedRange.h"
+#include "llvm/Support/LogicalResult.h"
 #include 
 
 using namespace mlir;
@@ -129,6 +131,42 @@ LogicalResult 
transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp::
   return success();
 }
 
+void transform::ApplyGPUToROCDLConversionPatternsOp::populatePatterns(
+TypeConverter &typeConverter, RewritePatternSet &patterns) {
+  auto &llvmTypeConverter = static_cast(typeConverter);
+  populateGpuMemorySpaceAttributeConversions(
+  llvmTypeConverter, [](AddressSpace space) {
+switch (space) {
+case AddressSpace::Global:
+  return 1;
+case AddressSpace::Workgroup:
+  return 3;
+case AddressSpace::Private:
+  return 5;
+}
+llvm_unreachable("unknown address space enum value");
+return 0;
+  });
+  FailureOr maybeChipset =
+  amdgpu::Chipset::parse(getChipset());
+  assert(llvm::succeeded(maybeChipset) && "expected valid chipset");
+  populateGpuToROCDLConversionPatterns(
+  llvmTypeConverter, patterns, mlir::gpu::amd::Runtime::HIP, 
*maybeChipset);
+}
+
+LogicalResult
+transform::ApplyGPUToROCDLConversionPatternsOp::verifyTypeConverter(
+transform::TypeConverterBuilderOpInterface builder) {
+  FailureOr maybeChipset =
+  amdgpu::Chipset::parse(getChipset());
+  if (failed(maybeChipset)) {
+return emitOpError("Invalid chipset name: " + getChipset());
+  }
+  if (builder.getTypeConverterType() != "LLVMTypeConverter")
+return emitOpError("expected LLVMTypeConverter");
+  return success();
+}
+
 
//===--===//
 // Apply...PatternsOp
 
//===--===//s
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index cc266c2fe3a77..79f2cd5ea71db 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -5502,6 +5502,7 @@ cc_library(
 ":GPUDialect",
 ":GPUToGPURuntimeTransforms",
 ":GPUToNVVMTransforms",
+":GPUToROCDLTransforms",
 ":GPUTransformOpsIncGen",
 ":GPUTransforms",
 ":IR",
@@ -5509,6 +5510,7 @@ cc_library(
 ":MemRefDialect",
 ":NVGPUDialect",
 ":NVVMDialect",
+":ROCDLDialect",
 

[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)

2025-07-03 Thread Nicolas Vasilache via llvm-branch-commits

https://github.com/nicolasvasilache updated 
https://github.com/llvm/llvm-project/pull/146943

>From e2fc2f4d78809d5196719b546fd2a6a06058837f Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache 
Date: Thu, 3 Jul 2025 21:26:53 +0200
Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to
 scf::ForallOp and use it to implement warp specialization.

This revision adds DeviceMaskingAttrInterface and extends
DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface
and DeviceMaskingAttrInterface.

The first implementation is in the form of a GPUMappingMaskAttr, which
can be additionally passed to the scf.forall.mapping attribute to
specify a mask on compute resources that should be active.

Support is added to GPUTransformOps to take advantage of this
information and lower to block/warpgroup/warp/thread specialization when
mapped to linear ids.

Co-authored-by: Oleksandr "Alex" Zinenko 
---
 .../Dialect/GPU/IR/GPUDeviceMappingAttr.td|  18 
 .../mlir/Dialect/GPU/TransformOps/Utils.h |  15 ++-
 .../Dialect/SCF/IR/DeviceMappingInterface.td  |  45 +++-
 mlir/include/mlir/Dialect/SCF/IR/SCFOps.td|  12 +++
 mlir/lib/Dialect/GPU/CMakeLists.txt   |   1 +
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp|  45 
 .../GPU/TransformOps/GPUTransformOps.cpp  |  58 ++
 mlir/lib/Dialect/GPU/TransformOps/Utils.cpp   | 102 +-
 mlir/lib/Dialect/SCF/IR/SCF.cpp   |  43 ++--
 .../Dialect/GPU/transform-gpu-failing.mlir|  61 +++
 mlir/test/Dialect/GPU/transform-gpu.mlir  |  81 ++
 mlir/test/Dialect/SCF/invalid.mlir|  18 
 12 files changed, 441 insertions(+), 58 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td 
b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
index 63f228ca3157f..e8540027e7b77 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
@@ -252,6 +252,24 @@ def GPULaneMappingAttr
   }];
 }
 
+def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [
+  DeclareAttrInterfaceMethods ] >  {
+  let parameters = (ins "uint64_t":$mask);
+  let assemblyFormat = "`<` params `>`";
+  let description = [{
+Attribute describing how to filter the processing units that a
+region is mapped to.
+
+In the first implementation the masking is a bitfield that specifies for
+each processing unit whether it is active or not.
+
+In the future, we may want to implement this as a symbol to refer to
+dynamically defined values.
+
+Extending op semantics with an operand is deemed too intrusive at this 
time.
+  }];
+}
+
 def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", 
"memory_space", [
   DeclareAttrInterfaceMethods ] >  {
   let parameters = (ins
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h 
b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
index de512ded59fec..0a11b8f8d3fa0 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
@@ -78,7 +78,8 @@ struct GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuBlockIdBuilder : public GpuIdBuilder {
-  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false);
+  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
 };
 
 /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
@@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
   GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize,
-bool useLinearMapping = false);
+bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
   /// In the future this may be configured by the transformation.
   static constexpr int64_t kNumWarpsPerGroup = 4;
@@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpIdBuilder : public GpuIdBuilder {
   GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize,
-   bool useLinearMapping = false);
+   bool useLinearMapping = false,
+   DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
 };
 
@@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuThreadIdBuilder : public GpuIdBuilder {

[llvm-branch-commits] AlwaysInliner: A new inlining algorithm to interleave alloca promotion with inlines. (PR #145613)

2025-07-03 Thread Amara Emerson via llvm-branch-commits

aemerson wrote:

> ⚠️ undef deprecator found issues in your code. ⚠️

This looks to be just the IR output containing undef, not the input.

https://github.com/llvm/llvm-project/pull/145613
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)

2025-07-03 Thread Nicolas Vasilache via llvm-branch-commits

https://github.com/nicolasvasilache updated 
https://github.com/llvm/llvm-project/pull/146943

>From ad456bbf3da7ca290c521a945e950fd1cbf3ca81 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache 
Date: Thu, 3 Jul 2025 21:26:53 +0200
Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to
 scf::ForallOp and use it to implement warp specialization.

This revision adds DeviceMaskingAttrInterface and extends
DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface
and DeviceMaskingAttrInterface.

The first implementation is in the form of a GPUMappingMaskAttr, which
can be additionally passed to the scf.forall.mapping attribute to
specify a mask on compute resources that should be active.

Support is added to GPUTransformOps to take advantage of this
information and lower to block/warpgroup/warp/thread specialization when
mapped to linear ids.

Co-authored-by: Oleksandr "Alex" Zinenko 
---
 .../Dialect/GPU/IR/GPUDeviceMappingAttr.td|  18 
 .../mlir/Dialect/GPU/TransformOps/Utils.h |  15 ++-
 .../Dialect/SCF/IR/DeviceMappingInterface.td  |  45 +++-
 mlir/include/mlir/Dialect/SCF/IR/SCFOps.td|  12 +++
 mlir/lib/Dialect/GPU/CMakeLists.txt   |   1 +
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp|  45 
 .../GPU/TransformOps/GPUTransformOps.cpp  |  62 +++
 mlir/lib/Dialect/GPU/TransformOps/Utils.cpp   | 102 +-
 mlir/lib/Dialect/SCF/IR/SCF.cpp   |  43 ++--
 .../Dialect/GPU/transform-gpu-failing.mlir|  61 +++
 mlir/test/Dialect/GPU/transform-gpu.mlir  |  81 ++
 mlir/test/Dialect/SCF/invalid.mlir|  18 
 12 files changed, 444 insertions(+), 59 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td 
b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
index 63f228ca3157f..e8540027e7b77 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
@@ -252,6 +252,24 @@ def GPULaneMappingAttr
   }];
 }
 
+def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [
+  DeclareAttrInterfaceMethods ] >  {
+  let parameters = (ins "uint64_t":$mask);
+  let assemblyFormat = "`<` params `>`";
+  let description = [{
+Attribute describing how to filter the processing units that a
+region is mapped to.
+
+In the first implementation the masking is a bitfield that specifies for
+each processing unit whether it is active or not.
+
+In the future, we may want to implement this as a symbol to refer to
+dynamically defined values.
+
+Extending op semantics with an operand is deemed too intrusive at this 
time.
+  }];
+}
+
 def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", 
"memory_space", [
   DeclareAttrInterfaceMethods ] >  {
   let parameters = (ins
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h 
b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
index de512ded59fec..0a11b8f8d3fa0 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
@@ -78,7 +78,8 @@ struct GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuBlockIdBuilder : public GpuIdBuilder {
-  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false);
+  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
 };
 
 /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
@@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
   GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize,
-bool useLinearMapping = false);
+bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
   /// In the future this may be configured by the transformation.
   static constexpr int64_t kNumWarpsPerGroup = 4;
@@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpIdBuilder : public GpuIdBuilder {
   GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize,
-   bool useLinearMapping = false);
+   bool useLinearMapping = false,
+   DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
 };
 
@@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuThreadIdBuilder : public GpuIdBuilder {

[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)

2025-07-03 Thread Nicolas Vasilache via llvm-branch-commits

https://github.com/nicolasvasilache edited 
https://github.com/llvm/llvm-project/pull/146943
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)

2025-07-03 Thread Nicolas Vasilache via llvm-branch-commits

https://github.com/nicolasvasilache updated 
https://github.com/llvm/llvm-project/pull/146943

>From 403e4ba3929516ac27d51baf306dda2a043fd305 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache 
Date: Thu, 3 Jul 2025 21:26:53 +0200
Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to
 scf::ForallOp and use it to implement warp specialization.

This revision adds DeviceMaskingAttrInterface and extends
DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface
and DeviceMaskingAttrInterface.

The first implementation is in the form of a GPUMappingMaskAttr, which
can be additionally passed to the scf.forall.mapping attribute to
specify a mask on compute resources that should be active.

Support is added to GPUTransformOps to take advantage of this
information and lower to block/warpgroup/warp/thread specialization when
mapped to linear ids.

Co-authored-by: Oleksandr "Alex" Zinenko 
---
 .../Dialect/GPU/IR/GPUDeviceMappingAttr.td|  18 
 .../mlir/Dialect/GPU/TransformOps/Utils.h |  15 ++-
 .../Dialect/SCF/IR/DeviceMappingInterface.td  |  45 +++-
 mlir/include/mlir/Dialect/SCF/IR/SCFOps.td|  12 +++
 mlir/lib/Dialect/GPU/CMakeLists.txt   |   1 +
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp|  45 
 .../GPU/TransformOps/GPUTransformOps.cpp  |  62 +++
 mlir/lib/Dialect/GPU/TransformOps/Utils.cpp   | 102 +-
 mlir/lib/Dialect/SCF/IR/SCF.cpp   |  43 ++--
 .../Dialect/GPU/transform-gpu-failing.mlir|  61 +++
 mlir/test/Dialect/GPU/transform-gpu.mlir  |  81 ++
 mlir/test/Dialect/SCF/invalid.mlir|  18 
 12 files changed, 444 insertions(+), 59 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td 
b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
index 63f228ca3157f..e8540027e7b77 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
@@ -252,6 +252,24 @@ def GPULaneMappingAttr
   }];
 }
 
+def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [
+  DeclareAttrInterfaceMethods ] >  {
+  let parameters = (ins "uint64_t":$mask);
+  let assemblyFormat = "`<` params `>`";
+  let description = [{
+Attribute describing how to filter the processing units that a
+region is mapped to.
+
+In the first implementation the masking is a bitfield that specifies for
+each processing unit whether it is active or not.
+
+In the future, we may want to implement this as a symbol to refer to
+dynamically defined values.
+
+Extending op semantics with an operand is deemed too intrusive at this 
time.
+  }];
+}
+
 def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", 
"memory_space", [
   DeclareAttrInterfaceMethods ] >  {
   let parameters = (ins
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h 
b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
index de512ded59fec..0a11b8f8d3fa0 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
@@ -78,7 +78,8 @@ struct GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuBlockIdBuilder : public GpuIdBuilder {
-  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false);
+  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
 };
 
 /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
@@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
   GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize,
-bool useLinearMapping = false);
+bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
   /// In the future this may be configured by the transformation.
   static constexpr int64_t kNumWarpsPerGroup = 4;
@@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpIdBuilder : public GpuIdBuilder {
   GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize,
-   bool useLinearMapping = false);
+   bool useLinearMapping = false,
+   DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
 };
 
@@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuThreadIdBuilder : public GpuIdBuilder {

[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)

2025-07-03 Thread Nicolas Vasilache via llvm-branch-commits

https://github.com/nicolasvasilache updated 
https://github.com/llvm/llvm-project/pull/146943

>From 85aa5f8c72801f5a75142a663d6e89e83e63decc Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache 
Date: Thu, 3 Jul 2025 21:26:53 +0200
Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to
 scf::ForallOp and use it to implement warp specialization.

This revision adds DeviceMaskingAttrInterface and extends
DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface
and DeviceMaskingAttrInterface.

The first implementation is in the form of a GPUMappingMaskAttr, which
can be additionally passed to the scf.forall.mapping attribute to
specify a mask on compute resources that should be active.

Support is added to GPUTransformOps to take advantage of this
information and lower to block/warpgroup/warp/thread specialization when
mapped to linear ids.

Co-authored-by: Oleksandr "Alex" Zinenko 
---
 .../Dialect/GPU/IR/GPUDeviceMappingAttr.td|  18 
 .../mlir/Dialect/GPU/TransformOps/Utils.h |  15 ++-
 .../Dialect/SCF/IR/DeviceMappingInterface.td  |  45 +++-
 mlir/include/mlir/Dialect/SCF/IR/SCFOps.td|  12 +++
 mlir/lib/Dialect/GPU/CMakeLists.txt   |   1 +
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp|  45 
 .../GPU/TransformOps/GPUTransformOps.cpp  |  62 +++
 mlir/lib/Dialect/GPU/TransformOps/Utils.cpp   | 102 +-
 mlir/lib/Dialect/SCF/IR/SCF.cpp   |  43 ++--
 .../Dialect/GPU/transform-gpu-failing.mlir|  61 +++
 mlir/test/Dialect/GPU/transform-gpu.mlir  |  81 ++
 mlir/test/Dialect/SCF/invalid.mlir|  18 
 12 files changed, 444 insertions(+), 59 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td 
b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
index 63f228ca3157f..e8540027e7b77 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
@@ -252,6 +252,24 @@ def GPULaneMappingAttr
   }];
 }
 
+def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [
+  DeclareAttrInterfaceMethods<DeviceMaskingAttrInterface> ] >  {
+  let parameters = (ins "uint64_t":$mask);
+  let assemblyFormat = "`<` params `>`";
+  let description = [{
+Attribute describing how to filter the processing units that a
+region is mapped to.
+
+In the first implementation the masking is a bitfield that specifies for
+each processing unit whether it is active or not.
+
+In the future, we may want to implement this as a symbol to refer to
+dynamically defined values.
+
+Extending op semantics with an operand is deemed too intrusive at this 
time.
+  }];
+}
+
 def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", 
"memory_space", [
   DeclareAttrInterfaceMethods ] >  {
   let parameters = (ins
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h 
b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
index de512ded59fec..0a11b8f8d3fa0 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
@@ -78,7 +78,8 @@ struct GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuBlockIdBuilder : public GpuIdBuilder {
-  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false);
+  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
 };
 
 /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
@@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
   GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize,
-bool useLinearMapping = false);
+bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
   /// In the future this may be configured by the transformation.
   static constexpr int64_t kNumWarpsPerGroup = 4;
@@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpIdBuilder : public GpuIdBuilder {
   GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize,
-   bool useLinearMapping = false);
+   bool useLinearMapping = false,
+   DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
 };
 
@@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuThreadIdBuilder : public GpuIdBuilder {

[llvm-branch-commits] [lld] release/20.x: Backport [LLD][COFF] Disallow importing DllMain from import libraries (#146610) (PR #146699)

2025-07-03 Thread Martin Storsjö via llvm-branch-commits


@@ -313,6 +313,7 @@ struct Configuration {
   bool warnDebugInfoUnusable = true;
   bool warnLongSectionNames = true;
   bool warnStdcallFixup = true;
+  bool warnExportedDllMain = true;

mstorsjo wrote:

I don't think this is an installed header though?

https://github.com/llvm/llvm-project/pull/146699
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang][OpenMP] Issue a warning when parsing future directive spelling (PR #146933)

2025-07-03 Thread Krzysztof Parzyszek via llvm-branch-commits

https://github.com/kparzysz created 
https://github.com/llvm/llvm-project/pull/146933

OpenMP 6.0 introduced alternative spelling for some directives, with the 
previous spellings still allowed.

Warn the user when a new spelling is encountered with OpenMP version set to an 
older value.

>From 5ad103e08e8a06cfc3708ba83601e073a022bb7e Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek 
Date: Wed, 2 Jul 2025 12:49:04 -0500
Subject: [PATCH] [clang][OpenMP] Issue a warning when parsing future directive
 spelling

OpenMP 6.0 introduced alternative spelling for some directives, with the
previous spellings still being allowed.

Warn the user when a new spelling is encountered with OpenMP version set
to an older value.
---
 clang/include/clang/Basic/DiagnosticGroups.td |  4 +-
 .../clang/Basic/DiagnosticParseKinds.td   |  3 +
 clang/lib/Parse/ParseOpenMP.cpp   | 28 --
 .../test/OpenMP/openmp-6-future-spellings.cpp | 55 +++
 4 files changed, 85 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/OpenMP/openmp-6-future-spellings.cpp

diff --git a/clang/include/clang/Basic/DiagnosticGroups.td 
b/clang/include/clang/Basic/DiagnosticGroups.td
index 36fa3227fd6a6..ace8663b73a4a 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1530,9 +1530,11 @@ def OpenMPPre51Compat : 
DiagGroup<"pre-openmp-51-compat">;
 def OpenMP51Ext : DiagGroup<"openmp-51-extensions">;
 def OpenMPExtensions : DiagGroup<"openmp-extensions">;
 def OpenMPTargetException : DiagGroup<"openmp-target-exception">;
+def OpenMPFuture : DiagGroup<"openmp-future">;
 def OpenMP : DiagGroup<"openmp", [
 SourceUsesOpenMP, OpenMPClauses, OpenMPLoopForm, OpenMPTarget,
-OpenMPMapping, OpenMP51Ext, OpenMPExtensions, OpenMPTargetException
+OpenMPMapping, OpenMP51Ext, OpenMPExtensions, OpenMPTargetException,
+OpenMPFuture
   ]>;
 
 // OpenACC warnings.
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td 
b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 6c30da376dafb..87eb2b724b297 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1488,6 +1488,9 @@ def err_omp_multiple_step_or_linear_modifier : Error<
   "multiple %select{'step size'|'linear modifier'}0 found in linear clause">;
 def err_omp_deprecate_old_syntax: Error<
   "old syntax '%0' on '%1' clause was deprecated, use new syntax '%2'">;
+def warn_omp_future_directive_spelling: Warning<
+  "directive spelling '%0' is introduced in a later OpenMP version">,
+  InGroup<OpenMPFuture>;
 def warn_pragma_expected_colon_r_paren : Warning<
   "missing ':' or ')' after %0 - ignoring">, InGroup;
 def err_omp_unknown_directive : Error<
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 5256d08259b60..cb9eb3304c317 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -56,6 +56,21 @@ class DeclDirectiveListParserHelper final {
 };
 } // namespace
 
+static OpenMPDirectiveKind checkOpenMPDirectiveName(Parser &P,
+SourceLocation Loc,
+OpenMPDirectiveKind Kind,
+StringRef Name) {
+  unsigned Version = P.getLangOpts().OpenMP;
+  auto [D, VR] = getOpenMPDirectiveKindAndVersions(Name);
+  assert(D == Kind && "Directive kind mismatch");
+  // Ignore the case Version > VR.Max: In OpenMP 6.0 all prior spellings
+  // are explicitly allowed.
+  if (Version < VR.Min)
+P.Diag(Loc, diag::warn_omp_future_directive_spelling) << Name;
+
+  return Kind;
+}
+
 static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) {
   static const DirectiveNameParser DirParser;
 
@@ -65,7 +80,10 @@ static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser 
&P) {
   if (Tok.isAnnotation())
 return OMPD_unknown;
 
-  S = DirParser.consume(S, P.getPreprocessor().getSpelling(Tok));
+  std::string Concat = P.getPreprocessor().getSpelling(Tok);
+  SourceLocation Loc = Tok.getLocation();
+
+  S = DirParser.consume(S, Concat);
   if (S == nullptr)
 return OMPD_unknown;
 
@@ -73,15 +91,17 @@ static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser 
&P) {
 OpenMPDirectiveKind DKind = S->Value;
 Tok = P.getPreprocessor().LookAhead(0);
 if (!Tok.isAnnotation()) {
-  S = DirParser.consume(S, P.getPreprocessor().getSpelling(Tok));
+  std::string TS = P.getPreprocessor().getSpelling(Tok);
+  S = DirParser.consume(S, TS);
   if (S == nullptr)
-return DKind;
+return checkOpenMPDirectiveName(P, Loc, DKind, Concat);
+  Concat += ' ' + TS;
   P.ConsumeToken();
 }
   }
 
   assert(S && "Should have exited early");
-  return S->Value;
+  return checkOpenMPDirectiveName(P, Loc, S->Value, Concat);
 }
 
 static DeclarationName parseOpenMPReductionId(Parser &P) {
diff --git a/clang/t

[llvm-branch-commits] [clang] [clang][OpenMP] Issue a warning when parsing future directive spelling (PR #146933)

2025-07-03 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Krzysztof Parzyszek (kparzysz)


Changes

OpenMP 6.0 introduced alternative spelling for some directives, with the 
previous spellings still allowed.

Warn the user when a new spelling is encountered with OpenMP version set to an 
older value.

---
Full diff: https://github.com/llvm/llvm-project/pull/146933.diff


4 Files Affected:

- (modified) clang/include/clang/Basic/DiagnosticGroups.td (+3-1) 
- (modified) clang/include/clang/Basic/DiagnosticParseKinds.td (+3) 
- (modified) clang/lib/Parse/ParseOpenMP.cpp (+24-4) 
- (added) clang/test/OpenMP/openmp-6-future-spellings.cpp (+55) 


``diff
diff --git a/clang/include/clang/Basic/DiagnosticGroups.td 
b/clang/include/clang/Basic/DiagnosticGroups.td
index 36fa3227fd6a6..ace8663b73a4a 100644
--- a/clang/include/clang/Basic/DiagnosticGroups.td
+++ b/clang/include/clang/Basic/DiagnosticGroups.td
@@ -1530,9 +1530,11 @@ def OpenMPPre51Compat : 
DiagGroup<"pre-openmp-51-compat">;
 def OpenMP51Ext : DiagGroup<"openmp-51-extensions">;
 def OpenMPExtensions : DiagGroup<"openmp-extensions">;
 def OpenMPTargetException : DiagGroup<"openmp-target-exception">;
+def OpenMPFuture : DiagGroup<"openmp-future">;
 def OpenMP : DiagGroup<"openmp", [
 SourceUsesOpenMP, OpenMPClauses, OpenMPLoopForm, OpenMPTarget,
-OpenMPMapping, OpenMP51Ext, OpenMPExtensions, OpenMPTargetException
+OpenMPMapping, OpenMP51Ext, OpenMPExtensions, OpenMPTargetException,
+OpenMPFuture
   ]>;
 
 // OpenACC warnings.
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td 
b/clang/include/clang/Basic/DiagnosticParseKinds.td
index 6c30da376dafb..87eb2b724b297 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1488,6 +1488,9 @@ def err_omp_multiple_step_or_linear_modifier : Error<
   "multiple %select{'step size'|'linear modifier'}0 found in linear clause">;
 def err_omp_deprecate_old_syntax: Error<
   "old syntax '%0' on '%1' clause was deprecated, use new syntax '%2'">;
+def warn_omp_future_directive_spelling: Warning<
+  "directive spelling '%0' is introduced in a later OpenMP version">,
+  InGroup<OpenMPFuture>;
 def warn_pragma_expected_colon_r_paren : Warning<
   "missing ':' or ')' after %0 - ignoring">, InGroup;
 def err_omp_unknown_directive : Error<
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index 5256d08259b60..cb9eb3304c317 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -56,6 +56,21 @@ class DeclDirectiveListParserHelper final {
 };
 } // namespace
 
+static OpenMPDirectiveKind checkOpenMPDirectiveName(Parser &P,
+SourceLocation Loc,
+OpenMPDirectiveKind Kind,
+StringRef Name) {
+  unsigned Version = P.getLangOpts().OpenMP;
+  auto [D, VR] = getOpenMPDirectiveKindAndVersions(Name);
+  assert(D == Kind && "Directive kind mismatch");
+  // Ignore the case Version > VR.Max: In OpenMP 6.0 all prior spellings
+  // are explicitly allowed.
+  if (Version < VR.Min)
+P.Diag(Loc, diag::warn_omp_future_directive_spelling) << Name;
+
+  return Kind;
+}
+
 static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) {
   static const DirectiveNameParser DirParser;
 
@@ -65,7 +80,10 @@ static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser 
&P) {
   if (Tok.isAnnotation())
 return OMPD_unknown;
 
-  S = DirParser.consume(S, P.getPreprocessor().getSpelling(Tok));
+  std::string Concat = P.getPreprocessor().getSpelling(Tok);
+  SourceLocation Loc = Tok.getLocation();
+
+  S = DirParser.consume(S, Concat);
   if (S == nullptr)
 return OMPD_unknown;
 
@@ -73,15 +91,17 @@ static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser 
&P) {
 OpenMPDirectiveKind DKind = S->Value;
 Tok = P.getPreprocessor().LookAhead(0);
 if (!Tok.isAnnotation()) {
-  S = DirParser.consume(S, P.getPreprocessor().getSpelling(Tok));
+  std::string TS = P.getPreprocessor().getSpelling(Tok);
+  S = DirParser.consume(S, TS);
   if (S == nullptr)
-return DKind;
+return checkOpenMPDirectiveName(P, Loc, DKind, Concat);
+  Concat += ' ' + TS;
   P.ConsumeToken();
 }
   }
 
   assert(S && "Should have exited early");
-  return S->Value;
+  return checkOpenMPDirectiveName(P, Loc, S->Value, Concat);
 }
 
 static DeclarationName parseOpenMPReductionId(Parser &P) {
diff --git a/clang/test/OpenMP/openmp-6-future-spellings.cpp 
b/clang/test/OpenMP/openmp-6-future-spellings.cpp
new file mode 100644
index 0..642ed3502d475
--- /dev/null
+++ b/clang/test/OpenMP/openmp-6-future-spellings.cpp
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -ferror-limit 100 -o - 
%s
+
+// expected-warning@+1 {{directive spelling 'begin declare_target' is 
introduced in a later OpenMP version}

[llvm-branch-commits] [clang] [llvm] [NFC][HLSL][RootSignature] Split up `HLSLRootSignatureUtils` (PR #146124)

2025-07-03 Thread via llvm-branch-commits

https://github.com/joaosaffran approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/146124
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang][OpenMP] Issue a warning when parsing future directive spelling (PR #146933)

2025-07-03 Thread Alexey Bataev via llvm-branch-commits

https://github.com/alexey-bataev approved this pull request.


https://github.com/llvm/llvm-project/pull/146933
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)

2025-07-03 Thread via llvm-branch-commits

https://github.com/joaosaffran approved this pull request.


https://github.com/llvm/llvm-project/pull/145828
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/20.x: Backport [LLD][COFF] Disallow importing DllMain from import libraries (#146610) (PR #146699)

2025-07-03 Thread Alexandre Ganea via llvm-branch-commits

aganea wrote:

@rnk @mstorsjo is it ok if we integrate this into the release? @tstellar will 
there be a 20.1.8?

https://github.com/llvm/llvm-project/pull/146699
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][do concurrent] Extend `getAllocaBlock()` and emit yields correctly (PR #146853)

2025-07-03 Thread Tom Eccles via llvm-branch-commits

https://github.com/tblah approved this pull request.

LGTM, thanks

https://github.com/llvm/llvm-project/pull/146853
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Improve exception handling in NFC-Mode (PR #146513)

2025-07-03 Thread Paschalis Mpeis via llvm-branch-commits

https://github.com/paschalis-mpeis edited 
https://github.com/llvm/llvm-project/pull/146513
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Improve exception handling in NFC-Mode (PR #146513)

2025-07-03 Thread Paschalis Mpeis via llvm-branch-commits

paschalis-mpeis wrote:

Forced-push to rebase since the parent PR now has a `--create-wrapper` flag.

In the latest patch, `switch_back` is a function called whenever something goes 
wrong after checking out the previous revision, i.e.:
- building the old binary fails, or
- setting up the wrapper fails.

I also delete llvm-bolt at the start, since we rebuild it for the current 
revision anyway.

https://github.com/llvm/llvm-project/pull/146513
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT][NFC] Update nfc-check-setup.py guidance (PR #146659)

2025-07-03 Thread Paschalis Mpeis via llvm-branch-commits


@@ -156,9 +158,8 @@ def main():
 os.replace(bolt_path, f"{bolt_path}.old")
 
 print(
-f"Build directory {args.build_dir} is ready to run BOLT tests, e.g.\n"
-"\tbin/llvm-lit -sv tools/bolt/test\nor\n"
-"\tbin/llvm-lit -sv tools/bolttests"
+f"Build directory {args.build_dir} is ready for NFC-Mode comparison "
+"between the two revisions."

paschalis-mpeis wrote:

Will do, thanks! Setting up the wrapper now stays under a flag, so I'll 
reintroduce this example when I rebase the patch.

https://github.com/llvm/llvm-project/pull/146659
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PHIElimination] Reuse existing COPY in predecessor basic block (Take Two) (PR #146806)

2025-07-03 Thread Mikael Holmén via llvm-branch-commits

mikaelholmen wrote:

> @mikaelholmen @mstorsjo @macurtis-amd @sjoerdmeijer @sushgokh We've decided 
> that it's best to revert the original PR (see #146850), sorry for wasting 
> your time. This PR is trying to reintroduce it with fixes to the issues 
> you've presented. Can I humbly ask you to test this commit one final time?

I've re-tested the cases I've reported problems for with this patch on top of 
our downstream compiler based on trunk version c79fcfee41 and they still work.

https://github.com/llvm/llvm-project/pull/146806
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] 8763227 - Revert "[win][aarch64] Always reserve frame pointers for Arm64 Windows (#146582)"

2025-07-03 Thread via llvm-branch-commits

Author: David Spickett
Date: 2025-07-03T10:02:43+01:00
New Revision: 8763227d14aae0994e33e34ffc1948ca95c1efcb

URL: 
https://github.com/llvm/llvm-project/commit/8763227d14aae0994e33e34ffc1948ca95c1efcb
DIFF: 
https://github.com/llvm/llvm-project/commit/8763227d14aae0994e33e34ffc1948ca95c1efcb.diff

LOG: Revert "[win][aarch64] Always reserve frame pointers for Arm64 Windows 
(#146582)"

This reverts commit a74c7d877637f31ff25308969ef7ca6ed94aacc5.

Added: 


Modified: 
clang/lib/Driver/ToolChains/CommonArgs.cpp
clang/test/Driver/frame-pointer-elim.c
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/lib/Target/AArch64/AArch64FrameLowering.h
llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
llvm/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll
llvm/test/CodeGen/AArch64/win-sve.ll
llvm/test/CodeGen/AArch64/wincfi-missing-seh-directives.ll
llvm/test/CodeGen/AArch64/wineh-frame5.mir
llvm/test/CodeGen/AArch64/wineh-frame7.mir

Removed: 




diff  --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp 
b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 2fcf9b28dc746..070901f037823 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -174,13 +174,7 @@ static bool mustUseNonLeafFramePointerForTarget(const 
llvm::Triple &Triple) {
 // even if new frame records are not created.
 static bool mustMaintainValidFrameChain(const llvm::opt::ArgList &Args,
 const llvm::Triple &Triple) {
-  switch (Triple.getArch()) {
-  default:
-return false;
-  case llvm::Triple::arm:
-  case llvm::Triple::armeb:
-  case llvm::Triple::thumb:
-  case llvm::Triple::thumbeb:
+  if (Triple.isARM() || Triple.isThumb()) {
 // For 32-bit Arm, the -mframe-chain=aapcs and -mframe-chain=aapcs+leaf
 // options require the frame pointer register to be reserved (or point to a
 // new AAPCS-compilant frame record), even with-fno-omit-frame-pointer.
@@ -189,13 +183,8 @@ static bool mustMaintainValidFrameChain(const 
llvm::opt::ArgList &Args,
   return V != "none";
 }
 return false;
-
-  case llvm::Triple::aarch64:
-// Arm64 Windows requires that the frame chain is valid, as there is no
-// way to indicate during a stack walk that a frame has used the frame
-// pointer as a general purpose register.
-return Triple.isOSWindows();
   }
+  return false;
 }
 
 // True if a target-specific option causes -fno-omit-frame-pointer to also

diff  --git a/clang/test/Driver/frame-pointer-elim.c 
b/clang/test/Driver/frame-pointer-elim.c
index 0dd7eb0c738db..f64ff6efc7261 100644
--- a/clang/test/Driver/frame-pointer-elim.c
+++ b/clang/test/Driver/frame-pointer-elim.c
@@ -4,8 +4,6 @@
 // KEEP-NON-LEAF: "-mframe-pointer=non-leaf"
 // KEEP-NONE-NOT: warning: argument unused
 // KEEP-NONE: "-mframe-pointer=none"
-// KEEP-RESERVED-NOT: warning: argument unused
-// KEEP-RESERVED: "-mframe-pointer=reserved"
 
 // On Linux x86, omit frame pointer when optimization is enabled.
 // RUN: %clang -### --target=i386-linux -S -fomit-frame-pointer %s 2>&1 | \
@@ -217,9 +215,5 @@
 // RUN: %clang -### --target=aarch64-none-elf -S -O1 -fno-omit-frame-pointer 
%s 2>&1 |  \
 // RUN:   FileCheck --check-prefix=KEEP-NON-LEAF %s
 
-// AArch64 Windows requires that the frame pointer be reserved
-// RUN: %clang -### --target=aarch64-pc-windows-msvc -S -fomit-frame-pointer 
%s 2>&1 |  \
-// RUN:   FileCheck --check-prefix=KEEP-RESERVED %s
-
 void f0() {}
 void f1() { f0(); }

diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp 
b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 3ef7e5265c724..6f1ce5bdbe286 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -518,27 +518,6 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction 
&MF) const {
   return false;
 }
 
-/// Should the Frame Pointer be reserved for the current function?
-bool AArch64FrameLowering::isFPReserved(const MachineFunction &MF) const {
-  const TargetMachine &TM = MF.getTarget();
-  const Triple &TT = TM.getTargetTriple();
-
-  // These OSes require the frame chain is valid, even if the current frame 
does
-  // not use a frame pointer.
-  if (TT.isOSDarwin() || TT.isOSWindows())
-return true;
-
-  // If the function has a frame pointer, it is reserved.
-  if (hasFP(MF))
-return true;
-
-  // Frontend has requested to preserve the frame pointer.
-  if (TM.Options.FramePointerIsReserved(MF))
-return true;
-
-  return false;
-}
-
 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
 /// not required, we reserve argument space for call sites in the function
 /// immediately on entry to the current function.  This eliminates the need for

diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h 
b/llvm/lib/Target/AArch64/AArch6

[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)

2025-07-03 Thread Jeremy Morse via llvm-branch-commits

https://github.com/jmorse commented:

Tentative LGTM, noting that you're planning on updating docs in this PR too. 
I have no familiarity with the Python code that was modified, alas.

https://github.com/llvm/llvm-project/pull/143594
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)

2025-07-03 Thread Jeremy Morse via llvm-branch-commits

https://github.com/jmorse edited 
https://github.com/llvm/llvm-project/pull/143594
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)

2025-07-03 Thread Jeremy Morse via llvm-branch-commits


@@ -59,6 +65,52 @@ cl::opt DebugifyLevel(
 
 raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
 
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+// These maps refer to addresses in this instance of LLVM, so we can reuse them
+// everywhere - therefore, we store them at file scope.
+static SymbolizedAddressMap SymbolizedAddrs;
+static AddressSet UnsymbolizedAddrs;
+
+std::string symbolizeStackTrace(const Instruction *I) {
+  // We flush the set of unsymbolized addresses at the latest possible moment,
+  // i.e. now.
+  if (!UnsymbolizedAddrs.empty()) {
+sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs);
+UnsymbolizedAddrs.clear();
+  }
+  auto OriginStackTraces = I->getDebugLoc().getOriginStackTraces();
+  std::string Result;
+  raw_string_ostream OS(Result);
+  for (size_t TraceIdx = 0; TraceIdx < OriginStackTraces.size(); ++TraceIdx) {
+if (TraceIdx != 0)
+  OS << "\n";
+auto &[Depth, StackTrace] = OriginStackTraces[TraceIdx];
+unsigned VirtualFrameNo = 0;
+for (int Frame = 0; Frame < Depth; ++Frame) {
+  assert(SymbolizedAddrs.contains(StackTrace[Frame]) &&
+ "Expected each address to have been symbolized.");
+  for (std::string &SymbolizedFrame : SymbolizedAddrs[StackTrace[Frame]]) {
+OS << right_justify(formatv("#{0}", VirtualFrameNo++).str(), 
std::log10(Depth) + 2)
+  << ' ' << SymbolizedFrame << '\n';
+  }
+}
+  }
+  return Result;
+}
+void collectStackAddresses(Instruction &I) {
+  auto &OriginStackTraces = I.getDebugLoc().getOriginStackTraces();

jmorse wrote:

Major nit; would we be able to name the type here rather than `auto`? It'll 
make the resulting code a lot easier to localise and dissect for future 
readers. (The next `auto` makes sense of course).

https://github.com/llvm/llvm-project/pull/143594
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)

2025-07-03 Thread Jeremy Morse via llvm-branch-commits


@@ -59,6 +65,52 @@ cl::opt DebugifyLevel(
 
 raw_ostream &dbg() { return Quiet ? nulls() : errs(); }
 
+#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
+// These maps refer to addresses in this instance of LLVM, so we can reuse them
+// everywhere - therefore, we store them at file scope.

jmorse wrote:

I feel the term "instance of LLVM" could be more precise: can we use the word 
"process" here? All other ambiguities and difficulties of fixed addresses are 
brought to mind with the word process.

https://github.com/llvm/llvm-project/pull/143594
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][do concurrent] Extend `getAllocaBlock()` and emit yields correctly (PR #146853)

2025-07-03 Thread Kareem Ergawy via llvm-branch-commits

https://github.com/ergawy created 
https://github.com/llvm/llvm-project/pull/146853

Handles some loose ends in `do concurrent` reduction declarations. This PR 
extends `getAllocaBlock` to handle declare ops, and also emits `fir.yield` in 
all regions.

>From caabbde941c1c870a850d4ed85d1b81d5e5d0759 Mon Sep 17 00:00:00 2001
From: ergawy 
Date: Thu, 3 Jul 2025 05:45:40 -0500
Subject: [PATCH] [flang][do concurrent] Extend `getAllocaBlock()` and emit
 yields correctly

Handles some loose ends in `do concurrent` reduction declarations. This
PR extends `getAllocaBlock` to handle declare ops, and also emits
`fir.yield` in all regions.
---
 .../lib/Lower/Support/ReductionProcessor.cpp  |  4 ++-
 flang/lib/Optimizer/Builder/FIRBuilder.cpp|  3 ++
 .../test/HLFIR/fir-reduction-alloca-block.fir | 31 +++
 .../do_concurrent_reduce_allocatable.f90  | 22 +
 4 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 flang/test/HLFIR/fir-reduction-alloca-block.fir
 create mode 100644 flang/test/Lower/do_concurrent_reduce_allocatable.f90

diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp 
b/flang/lib/Lower/Support/ReductionProcessor.cpp
index 539d5cd37c2ea..14b2c9836748f 100644
--- a/flang/lib/Lower/Support/ReductionProcessor.cpp
+++ b/flang/lib/Lower/Support/ReductionProcessor.cpp
@@ -529,7 +529,9 @@ static void createReductionAllocAndInitRegions(
 converter, loc, type, initValue, initBlock,
 reductionDecl.getInitializerAllocArg(),
 reductionDecl.getInitializerMoldArg(), 
reductionDecl.getCleanupRegion(),
-DeclOperationKind::Reduction);
+DeclOperationKind::Reduction, /*sym=*/nullptr,
+/*cannotHaveLowerBounds=*/false,
+/*isDoConcurrent*/ std::is_same_v);
   }
 
   if (fir::isa_trivial(ty)) {
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp 
b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index b5cabdb830e5c..acd5a88a2582d 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -286,6 +286,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
   if (auto firLocalOp = 
getRegion().getParentOfType())
 return &getRegion().front();
 
+  if (auto firLocalOp = getRegion().getParentOfType())
+return &getRegion().front();
+
   return getEntryBlock();
 }
 
diff --git a/flang/test/HLFIR/fir-reduction-alloca-block.fir 
b/flang/test/HLFIR/fir-reduction-alloca-block.fir
new file mode 100644
index 0..75857cfbe01d3
--- /dev/null
+++ b/flang/test/HLFIR/fir-reduction-alloca-block.fir
@@ -0,0 +1,31 @@
+// Tests that `fir.local` ops are able to provide an alloca block when 
required.
+
+// RUN: fir-opt %s -convert-hlfir-to-fir | FileCheck %s
+
+fir.declare_reduction @add_reduction_byref_box_heap_UxUxf32 : 
!fir.ref>>> alloc {
+  %0 = fir.alloca !fir.box>>
+  fir.yield(%0 : !fir.ref>>>)
+} init {
+^bb0(%arg0: !fir.ref>>>, %arg1: 
!fir.ref>>>):
+  %cst = arith.constant 0.00e+00 : f32
+  %0 = fir.load %arg1 : !fir.ref>>>
+  hlfir.assign %cst to %0 : f32, !fir.box>>
+  fir.yield(%arg1 : !fir.ref>>>)
+} combiner {
+^bb0(%arg0: !fir.ref>>>, %arg1: 
!fir.ref>>>):
+  fir.yield(%arg0 : !fir.ref>>>)
+}
+
+// CHECK-LABEL:   fir.declare_reduction @add_reduction_byref_box_heap_UxUxf32 
: !fir.ref>>> alloc {
+// CHECK:   %[[VAL_0:.*]] = fir.alloca 
!fir.box>>
+// CHECK:   fir.yield(%[[VAL_0]] : 
!fir.ref>>>)
+
+// CHECK-LABEL:   } init {
+// CHECK: ^bb0(%[[VAL_0:.*]]: 
!fir.ref>>>, %[[VAL_1:.*]]: 
!fir.ref>>>):
+// CHECK:   %[[VAL_2:.*]] = fir.alloca 
!fir.box>>
+// CHECK:   fir.yield(%[[VAL_1]] : 
!fir.ref>>>)
+
+// CHECK-LABEL:   } combiner {
+// CHECK: ^bb0(%[[VAL_0:.*]]: 
!fir.ref>>>, %[[VAL_1:.*]]: 
!fir.ref>>>):
+// CHECK:   fir.yield(%[[VAL_0]] : 
!fir.ref>>>)
+// CHECK: }
diff --git a/flang/test/Lower/do_concurrent_reduce_allocatable.f90 
b/flang/test/Lower/do_concurrent_reduce_allocatable.f90
new file mode 100644
index 0..873fd10dd1b97
--- /dev/null
+++ b/flang/test/Lower/do_concurrent_reduce_allocatable.f90
@@ -0,0 +1,22 @@
+! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s
+
+subroutine do_concurrent_allocatable
+  integer :: i
+  real, allocatable, dimension(:,:) :: x
+
+  do concurrent (i = 1:10) reduce(+: x)
+  end do
+end subroutine
+
+! CHECK: fir.declare_reduction @[[RED_OP:.*]] : ![[RED_TYPE:.*]] alloc {
+! CHECK:   %[[ALLOC:.*]] = fir.alloca
+! CHECK:   fir.yield(%[[ALLOC]] : ![[RED_TYPE]])
+! CHECK: } init {
+! CHECK: ^bb0(%{{.*}}: ![[RED_TYPE]], %[[RED_ARG:.*]]: ![[RED_TYPE]]):
+! CHECK:   fir.yield(%[[RED_ARG]] : !{{.*}})
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[COMB_RES:.*]]: ![[RED_TYPE]], %{{.*}}: ![[RED_TYPE]]):
+! CHECK:   fir.yield(%[[COMB_RES]] : !{{.*}})
+! CHECK: } cleanup {
+! CHECK:   fir.yield
+! CHECK: }

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

[llvm-branch-commits] [llvm] [PHIElimination] Reuse existing COPY in predecessor basic block (Take Two) (PR #146806)

2025-07-03 Thread Guy David via llvm-branch-commits

guy-david wrote:

@mikaelholmen @mstorsjo @macurtis-amd @sjoerdmeijer @sushgokh
We've decided that it's best to revert the original PR (see 
https://github.com/llvm/llvm-project/pull/146850), sorry for wasting your time.
This PR is trying to reintroduce it with fixes to the issues you've presented. 
Can I humbly ask you to test this commit one final time?

https://github.com/llvm/llvm-project/pull/146806
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [flang] [flang][do concurrent] Extned `getAllocaBlock()` and emit yields correctly (PR #146853)

2025-07-03 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-flang-fir-hlfir

Author: Kareem Ergawy (ergawy)


Changes

Handles some loose ends in `do concurrent` reduction declarations. This PR 
extends `getAllocaBlock` to handle declare ops, and also emits `fir.yield` in 
all regions.

---
Full diff: https://github.com/llvm/llvm-project/pull/146853.diff


4 Files Affected:

- (modified) flang/lib/Lower/Support/ReductionProcessor.cpp (+3-1) 
- (modified) flang/lib/Optimizer/Builder/FIRBuilder.cpp (+3) 
- (added) flang/test/HLFIR/fir-reduction-alloca-block.fir (+31) 
- (added) flang/test/Lower/do_concurrent_reduce_allocatable.f90 (+22) 


```diff
diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp 
b/flang/lib/Lower/Support/ReductionProcessor.cpp
index 539d5cd37c2ea..14b2c9836748f 100644
--- a/flang/lib/Lower/Support/ReductionProcessor.cpp
+++ b/flang/lib/Lower/Support/ReductionProcessor.cpp
@@ -529,7 +529,9 @@ static void createReductionAllocAndInitRegions(
 converter, loc, type, initValue, initBlock,
 reductionDecl.getInitializerAllocArg(),
 reductionDecl.getInitializerMoldArg(), 
reductionDecl.getCleanupRegion(),
-DeclOperationKind::Reduction);
+DeclOperationKind::Reduction, /*sym=*/nullptr,
+/*cannotHaveLowerBounds=*/false,
+/*isDoConcurrent*/ std::is_same_v);
   }
 
   if (fir::isa_trivial(ty)) {
diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp 
b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
index b5cabdb830e5c..acd5a88a2582d 100644
--- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp
+++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp
@@ -286,6 +286,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() {
   if (auto firLocalOp = 
getRegion().getParentOfType())
 return &getRegion().front();
 
+  if (auto firLocalOp = getRegion().getParentOfType())
+return &getRegion().front();
+
   return getEntryBlock();
 }
 
diff --git a/flang/test/HLFIR/fir-reduction-alloca-block.fir 
b/flang/test/HLFIR/fir-reduction-alloca-block.fir
new file mode 100644
index 0..75857cfbe01d3
--- /dev/null
+++ b/flang/test/HLFIR/fir-reduction-alloca-block.fir
@@ -0,0 +1,31 @@
+// Tests that `fir.local` ops are able to provide an alloca block when 
required.
+
+// RUN: fir-opt %s -convert-hlfir-to-fir | FileCheck %s
+
+fir.declare_reduction @add_reduction_byref_box_heap_UxUxf32 : 
!fir.ref>>> alloc {
+  %0 = fir.alloca !fir.box>>
+  fir.yield(%0 : !fir.ref>>>)
+} init {
+^bb0(%arg0: !fir.ref>>>, %arg1: 
!fir.ref>>>):
+  %cst = arith.constant 0.00e+00 : f32
+  %0 = fir.load %arg1 : !fir.ref>>>
+  hlfir.assign %cst to %0 : f32, !fir.box>>
+  fir.yield(%arg1 : !fir.ref>>>)
+} combiner {
+^bb0(%arg0: !fir.ref>>>, %arg1: 
!fir.ref>>>):
+  fir.yield(%arg0 : !fir.ref>>>)
+}
+
+// CHECK-LABEL:   fir.declare_reduction @add_reduction_byref_box_heap_UxUxf32 
: !fir.ref>>> alloc {
+// CHECK:   %[[VAL_0:.*]] = fir.alloca 
!fir.box>>
+// CHECK:   fir.yield(%[[VAL_0]] : 
!fir.ref>>>)
+
+// CHECK-LABEL:   } init {
+// CHECK: ^bb0(%[[VAL_0:.*]]: 
!fir.ref>>>, %[[VAL_1:.*]]: 
!fir.ref>>>):
+// CHECK:   %[[VAL_2:.*]] = fir.alloca 
!fir.box>>
+// CHECK:   fir.yield(%[[VAL_1]] : 
!fir.ref>>>)
+
+// CHECK-LABEL:   } combiner {
+// CHECK: ^bb0(%[[VAL_0:.*]]: 
!fir.ref>>>, %[[VAL_1:.*]]: 
!fir.ref>>>):
+// CHECK:   fir.yield(%[[VAL_0]] : 
!fir.ref>>>)
+// CHECK: }
diff --git a/flang/test/Lower/do_concurrent_reduce_allocatable.f90 
b/flang/test/Lower/do_concurrent_reduce_allocatable.f90
new file mode 100644
index 0..873fd10dd1b97
--- /dev/null
+++ b/flang/test/Lower/do_concurrent_reduce_allocatable.f90
@@ -0,0 +1,22 @@
+! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s
+
+subroutine do_concurrent_allocatable
+  integer :: i
+  real, allocatable, dimension(:,:) :: x
+
+  do concurrent (i = 1:10) reduce(+: x)
+  end do
+end subroutine
+
+! CHECK: fir.declare_reduction @[[RED_OP:.*]] : ![[RED_TYPE:.*]] alloc {
+! CHECK:   %[[ALLOC:.*]] = fir.alloca
+! CHECK:   fir.yield(%[[ALLOC]] : ![[RED_TYPE]])
+! CHECK: } init {
+! CHECK: ^bb0(%{{.*}}: ![[RED_TYPE]], %[[RED_ARG:.*]]: ![[RED_TYPE]]):
+! CHECK:   fir.yield(%[[RED_ARG]] : !{{.*}})
+! CHECK: } combiner {
+! CHECK: ^bb0(%[[COMB_RES:.*]]: ![[RED_TYPE]], %{{.*}}: ![[RED_TYPE]]):
+! CHECK:   fir.yield(%[[COMB_RES]] : !{{.*}})
+! CHECK: } cleanup {
+! CHECK:   fir.yield
+! CHECK: }

```




https://github.com/llvm/llvm-project/pull/146853
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PHIElimination] Reuse existing COPY in predecessor basic block (Take Two) (PR #146806)

2025-07-03 Thread Guy David via llvm-branch-commits

https://github.com/guy-david edited 
https://github.com/llvm/llvm-project/pull/146806
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [PHIElimination] Reuse existing COPY in predecessor basic block (Take Two) (PR #146806)

2025-07-03 Thread Sushant Gokhale via llvm-branch-commits

sushgokh wrote:

> @mikaelholmen @mstorsjo @macurtis-amd @sjoerdmeijer @sushgokh We've decided 
> that it's best to revert the original PR (see #146850), sorry for wasting 
> your time. This PR is trying to reintroduce it with fixes to the issues 
> you've presented. Can I humbly ask you to test this commit one final time?

SHA f5c62ee0fa04 + this PR was passing for us.

Now, trunk + this PR has again started failing.

https://github.com/llvm/llvm-project/pull/146806
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] 6ac62f9 - fix building error

2025-07-03 Thread via llvm-branch-commits

Author: shore
Date: 2025-07-04T09:55:18+08:00
New Revision: 6ac62f931e55eb02e546d67c8ea28d9a672f7fd1

URL: 
https://github.com/llvm/llvm-project/commit/6ac62f931e55eb02e546d67c8ea28d9a672f7fd1
DIFF: 
https://github.com/llvm/llvm-project/commit/6ac62f931e55eb02e546d67c8ea28d9a672f7fd1.diff

LOG: fix building error

Added: 


Modified: 
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/test/lit.cfg.py

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index fd679a9933cf0..6f4e93bffe198 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4563,6 +4563,7 @@ static SrcStatus getNegStatus(Register Reg, SrcStatus S,
   default:
 llvm_unreachable("unexpected SrcStatus");
   }
+  llvm_unreachable("unexpected SrcStatus");
 }
 
 static std::optional>

diff  --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index bd6e37c848d8c..fddade78df9b6 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -474,7 +474,7 @@ def have_cxx_shared_library():
 print("could not exec llvm-readobj")
 return False
 
-readobj_out = readobj_cmd.stdout.read().decode("ascii")
+readobj_out = readobj_cmd.stdout.read().decode("utf-8")
 readobj_cmd.wait()
 
 regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)")



___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [llvm] [DirectX][Draft] validate registers are bound to root signature (PR #146785)

2025-07-03 Thread via llvm-branch-commits

https://github.com/joaosaffran updated 
https://github.com/llvm/llvm-project/pull/146785

>From 76d633d2b2b70ae6eaa1e7c40ef09e5f6ef9ae74 Mon Sep 17 00:00:00 2001
From: joaosaffran 
Date: Thu, 26 Jun 2025 19:28:01 +
Subject: [PATCH 1/5] refactoring

---
 .../lib/Target/DirectX/DXContainerGlobals.cpp |  9 ++--
 llvm/lib/Target/DirectX/DXILRootSignature.cpp | 12 ++---
 llvm/lib/Target/DirectX/DXILRootSignature.h   | 45 ++-
 3 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp 
b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index 9c38901f6821f..fa27c4665cfbe 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -160,18 +160,17 @@ void DXContainerGlobals::addRootSignature(Module &M,
 
   assert(MMI.EntryPropertyVec.size() == 1);
 
-  auto &RSA = getAnalysis();
+  auto &RSA = getAnalysis().getRSInfo();
   const Function *EntryFunction = MMI.EntryPropertyVec[0].Entry;
-  const auto &FuncRs = RSA.find(EntryFunction);
+  const auto &RS = RSA.getDescForFunction(EntryFunction);
 
-  if (FuncRs == RSA.end())
+  if (!RS )
 return;
 
-  const RootSignatureDesc &RS = FuncRs->second;
   SmallString<256> Data;
   raw_svector_ostream OS(Data);
 
-  RS.write(OS);
+  RS->write(OS);
 
   Constant *Constant =
   ConstantDataArray::getString(M.getContext(), Data, /*AddNull*/ false);
diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp 
b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index 29e78fcce5262..4094df160ef6f 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -554,9 +554,9 @@ analyzeModule(Module &M) {
 
 AnalysisKey RootSignatureAnalysis::Key;
 
-SmallDenseMap
-RootSignatureAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
-  return analyzeModule(M);
+RootSignatureBindingInfo RootSignatureAnalysis::run(Module &M,
+ModuleAnalysisManager &AM) 
{
+  return RootSignatureBindingInfo(analyzeModule(M));
 }
 
 
//===--===//
@@ -564,8 +564,7 @@ RootSignatureAnalysis::run(Module &M, ModuleAnalysisManager 
&AM) {
 PreservedAnalyses RootSignatureAnalysisPrinter::run(Module &M,
 ModuleAnalysisManager &AM) 
{
 
-  SmallDenseMap &RSDMap =
-  AM.getResult(M);
+  RootSignatureBindingInfo &RSDMap = AM.getResult(M);
 
   OS << "Root Signature Definitions"
  << "\n";
@@ -636,7 +635,8 @@ PreservedAnalyses RootSignatureAnalysisPrinter::run(Module 
&M,
 
 
//===--===//
 bool RootSignatureAnalysisWrapper::runOnModule(Module &M) {
-  FuncToRsMap = analyzeModule(M);
+  FuncToRsMap = std::make_unique(
+  RootSignatureBindingInfo(analyzeModule(M)));
   return false;
 }
 
diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.h 
b/llvm/lib/Target/DirectX/DXILRootSignature.h
index b45cebc15fd39..fef933811f840 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.h
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.h
@@ -33,16 +33,43 @@ enum class RootSignatureElementKind {
   CBV = 5,
   DescriptorTable = 6,
 };
+
+class RootSignatureBindingInfo {
+  private:
+SmallDenseMap FuncToRsMap;
+
+  public:
+  using iterator =
+SmallDenseMap::iterator;
+
+  RootSignatureBindingInfo () = default;
+  RootSignatureBindingInfo(SmallDenseMap Map) : FuncToRsMap(Map) {};
+
+  iterator find(const Function *F) { return FuncToRsMap.find(F); }
+
+  iterator end() { return FuncToRsMap.end(); }
+
+  std::optional getDescForFunction(const Function* 
F) {
+const auto FuncRs = find(F);
+if (FuncRs == end())
+  return std::nullopt;
+
+return FuncRs->second;
+  }
+  
+};
+
 class RootSignatureAnalysis : public AnalysisInfoMixin {
   friend AnalysisInfoMixin;
   static AnalysisKey Key;
 
 public:
-  RootSignatureAnalysis() = default;
 
-  using Result = SmallDenseMap;
+RootSignatureAnalysis() = default;
 
-  SmallDenseMap
+  using Result = RootSignatureBindingInfo;
+  
+  RootSignatureBindingInfo
   run(Module &M, ModuleAnalysisManager &AM);
 };
 
@@ -52,20 +79,16 @@ class RootSignatureAnalysis : public 
AnalysisInfoMixin {
 /// passes which run through the legacy pass manager.
 class RootSignatureAnalysisWrapper : public ModulePass {
 private:
-  SmallDenseMap FuncToRsMap;
+  std::unique_ptr FuncToRsMap;
 
 public:
   static char ID;
+  using Result = RootSignatureBindingInfo;
 
   RootSignatureAnalysisWrapper() : ModulePass(ID) {}
 
-  using iterator =
-  SmallDenseMap::iterator;
-
-  iterator find(const Function *F) { return FuncToRsMap.find(F); }
-
-  iterator end() { return FuncToRsMap.end(); }
-
+  RootSignatureBindingInfo& getRSInfo() {return *FuncToRsMap;}
+  
   bool runOnModule(Module &M) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;

>From 75

[llvm-branch-commits] [clang] [llvm] [DirectX][Draft] validate registers are bound to root signature (PR #146785)

2025-07-03 Thread via llvm-branch-commits

https://github.com/joaosaffran updated 
https://github.com/llvm/llvm-project/pull/146785

>From 76d633d2b2b70ae6eaa1e7c40ef09e5f6ef9ae74 Mon Sep 17 00:00:00 2001
From: joaosaffran 
Date: Thu, 26 Jun 2025 19:28:01 +
Subject: [PATCH 1/5] refactoring

---
 .../lib/Target/DirectX/DXContainerGlobals.cpp |  9 ++--
 llvm/lib/Target/DirectX/DXILRootSignature.cpp | 12 ++---
 llvm/lib/Target/DirectX/DXILRootSignature.h   | 45 ++-
 3 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp 
b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
index 9c38901f6821f..fa27c4665cfbe 100644
--- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
+++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp
@@ -160,18 +160,17 @@ void DXContainerGlobals::addRootSignature(Module &M,
 
   assert(MMI.EntryPropertyVec.size() == 1);
 
-  auto &RSA = getAnalysis();
+  auto &RSA = getAnalysis().getRSInfo();
   const Function *EntryFunction = MMI.EntryPropertyVec[0].Entry;
-  const auto &FuncRs = RSA.find(EntryFunction);
+  const auto &RS = RSA.getDescForFunction(EntryFunction);
 
-  if (FuncRs == RSA.end())
+  if (!RS )
 return;
 
-  const RootSignatureDesc &RS = FuncRs->second;
   SmallString<256> Data;
   raw_svector_ostream OS(Data);
 
-  RS.write(OS);
+  RS->write(OS);
 
   Constant *Constant =
   ConstantDataArray::getString(M.getContext(), Data, /*AddNull*/ false);
diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp 
b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
index 29e78fcce5262..4094df160ef6f 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp
@@ -554,9 +554,9 @@ analyzeModule(Module &M) {
 
 AnalysisKey RootSignatureAnalysis::Key;
 
-SmallDenseMap
-RootSignatureAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
-  return analyzeModule(M);
+RootSignatureBindingInfo RootSignatureAnalysis::run(Module &M,
+ModuleAnalysisManager &AM) 
{
+  return RootSignatureBindingInfo(analyzeModule(M));
 }
 
 
//===--===//
@@ -564,8 +564,7 @@ RootSignatureAnalysis::run(Module &M, ModuleAnalysisManager 
&AM) {
 PreservedAnalyses RootSignatureAnalysisPrinter::run(Module &M,
 ModuleAnalysisManager &AM) 
{
 
-  SmallDenseMap &RSDMap =
-  AM.getResult(M);
+  RootSignatureBindingInfo &RSDMap = AM.getResult(M);
 
   OS << "Root Signature Definitions"
  << "\n";
@@ -636,7 +635,8 @@ PreservedAnalyses RootSignatureAnalysisPrinter::run(Module 
&M,
 
 
//===--===//
 bool RootSignatureAnalysisWrapper::runOnModule(Module &M) {
-  FuncToRsMap = analyzeModule(M);
+  FuncToRsMap = std::make_unique(
+  RootSignatureBindingInfo(analyzeModule(M)));
   return false;
 }
 
diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.h 
b/llvm/lib/Target/DirectX/DXILRootSignature.h
index b45cebc15fd39..fef933811f840 100644
--- a/llvm/lib/Target/DirectX/DXILRootSignature.h
+++ b/llvm/lib/Target/DirectX/DXILRootSignature.h
@@ -33,16 +33,43 @@ enum class RootSignatureElementKind {
   CBV = 5,
   DescriptorTable = 6,
 };
+
+class RootSignatureBindingInfo {
+  private:
+SmallDenseMap FuncToRsMap;
+
+  public:
+  using iterator =
+SmallDenseMap::iterator;
+
+  RootSignatureBindingInfo () = default;
+  RootSignatureBindingInfo(SmallDenseMap Map) : FuncToRsMap(Map) {};
+
+  iterator find(const Function *F) { return FuncToRsMap.find(F); }
+
+  iterator end() { return FuncToRsMap.end(); }
+
+  std::optional getDescForFunction(const Function* 
F) {
+const auto FuncRs = find(F);
+if (FuncRs == end())
+  return std::nullopt;
+
+return FuncRs->second;
+  }
+  
+};
+
 class RootSignatureAnalysis : public AnalysisInfoMixin {
   friend AnalysisInfoMixin;
   static AnalysisKey Key;
 
 public:
-  RootSignatureAnalysis() = default;
 
-  using Result = SmallDenseMap;
+RootSignatureAnalysis() = default;
 
-  SmallDenseMap
+  using Result = RootSignatureBindingInfo;
+  
+  RootSignatureBindingInfo
   run(Module &M, ModuleAnalysisManager &AM);
 };
 
@@ -52,20 +79,16 @@ class RootSignatureAnalysis : public 
AnalysisInfoMixin {
 /// passes which run through the legacy pass manager.
 class RootSignatureAnalysisWrapper : public ModulePass {
 private:
-  SmallDenseMap FuncToRsMap;
+  std::unique_ptr FuncToRsMap;
 
 public:
   static char ID;
+  using Result = RootSignatureBindingInfo;
 
   RootSignatureAnalysisWrapper() : ModulePass(ID) {}
 
-  using iterator =
-  SmallDenseMap::iterator;
-
-  iterator find(const Function *F) { return FuncToRsMap.find(F); }
-
-  iterator end() { return FuncToRsMap.end(); }
-
+  RootSignatureBindingInfo& getRSInfo() {return *FuncToRsMap;}
+  
   bool runOnModule(Module &M) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override;

>From 75

[llvm-branch-commits] [lldb] 567fc97 - Revert "[lldb][RPC] Upstream lldb-rpc-gen tool (#138031)"

2025-07-03 Thread via llvm-branch-commits

Author: Chelsea Cassanova
Date: 2025-07-03T15:52:35-07:00
New Revision: 567fc97be4881ce3656e45c0b2cc7e85e08dde3d

URL: 
https://github.com/llvm/llvm-project/commit/567fc97be4881ce3656e45c0b2cc7e85e08dde3d
DIFF: 
https://github.com/llvm/llvm-project/commit/567fc97be4881ce3656e45c0b2cc7e85e08dde3d.diff

LOG: Revert "[lldb][RPC] Upstream lldb-rpc-gen tool (#138031)"

This reverts commit 9bfb347ea0a0a260eb505921dfc0cb824a6ced5d.

Added: 


Modified: 
lldb/cmake/modules/LLDBConfig.cmake
lldb/test/CMakeLists.txt
lldb/test/Shell/helper/toolchain.py
lldb/test/Shell/lit.site.cfg.py.in
lldb/tools/CMakeLists.txt

Removed: 
lldb/test/Shell/RPC/Generator/Inputs/SBDummy.h
lldb/test/Shell/RPC/Generator/Tests/CheckRPCGenToolByproducts.test
lldb/test/Shell/RPC/Generator/lit.local.cfg
lldb/tools/lldb-rpc/CMakeLists.txt
lldb/tools/lldb-rpc/LLDBRPCGeneration.cmake
lldb/tools/lldb-rpc/LLDBRPCHeaders.cmake
lldb/tools/lldb-rpc/lldb-rpc-gen/CMakeLists.txt
lldb/tools/lldb-rpc/lldb-rpc-gen/RPCCommon.cpp
lldb/tools/lldb-rpc/lldb-rpc-gen/RPCCommon.h
lldb/tools/lldb-rpc/lldb-rpc-gen/lldb-rpc-gen.cpp



diff  --git a/lldb/cmake/modules/LLDBConfig.cmake 
b/lldb/cmake/modules/LLDBConfig.cmake
index f674c29682160..8c30b6e09d2c7 100644
--- a/lldb/cmake/modules/LLDBConfig.cmake
+++ b/lldb/cmake/modules/LLDBConfig.cmake
@@ -323,6 +323,4 @@ else()
 set(LLDB_CAN_USE_DEBUGSERVER OFF)
 endif()
 
-set(LLDB_BUILD_LLDBRPC ON CACHE BOOL "")
-
 include(LLDBGenerateConfig)

diff  --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt
index 7cf239c7f95ab..6449ac5a9247f 100644
--- a/lldb/test/CMakeLists.txt
+++ b/lldb/test/CMakeLists.txt
@@ -132,10 +132,6 @@ if(TARGET lldb-framework)
   add_lldb_test_dependency(lldb-framework)
 endif()
 
-if (LLDB_BUILD_LLDBRPC)
-  add_lldb_test_dependency(lldb-rpc-generate-sources)
-endif()
-
 # Add dependencies that are not exported targets when building standalone.
 if(NOT LLDB_BUILT_STANDALONE)
   add_lldb_test_dependency(
@@ -253,8 +249,7 @@ llvm_canonicalize_cmake_booleans(
   LLDB_TEST_SHELL_DISABLE_REMOTE
   LLDB_TOOL_LLDB_SERVER_BUILD
   LLDB_USE_SYSTEM_DEBUGSERVER
-  LLDB_IS_64_BITS
-  LLDB_BUILD_LLDBRPC)
+  LLDB_IS_64_BITS)
 
 # Configure the individual test suites.
 add_subdirectory(API)

diff  --git a/lldb/test/Shell/RPC/Generator/Inputs/SBDummy.h 
b/lldb/test/Shell/RPC/Generator/Inputs/SBDummy.h
deleted file mode 100644
index e69de29bb2d1d..0

diff  --git 
a/lldb/test/Shell/RPC/Generator/Tests/CheckRPCGenToolByproducts.test 
b/lldb/test/Shell/RPC/Generator/Tests/CheckRPCGenToolByproducts.test
deleted file mode 100644
index 15fcf8fb39c7d..0
--- a/lldb/test/Shell/RPC/Generator/Tests/CheckRPCGenToolByproducts.test
+++ /dev/null
@@ -1,9 +0,0 @@
-RUN: %lldb-rpc-gen --output-dir=%t %S/../Inputs/SBDummy.h
-
-RUN: ls %t | FileCheck %s
-
-# We're just making sure that the tool emits the class names,
-# methods and skipped methods file in the output directory.
-CHECK: SBAPI.def
-CHECK: SBClasses.def
-CHECK: SkippedMethods.txt

diff  --git a/lldb/test/Shell/RPC/Generator/lit.local.cfg 
b/lldb/test/Shell/RPC/Generator/lit.local.cfg
deleted file mode 100644
index db9494781c00c..0
--- a/lldb/test/Shell/RPC/Generator/lit.local.cfg
+++ /dev/null
@@ -1,3 +0,0 @@
-# All tests for the tool need lldb-rpc-gen to be built.
-if not config.lldb_has_lldbrpc:
-   config.unsupported = True

diff  --git a/lldb/test/Shell/helper/toolchain.py 
b/lldb/test/Shell/helper/toolchain.py
index 728f6347242f1..42968128f2702 100644
--- a/lldb/test/Shell/helper/toolchain.py
+++ b/lldb/test/Shell/helper/toolchain.py
@@ -156,16 +156,6 @@ def use_lldb_substitutions(config):
 extra_args=["platform"],
 unresolved="ignore",
 ),
-ToolSubst(
-"%lldb-rpc-gen",
-command=FindTool("lldb-rpc-gen"),
-# We need the LLDB build directory root to pass into the tool, not 
the test build root.
-extra_args=[
-"-p " + config.lldb_build_directory + "/..",
-'--extra-arg="-resource-dir=' + config.clang_resource_dir + 
'"',
-],
-unresolved="ignore",
-),
 "lldb-test",
 "lldb-dap",
 ToolSubst(

diff  --git a/lldb/test/Shell/lit.site.cfg.py.in 
b/lldb/test/Shell/lit.site.cfg.py.in
index beaa41e6fd379..5be5359217769 100644
--- a/lldb/test/Shell/lit.site.cfg.py.in
+++ b/lldb/test/Shell/lit.site.cfg.py.in
@@ -33,7 +33,6 @@ config.lldb_build_directory = "@LLDB_TEST_BUILD_DIRECTORY@"
 config.have_lldb_server = @LLDB_TOOL_LLDB_SERVER_BUILD@
 config.lldb_system_debugserver = @LLDB_USE_SYSTEM_DEBUGSERVER@
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
-config.lldb_has_lldbrpc = @LLDB_BUILD_LLDBRPC@
 # The shell tests use their own module caches.
 config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", 

[llvm-branch-commits] [llvm] d6c3ae8 - Revert "[AMDGPU] Re-apply: Implement vop3p complex pattern optmization for gi…"

2025-07-03 Thread via llvm-branch-commits

Author: Shoreshen
Date: 2025-07-04T09:43:00+08:00
New Revision: d6c3ae82c37987098b409ec46971cb2ee370f8c5

URL: 
https://github.com/llvm/llvm-project/commit/d6c3ae82c37987098b409ec46971cb2ee370f8c5
DIFF: 
https://github.com/llvm/llvm-project/commit/d6c3ae82c37987098b409ec46971cb2ee370f8c5.diff

LOG: Revert "[AMDGPU] Re-apply: Implement vop3p complex pattern optmization for 
gi…"

This reverts commit db03c27763656948323a50b9706da912c581e6f2.

Added: 


Modified: 
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
llvm/test/CodeGen/AMDGPU/packed-fp32.ll
llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index fd679a9933cf0..b632b16f5c198 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4327,591 +4327,60 @@ 
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
   }};
 }
 
-enum class SrcStatus {
-  IS_SAME,
-  IS_UPPER_HALF,
-  IS_LOWER_HALF,
-  IS_UPPER_HALF_NEG,
-  // This means current op = [op_upper, op_lower] and src = -op_lower.
-  IS_LOWER_HALF_NEG,
-  IS_HI_NEG,
-  // This means current op = [op_upper, op_lower] and src = [op_upper,
-  // -op_lower].
-  IS_LO_NEG,
-  IS_BOTH_NEG,
-  INVALID,
-  NEG_START = IS_UPPER_HALF_NEG,
-  NEG_END = IS_BOTH_NEG,
-  HALF_START = IS_UPPER_HALF,
-  HALF_END = IS_LOWER_HALF_NEG
-};
-/// Test if the MI is truncating to half, such as `%reg0:n = G_TRUNC %reg1:2n`
-static bool isTruncHalf(const MachineInstr *MI,
-const MachineRegisterInfo &MRI) {
-  if (MI->getOpcode() != AMDGPU::G_TRUNC)
-return false;
-
-  unsigned DstSize = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
-  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
-  return DstSize * 2 == SrcSize;
-}
-
-/// Test if the MI is logic shift right with half bits,
-/// such as `%reg0:2n =G_LSHR %reg1:2n, CONST(n)`
-static bool isLshrHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) 
{
-  if (MI->getOpcode() != AMDGPU::G_LSHR)
-return false;
-
-  Register ShiftSrc;
-  std::optional ShiftAmt;
-  if (mi_match(MI->getOperand(0).getReg(), MRI,
-   m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt {
-unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
-unsigned Shift = ShiftAmt->Value.getZExtValue();
-return Shift * 2 == SrcSize;
-  }
-  return false;
-}
-
-/// Test if the MI is shift left with half bits,
-/// such as `%reg0:2n =G_SHL %reg1:2n, CONST(n)`
-static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) {
-  if (MI->getOpcode() != AMDGPU::G_SHL)
-return false;
-
-  Register ShiftSrc;
-  std::optional ShiftAmt;
-  if (mi_match(MI->getOperand(0).getReg(), MRI,
-   m_GShl(m_Reg(ShiftSrc), m_GCst(ShiftAmt {
-unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
-unsigned Shift = ShiftAmt->Value.getZExtValue();
-return Shift * 2 == SrcSize;
-  }
-  return false;
-}
-
-/// Test function, if the MI is `%reg0:n, %reg1:n = G_UNMERGE_VALUES %reg2:2n`
-static bool isUnmergeHalf(const MachineInstr *MI,
-  const MachineRegisterInfo &MRI) {
-  if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
-return false;
-  return MI->getNumOperands() == 3 && MI->getOperand(0).isDef() &&
- MI->getOperand(1).isDef() && !MI->getOperand(2).isDef();
-}
-
-enum class TypeClass { VECTOR_OF_TWO, SCALAR, NONE_OF_LISTED };
-
-static TypeClass isVectorOfTwoOrScalar(Register Reg,
-   const MachineRegisterInfo &MRI) {
-  LLT OpTy = MRI.getType(Reg);
-  if (OpTy.isScalar())
-return TypeClass::SCALAR;
-  if (OpTy.isVector() && OpTy.getNumElements() == 2)
-return TypeClass::VECTOR_OF_TWO;
-  return TypeClass::NONE_OF_LISTED;
-}
-
-static SrcStatus getNegStatus(Register Reg, SrcStatus S,
-  const MachineRegisterInfo &MRI) {
-  TypeClass NegType = isVectorOfTwoOrScalar(Reg, MRI);
-  if (NegType != TypeClass::VECTOR_OF_TWO && NegType != TypeClass::SCALAR)
-return SrcStatus::INVALID;
-
-  switch (S) {
-  case SrcStatus::IS_SAME:
-if (NegType == TypeClass::VECTOR_OF_TWO) {
-  // Vector of 2:
-  // [SrcHi, SrcLo]   = [CurrHi, CurrLo]
-  // [CurrHi, CurrLo] = neg [OpHi, OpLo](2 x Type)
-  // [CurrHi, CurrLo] = [-OpHi, -OpLo](2 x Type)
-  // [SrcHi, SrcLo]   = [-OpHi, -OpLo]
-  return SrcStatus::IS_BOTH_NEG;
-}
-if (NegType == TypeClass::SCALAR) {
-  // Scalar:
-  

[llvm-branch-commits] [llvm] 14a4448 - Revert "Revert "[AMDGPU] Re-apply: Implement vop3p complex pattern optmizatio…"

2025-07-03 Thread via llvm-branch-commits

Author: Shoreshen
Date: 2025-07-04T09:43:51+08:00
New Revision: 14a4448afc8e65b8610d78fc66f2695a691a25b3

URL: 
https://github.com/llvm/llvm-project/commit/14a4448afc8e65b8610d78fc66f2695a691a25b3
DIFF: 
https://github.com/llvm/llvm-project/commit/14a4448afc8e65b8610d78fc66f2695a691a25b3.diff

LOG: Revert "Revert "[AMDGPU] Re-apply: Implement vop3p complex pattern 
optmizatio…"

This reverts commit 5b8304d6b90c42f2d3cf918e5e0f935767866e64.

Added: 


Modified: 
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll
llvm/test/CodeGen/AMDGPU/packed-fp32.ll
llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll

Removed: 




diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp 
b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b632b16f5c198..fd679a9933cf0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4327,60 +4327,591 @@ 
AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const {
   }};
 }
 
-std::pair<Register, unsigned>
-AMDGPUInstructionSelector::selectVOP3PModsImpl(
-  Register Src, const MachineRegisterInfo &MRI, bool IsDOT) const {
+enum class SrcStatus {
+  IS_SAME,
+  IS_UPPER_HALF,
+  IS_LOWER_HALF,
+  IS_UPPER_HALF_NEG,
+  // This means current op = [op_upper, op_lower] and src = -op_lower.
+  IS_LOWER_HALF_NEG,
+  IS_HI_NEG,
+  // This means current op = [op_upper, op_lower] and src = [op_upper,
+  // -op_lower].
+  IS_LO_NEG,
+  IS_BOTH_NEG,
+  INVALID,
+  NEG_START = IS_UPPER_HALF_NEG,
+  NEG_END = IS_BOTH_NEG,
+  HALF_START = IS_UPPER_HALF,
+  HALF_END = IS_LOWER_HALF_NEG
+};
+/// Test if the MI is truncating to half, such as `%reg0:n = G_TRUNC %reg1:2n`
+static bool isTruncHalf(const MachineInstr *MI,
+const MachineRegisterInfo &MRI) {
+  if (MI->getOpcode() != AMDGPU::G_TRUNC)
+return false;
+
+  unsigned DstSize = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
+  unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
+  return DstSize * 2 == SrcSize;
+}
+
+/// Test if the MI is logic shift right with half bits,
+/// such as `%reg0:2n =G_LSHR %reg1:2n, CONST(n)`
+static bool isLshrHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) 
{
+  if (MI->getOpcode() != AMDGPU::G_LSHR)
+return false;
+
+  Register ShiftSrc;
+  std::optional<ValueAndVReg> ShiftAmt;
+  if (mi_match(MI->getOperand(0).getReg(), MRI,
+   m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt)))) {
+unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
+unsigned Shift = ShiftAmt->Value.getZExtValue();
+return Shift * 2 == SrcSize;
+  }
+  return false;
+}
+
+/// Test if the MI is shift left with half bits,
+/// such as `%reg0:2n =G_SHL %reg1:2n, CONST(n)`
+static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) {
+  if (MI->getOpcode() != AMDGPU::G_SHL)
+return false;
+
+  Register ShiftSrc;
+  std::optional<ValueAndVReg> ShiftAmt;
+  if (mi_match(MI->getOperand(0).getReg(), MRI,
+   m_GShl(m_Reg(ShiftSrc), m_GCst(ShiftAmt)))) {
+unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits();
+unsigned Shift = ShiftAmt->Value.getZExtValue();
+return Shift * 2 == SrcSize;
+  }
+  return false;
+}
+
+/// Test function, if the MI is `%reg0:n, %reg1:n = G_UNMERGE_VALUES %reg2:2n`
+static bool isUnmergeHalf(const MachineInstr *MI,
+  const MachineRegisterInfo &MRI) {
+  if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES)
+return false;
+  return MI->getNumOperands() == 3 && MI->getOperand(0).isDef() &&
+ MI->getOperand(1).isDef() && !MI->getOperand(2).isDef();
+}
+
+enum class TypeClass { VECTOR_OF_TWO, SCALAR, NONE_OF_LISTED };
+
+static TypeClass isVectorOfTwoOrScalar(Register Reg,
+   const MachineRegisterInfo &MRI) {
+  LLT OpTy = MRI.getType(Reg);
+  if (OpTy.isScalar())
+return TypeClass::SCALAR;
+  if (OpTy.isVector() && OpTy.getNumElements() == 2)
+return TypeClass::VECTOR_OF_TWO;
+  return TypeClass::NONE_OF_LISTED;
+}
+
+static SrcStatus getNegStatus(Register Reg, SrcStatus S,
+  const MachineRegisterInfo &MRI) {
+  TypeClass NegType = isVectorOfTwoOrScalar(Reg, MRI);
+  if (NegType != TypeClass::VECTOR_OF_TWO && NegType != TypeClass::SCALAR)
+return SrcStatus::INVALID;
+
+  switch (S) {
+  case SrcStatus::IS_SAME:
+if (NegType == TypeClass::VECTOR_OF_TWO) {
+  // Vector of 2:
+  // [SrcHi, SrcLo]   = [CurrHi, CurrLo]
+  // [CurrHi, CurrLo] = neg [OpHi, OpLo](2 x Type)
+  // [CurrHi, CurrLo] = [-OpHi, -OpLo](2 x Type)
+  // [SrcHi, SrcLo]  

[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)

2025-07-03 Thread Nicolas Vasilache via llvm-branch-commits

https://github.com/nicolasvasilache created 
https://github.com/llvm/llvm-project/pull/146943

…lOp and use it to implement warp specialization.

This revision adds DeviceMaskingAttrInterface and extends 
DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and 
DeviceMaskingAttrInterface.

The first implementation is in the form of a GPUMappingMaskAttr, which can be 
additionally passed to the scf.forall.mapping attribute to specify a mask on 
compute resources that should be active.

Support is added to GPUTransformOps to take advantage of this information and 
lower to block/warpgroup/warp/thread specialization when mapped to linear ids.

>From 02e425b30966f4781fe07d8cf595a1e2b0d41aa3 Mon Sep 17 00:00:00 2001
From: Nicolas Vasilache 
Date: Thu, 3 Jul 2025 21:26:53 +0200
Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to
 scf::ForallOp and use it to implement warp specialization.

This revision adds DeviceMaskingAttrInterface and extends
DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface
and DeviceMaskingAttrInterface.

The first implementation is in the form of a GPUMappingMaskAttr, which
can be additionally passed to the scf.forall.mapping attribute to
specify a mask on compute resources that should be active.

Support is added to GPUTransformOps to take advantage of this
information and lower to block/warpgroup/warp/thread specialization when
mapped to linear ids.

Co-authored-by: Oleksandr "Alex" Zinenko 
---
 .../Dialect/GPU/IR/GPUDeviceMappingAttr.td|  18 
 .../mlir/Dialect/GPU/TransformOps/Utils.h |  15 ++-
 .../Dialect/SCF/IR/DeviceMappingInterface.td  |  45 +++-
 mlir/include/mlir/Dialect/SCF/IR/SCFOps.td|  12 +++
 mlir/lib/Dialect/GPU/CMakeLists.txt   |   1 +
 mlir/lib/Dialect/GPU/IR/GPUDialect.cpp|  45 
 .../GPU/TransformOps/GPUTransformOps.cpp  |  58 ++
 mlir/lib/Dialect/GPU/TransformOps/Utils.cpp   | 100 +-
 mlir/lib/Dialect/SCF/IR/SCF.cpp   |  43 ++--
 .../Dialect/GPU/transform-gpu-failing.mlir|  61 +++
 mlir/test/Dialect/GPU/transform-gpu.mlir  |  81 ++
 mlir/test/Dialect/SCF/invalid.mlir|  18 
 12 files changed, 439 insertions(+), 58 deletions(-)

diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td 
b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
index 63f228ca3157f..e8540027e7b77 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
@@ -252,6 +252,24 @@ def GPULaneMappingAttr
   }];
 }
 
+def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [
+  DeclareAttrInterfaceMethods<DeviceMaskingAttrInterface> ] >  {
+  let parameters = (ins "uint64_t":$mask);
+  let assemblyFormat = "`<` params `>`";
+  let description = [{
+Attribute describing how to filter the processing units that a
+region is mapped to.
+
+In the first implementation the masking is a bitfield that specifies for
+each processing unit whether it is active or not.
+
+In the future, we may want to implement this as a symbol to refer to
+dynamically defined values.
+
+Extending op semantics with an operand is deemed too intrusive at this 
time.
+  }];
+}
+
 def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", 
"memory_space", [
   DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ] >  {
   let parameters = (ins
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h 
b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
index de512ded59fec..0a11b8f8d3fa0 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
@@ -78,7 +78,8 @@ struct GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuBlockIdBuilder : public GpuIdBuilder {
-  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false);
+  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
 };
 
 /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
@@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
   GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize,
-bool useLinearMapping = false);
+bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
   /// In the future this may be configured by the transformation.
   static constexpr int64_t kNumWarpsPerGroup = 4;
@@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 stru

[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)

2025-07-03 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-mlir-gpu

Author: Nicolas Vasilache (nicolasvasilache)


Changes

…lOp and use it to implement warp specialization.

This revision adds DeviceMaskingAttrInterface and extends 
DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and 
DeviceMaskingAttrInterface.

The first implementation is in the form of a GPUMappingMaskAttr, which can be 
additionally passed to the scf.forall.mapping attribute to specify a mask on 
compute resources that should be active.

Support is added to GPUTransformOps to take advantage of this information and 
lower to block/warpgroup/warp/thread specialization when mapped to linear ids.

---

Patch is 35.49 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/146943.diff


12 Files Affected:

- (modified) mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td (+18) 
- (modified) mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h (+10-5) 
- (modified) mlir/include/mlir/Dialect/SCF/IR/DeviceMappingInterface.td (+44-1) 
- (modified) mlir/include/mlir/Dialect/SCF/IR/SCFOps.td (+12) 
- (modified) mlir/lib/Dialect/GPU/CMakeLists.txt (+1) 
- (modified) mlir/lib/Dialect/GPU/IR/GPUDialect.cpp (+45) 
- (modified) mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp (+39-19) 
- (modified) mlir/lib/Dialect/GPU/TransformOps/Utils.cpp (+73-27) 
- (modified) mlir/lib/Dialect/SCF/IR/SCF.cpp (+37-6) 
- (modified) mlir/test/Dialect/GPU/transform-gpu-failing.mlir (+61) 
- (modified) mlir/test/Dialect/GPU/transform-gpu.mlir (+81) 
- (modified) mlir/test/Dialect/SCF/invalid.mlir (+18) 


```diff
diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td 
b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
index 63f228ca3157f..e8540027e7b77 100644
--- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
+++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td
@@ -252,6 +252,24 @@ def GPULaneMappingAttr
   }];
 }
 
+def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [
+  DeclareAttrInterfaceMethods<DeviceMaskingAttrInterface> ] >  {
+  let parameters = (ins "uint64_t":$mask);
+  let assemblyFormat = "`<` params `>`";
+  let description = [{
+Attribute describing how to filter the processing units that a
+region is mapped to.
+
+In the first implementation the masking is a bitfield that specifies for
+each processing unit whether it is active or not.
+
+In the future, we may want to implement this as a symbol to refer to
+dynamically defined values.
+
+Extending op semantics with an operand is deemed too intrusive at this 
time.
+  }];
+}
+
 def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", 
"memory_space", [
   DeclareAttrInterfaceMethods<DeviceMappingAttrInterface> ] >  {
   let parameters = (ins
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h 
b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
index de512ded59fec..0a11b8f8d3fa0 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h
@@ -78,7 +78,8 @@ struct GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuBlockIdBuilder : public GpuIdBuilder {
-  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false);
+  GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
 };
 
 /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups.
@@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
   GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize,
-bool useLinearMapping = false);
+bool useLinearMapping = false,
+DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
   /// In the future this may be configured by the transformation.
   static constexpr int64_t kNumWarpsPerGroup = 4;
@@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder {
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuWarpIdBuilder : public GpuIdBuilder {
   GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize,
-   bool useLinearMapping = false);
+   bool useLinearMapping = false,
+   DeviceMaskingAttrInterface mask = nullptr);
   int64_t warpSize = 32;
 };
 
@@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder {
 /// If `useLinearMapping` is true, the `idBuilder` method returns nD values
 /// used for indexing rewrites as well as 1D sizes for predicate generation.
 struct GpuThreadIdBuilder : public GpuIdBuilder {
-  GpuThreadIdBuilder(MLIRContext *ctx, bool useLinearMapping = false);
+  GpuThreadIdBuild