[llvm-branch-commits] [clang] [clang][OpenMP] Use DirectiveNameParser to parse directive names (PR #146779)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/146779 >From ab3f0cc9d240f12e07be452effa75b5c7d010d9b Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 2 Jul 2025 11:25:00 -0500 Subject: [PATCH] [clang][OpenMP] Use DirectiveNameParser to parse directive names This simplifies the parsing code in clang quite a bit. --- clang/lib/Parse/ParseOpenMP.cpp | 181 1 file changed, 19 insertions(+), 162 deletions(-) diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index f694ae1d0d112..c0a17d0e9537d 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -25,6 +25,7 @@ #include "clang/Sema/SemaOpenMP.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Frontend/OpenMP/DirectiveNameParser.h" #include "llvm/Frontend/OpenMP/OMPAssume.h" #include "llvm/Frontend/OpenMP/OMPContext.h" #include @@ -37,48 +38,6 @@ using namespace llvm::omp; //===--===// namespace { -enum OpenMPDirectiveKindEx { - OMPD_cancellation = llvm::omp::Directive_enumSize + 1, - OMPD_data, - OMPD_declare, - OMPD_end, - OMPD_end_declare, - OMPD_enter, - OMPD_exit, - OMPD_point, - OMPD_reduction, - OMPD_target_enter, - OMPD_target_exit, - OMPD_update, - OMPD_distribute_parallel, - OMPD_teams_distribute_parallel, - OMPD_target_teams_distribute_parallel, - OMPD_mapper, - OMPD_variant, - OMPD_begin, - OMPD_begin_declare, -}; - -// Helper to unify the enum class OpenMPDirectiveKind with its extension -// the OpenMPDirectiveKindEx enum which allows to use them together as if they -// are unsigned values. 
-struct OpenMPDirectiveKindExWrapper { - OpenMPDirectiveKindExWrapper(unsigned Value) : Value(Value) {} - OpenMPDirectiveKindExWrapper(OpenMPDirectiveKind DK) : Value(unsigned(DK)) {} - bool operator==(OpenMPDirectiveKindExWrapper V) const { -return Value == V.Value; - } - bool operator!=(OpenMPDirectiveKindExWrapper V) const { -return Value != V.Value; - } - bool operator==(OpenMPDirectiveKind V) const { return Value == unsigned(V); } - bool operator!=(OpenMPDirectiveKind V) const { return Value != unsigned(V); } - bool operator<(OpenMPDirectiveKind V) const { return Value < unsigned(V); } - operator unsigned() const { return Value; } - operator OpenMPDirectiveKind() const { return OpenMPDirectiveKind(Value); } - unsigned Value; -}; - class DeclDirectiveListParserHelper final { SmallVector Identifiers; Parser *P; @@ -97,130 +56,32 @@ class DeclDirectiveListParserHelper final { }; } // namespace -// Map token string to extended OMP token kind that are -// OpenMPDirectiveKind + OpenMPDirectiveKindEx. -static unsigned getOpenMPDirectiveKindEx(StringRef S) { - OpenMPDirectiveKindExWrapper DKind = getOpenMPDirectiveKind(S); - if (DKind != OMPD_unknown) -return DKind; - - return llvm::StringSwitch(S) - .Case("cancellation", OMPD_cancellation) - .Case("data", OMPD_data) - .Case("declare", OMPD_declare) - .Case("end", OMPD_end) - .Case("enter", OMPD_enter) - .Case("exit", OMPD_exit) - .Case("point", OMPD_point) - .Case("reduction", OMPD_reduction) - .Case("update", OMPD_update) - .Case("mapper", OMPD_mapper) - .Case("variant", OMPD_variant) - .Case("begin", OMPD_begin) - .Default(OMPD_unknown); -} +static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) { + static const DirectiveNameParser DNP; + + const DirectiveNameParser::State *S = DNP.initial(); -static OpenMPDirectiveKindExWrapper parseOpenMPDirectiveKind(Parser &P) { - // Array of foldings: F[i][0] F[i][1] ===> F[i][2]. 
- // E.g.: OMPD_for OMPD_simd ===> OMPD_for_simd - // TODO: add other combined directives in topological order. - static const OpenMPDirectiveKindExWrapper F[][3] = { - {OMPD_begin, OMPD_declare, OMPD_begin_declare}, - {OMPD_begin, OMPD_assumes, OMPD_begin_assumes}, - {OMPD_end, OMPD_declare, OMPD_end_declare}, - {OMPD_end, OMPD_assumes, OMPD_end_assumes}, - {OMPD_cancellation, OMPD_point, OMPD_cancellation_point}, - {OMPD_declare, OMPD_reduction, OMPD_declare_reduction}, - {OMPD_declare, OMPD_mapper, OMPD_declare_mapper}, - {OMPD_declare, OMPD_simd, OMPD_declare_simd}, - {OMPD_declare, OMPD_target, OMPD_declare_target}, - {OMPD_declare, OMPD_variant, OMPD_declare_variant}, - {OMPD_begin_declare, OMPD_target, OMPD_begin_declare_target}, - {OMPD_begin_declare, OMPD_variant, OMPD_begin_declare_variant}, - {OMPD_end_declare, OMPD_variant, OMPD_end_declare_variant}, - {OMPD_distribute, OMPD_parallel, OMPD_distribute_parallel}, - {OMPD_distribute_parallel, OMPD_for, OMPD_distribute_parallel_for}, - {OMPD_distribute_parallel_for, OMPD_simd, - OMPD_distribute_parallel_for_simd}, - {OMPD_distribute, OMPD_simd, OMPD_distrib
[llvm-branch-commits] [clang] [clang][OpenMP] Use DirectiveNameParser to parse directive names (PR #146779)
https://github.com/kparzysz updated https://github.com/llvm/llvm-project/pull/146779 >From ab3f0cc9d240f12e07be452effa75b5c7d010d9b Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 2 Jul 2025 11:25:00 -0500 Subject: [PATCH 1/2] [clang][OpenMP] Use DirectiveNameParser to parse directive names This simplifies the parsing code in clang quite a bit. --- clang/lib/Parse/ParseOpenMP.cpp | 181 1 file changed, 19 insertions(+), 162 deletions(-) diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index f694ae1d0d112..c0a17d0e9537d 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -25,6 +25,7 @@ #include "clang/Sema/SemaOpenMP.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Frontend/OpenMP/DirectiveNameParser.h" #include "llvm/Frontend/OpenMP/OMPAssume.h" #include "llvm/Frontend/OpenMP/OMPContext.h" #include @@ -37,48 +38,6 @@ using namespace llvm::omp; //===--===// namespace { -enum OpenMPDirectiveKindEx { - OMPD_cancellation = llvm::omp::Directive_enumSize + 1, - OMPD_data, - OMPD_declare, - OMPD_end, - OMPD_end_declare, - OMPD_enter, - OMPD_exit, - OMPD_point, - OMPD_reduction, - OMPD_target_enter, - OMPD_target_exit, - OMPD_update, - OMPD_distribute_parallel, - OMPD_teams_distribute_parallel, - OMPD_target_teams_distribute_parallel, - OMPD_mapper, - OMPD_variant, - OMPD_begin, - OMPD_begin_declare, -}; - -// Helper to unify the enum class OpenMPDirectiveKind with its extension -// the OpenMPDirectiveKindEx enum which allows to use them together as if they -// are unsigned values. 
-struct OpenMPDirectiveKindExWrapper { - OpenMPDirectiveKindExWrapper(unsigned Value) : Value(Value) {} - OpenMPDirectiveKindExWrapper(OpenMPDirectiveKind DK) : Value(unsigned(DK)) {} - bool operator==(OpenMPDirectiveKindExWrapper V) const { -return Value == V.Value; - } - bool operator!=(OpenMPDirectiveKindExWrapper V) const { -return Value != V.Value; - } - bool operator==(OpenMPDirectiveKind V) const { return Value == unsigned(V); } - bool operator!=(OpenMPDirectiveKind V) const { return Value != unsigned(V); } - bool operator<(OpenMPDirectiveKind V) const { return Value < unsigned(V); } - operator unsigned() const { return Value; } - operator OpenMPDirectiveKind() const { return OpenMPDirectiveKind(Value); } - unsigned Value; -}; - class DeclDirectiveListParserHelper final { SmallVector Identifiers; Parser *P; @@ -97,130 +56,32 @@ class DeclDirectiveListParserHelper final { }; } // namespace -// Map token string to extended OMP token kind that are -// OpenMPDirectiveKind + OpenMPDirectiveKindEx. -static unsigned getOpenMPDirectiveKindEx(StringRef S) { - OpenMPDirectiveKindExWrapper DKind = getOpenMPDirectiveKind(S); - if (DKind != OMPD_unknown) -return DKind; - - return llvm::StringSwitch(S) - .Case("cancellation", OMPD_cancellation) - .Case("data", OMPD_data) - .Case("declare", OMPD_declare) - .Case("end", OMPD_end) - .Case("enter", OMPD_enter) - .Case("exit", OMPD_exit) - .Case("point", OMPD_point) - .Case("reduction", OMPD_reduction) - .Case("update", OMPD_update) - .Case("mapper", OMPD_mapper) - .Case("variant", OMPD_variant) - .Case("begin", OMPD_begin) - .Default(OMPD_unknown); -} +static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) { + static const DirectiveNameParser DNP; + + const DirectiveNameParser::State *S = DNP.initial(); -static OpenMPDirectiveKindExWrapper parseOpenMPDirectiveKind(Parser &P) { - // Array of foldings: F[i][0] F[i][1] ===> F[i][2]. 
- // E.g.: OMPD_for OMPD_simd ===> OMPD_for_simd - // TODO: add other combined directives in topological order. - static const OpenMPDirectiveKindExWrapper F[][3] = { - {OMPD_begin, OMPD_declare, OMPD_begin_declare}, - {OMPD_begin, OMPD_assumes, OMPD_begin_assumes}, - {OMPD_end, OMPD_declare, OMPD_end_declare}, - {OMPD_end, OMPD_assumes, OMPD_end_assumes}, - {OMPD_cancellation, OMPD_point, OMPD_cancellation_point}, - {OMPD_declare, OMPD_reduction, OMPD_declare_reduction}, - {OMPD_declare, OMPD_mapper, OMPD_declare_mapper}, - {OMPD_declare, OMPD_simd, OMPD_declare_simd}, - {OMPD_declare, OMPD_target, OMPD_declare_target}, - {OMPD_declare, OMPD_variant, OMPD_declare_variant}, - {OMPD_begin_declare, OMPD_target, OMPD_begin_declare_target}, - {OMPD_begin_declare, OMPD_variant, OMPD_begin_declare_variant}, - {OMPD_end_declare, OMPD_variant, OMPD_end_declare_variant}, - {OMPD_distribute, OMPD_parallel, OMPD_distribute_parallel}, - {OMPD_distribute_parallel, OMPD_for, OMPD_distribute_parallel_for}, - {OMPD_distribute_parallel_for, OMPD_simd, - OMPD_distribute_parallel_for_simd}, - {OMPD_distribute, OMPD_simd, OMPD_dis
[llvm-branch-commits] [llvm] [AArch64][PAC] Rework discriminator analysis in AUT and AUTPAC (PR #146489)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/146489 >From 6634cb01b163b2482ce57915e63e1e386308f218 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Sat, 28 Jun 2025 11:09:01 +0300 Subject: [PATCH] [AArch64][PAC] Rework discriminator analysis in AUT and AUTPAC Make use of post-processing the discriminator components by custom inserter hook to eliminate duplication for DAGISel and GlobalISel and improve cross-BB analysis for DAGISel. --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 51 +--- .../Target/AArch64/AArch64ISelLowering.cpp| 10 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 + .../GISel/AArch64InstructionSelector.cpp | 27 +- llvm/test/CodeGen/AArch64/ptrauth-isel.ll | 235 +- 5 files changed, 256 insertions(+), 69 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index da617b7e19266..5d3fd48f448b5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1487,39 +1487,6 @@ void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); } -static std::tuple -extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) { - SDLoc DL(Disc); - SDValue AddrDisc; - SDValue ConstDisc; - - // If this is a blend, remember the constant and address discriminators. - // Otherwise, it's either a constant discriminator, or a non-blended - // address discriminator. - if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN && - Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) { -AddrDisc = Disc->getOperand(1); -ConstDisc = Disc->getOperand(2); - } else { -ConstDisc = Disc; - } - - // If the constant discriminator (either the blend RHS, or the entire - // discriminator value) isn't a 16-bit constant, bail out, and let the - // discriminator be computed separately. 
- auto *ConstDiscN = dyn_cast(ConstDisc); - if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue())) -return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc); - - // If there's no address discriminator, use XZR directly. - if (!AddrDisc) -AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64); - - return std::make_tuple( - DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64), - AddrDisc); -} - void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) { SDLoc DL(N); // IntrinsicID is operand #0 @@ -1530,13 +1497,11 @@ void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) { unsigned AUTKeyC = cast(AUTKey)->getZExtValue(); AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64); - SDValue AUTAddrDisc, AUTConstDisc; - std::tie(AUTConstDisc, AUTAddrDisc) = - extractPtrauthBlendDiscriminators(AUTDisc, CurDAG); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64); SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AArch64::X16, Val, SDValue()); - SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)}; + SDValue Ops[] = {AUTKey, Zero, AUTDisc, X16Copy.getValue(1)}; SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops); ReplaceNode(N, AUT); @@ -1557,19 +1522,13 @@ void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) { AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64); PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64); - SDValue AUTAddrDisc, AUTConstDisc; - std::tie(AUTConstDisc, AUTAddrDisc) = - extractPtrauthBlendDiscriminators(AUTDisc, CurDAG); - - SDValue PACAddrDisc, PACConstDisc; - std::tie(PACConstDisc, PACAddrDisc) = - extractPtrauthBlendDiscriminators(PACDisc, CurDAG); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64); SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AArch64::X16, Val, SDValue()); - SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,PACKey, - PACConstDisc, PACAddrDisc, X16Copy.getValue(1)}; + SDValue Ops[] = { + AUTKey, 
Zero, AUTDisc, PACKey, Zero, PACDisc, X16Copy.getValue(1)}; SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops); ReplaceNode(N, AUTPAC); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c8eb9f3dd01ad..d7b835c8acba7 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3241,10 +3241,20 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( case AArch64::MOVT_TIZ_PSEUDO: return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true); + case AArch64::AUT: +fixupBlendComponents(MI, BB, MI.getOperand(1), MI.getOperand(2), +
[llvm-branch-commits] [llvm] [AArch64][PAC] Introduce AArch64::PAC pseudo instruction (PR #146488)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/146488 >From ba9d8965de86e63cce18fc9c2d0fe9484f172e1f Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Sat, 28 Jun 2025 10:50:46 +0300 Subject: [PATCH] [AArch64][PAC] Introduce AArch64::PAC pseudo instruction Introduce a pseudo instruction to be selected instead of a pair of `MOVKXi` and `PAC[DI][AB]` carrying address and immediate modifiers as separate operands. The new pseudo instruction is expanded in AsmPrinter, so that MOVKXi is emitted immediately before `PAC[DI][AB]`. This way, an attacker cannot control the immediate modifier used to sign the value, even if address modifier can be substituted. To simplify the instruction selection, select AArch64::PAC pseudo using TableGen pattern and post-process its $AddrDisc operand by custom inserter hook - this eliminates duplication of the logic for DAGISel and GlobalISel. Furthermore, this improves cross-BB analysis in case of DAGISel. --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 32 +++ .../Target/AArch64/AArch64ISelLowering.cpp| 74 +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 7 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 21 +- llvm/test/CodeGen/AArch64/ptrauth-isel.ll | 205 ++ 5 files changed, 338 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/ptrauth-isel.ll diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index dd10050592190..f34217a3a8133 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -171,6 +171,9 @@ class AArch64AsmPrinter : public AsmPrinter { // Emit the sequence for AUT or AUTPAC. void emitPtrauthAuthResign(const MachineInstr *MI); + // Emit the sequence for PAC. + void emitPtrauthSign(const MachineInstr *MI); + // Emit the sequence to compute the discriminator. // // ScratchReg should be x16/x17. 
@@ -2173,6 +2176,31 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { OutStreamer->emitLabel(EndSym); } +void AArch64AsmPrinter::emitPtrauthSign(const MachineInstr *MI) { + Register Val = MI->getOperand(1).getReg(); + auto Key = (AArch64PACKey::ID)MI->getOperand(2).getImm(); + uint64_t Disc = MI->getOperand(3).getImm(); + Register AddrDisc = MI->getOperand(4).getReg(); + bool AddrDiscKilled = MI->getOperand(4).isKill(); + + // Compute aut discriminator into x17 + assert(isUInt<16>(Disc)); + Register DiscReg = emitPtrauthDiscriminator( + Disc, AddrDisc, AArch64::X17, /*MayUseAddrAsScratch=*/AddrDiscKilled); + bool IsZeroDisc = DiscReg == AArch64::XZR; + unsigned Opc = getPACOpcodeForKey(Key, IsZeroDisc); + + // paciza x16 ; if IsZeroDisc + // pacia x16, x17 ; if !IsZeroDisc + MCInst PACInst; + PACInst.setOpcode(Opc); + PACInst.addOperand(MCOperand::createReg(Val)); + PACInst.addOperand(MCOperand::createReg(Val)); + if (!IsZeroDisc) +PACInst.addOperand(MCOperand::createReg(DiscReg)); + EmitToStreamer(*OutStreamer, PACInst); +} + void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) { bool IsCall = MI->getOpcode() == AArch64::BLRA; unsigned BrTarget = MI->getOperand(0).getReg(); @@ -2867,6 +2895,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { emitPtrauthAuthResign(MI); return; + case AArch64::PAC: +emitPtrauthSign(MI); +return; + case AArch64::LOADauthptrstatic: LowerLOADauthptrstatic(*MI); return; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fb8bd81c033af..c8eb9f3dd01ad 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3073,6 +3073,75 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr &MI, return BB; } +// Helper function to find the instruction that defined a virtual register. +// If unable to find such instruction, returns nullptr. 
+static MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI, + Register Reg) { + while (Reg.isVirtual()) { +MachineInstr *DefMI = MRI.getVRegDef(Reg); +assert(DefMI && "Virtual register definition not found"); +unsigned Opcode = DefMI->getOpcode(); + +if (Opcode == AArch64::COPY) { + Reg = DefMI->getOperand(1).getReg(); + // Vreg is defined by copying from physreg. + if (Reg.isPhysical()) +return DefMI; + continue; +} +if (Opcode == AArch64::SUBREG_TO_REG) { + Reg = DefMI->getOperand(2).getReg(); + continue; +} + +return DefMI; + } + return nullptr; +} + +void AArch64TargetLowering::fixupBlendComponents( +MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp, +MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const { + const TargetInstrInfo *TII = Subtarge
[llvm-branch-commits] [llvm] [AArch64][PAC] Rework discriminator analysis in AUT and AUTPAC (PR #146489)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/146489 >From 6634cb01b163b2482ce57915e63e1e386308f218 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Sat, 28 Jun 2025 11:09:01 +0300 Subject: [PATCH] [AArch64][PAC] Rework discriminator analysis in AUT and AUTPAC Make use of post-processing the discriminator components by custom inserter hook to eliminate duplication for DAGISel and GlobalISel and improve cross-BB analysis for DAGISel. --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 51 +--- .../Target/AArch64/AArch64ISelLowering.cpp| 10 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 2 + .../GISel/AArch64InstructionSelector.cpp | 27 +- llvm/test/CodeGen/AArch64/ptrauth-isel.ll | 235 +- 5 files changed, 256 insertions(+), 69 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index da617b7e19266..5d3fd48f448b5 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -1487,39 +1487,6 @@ void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); } -static std::tuple -extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) { - SDLoc DL(Disc); - SDValue AddrDisc; - SDValue ConstDisc; - - // If this is a blend, remember the constant and address discriminators. - // Otherwise, it's either a constant discriminator, or a non-blended - // address discriminator. - if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN && - Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) { -AddrDisc = Disc->getOperand(1); -ConstDisc = Disc->getOperand(2); - } else { -ConstDisc = Disc; - } - - // If the constant discriminator (either the blend RHS, or the entire - // discriminator value) isn't a 16-bit constant, bail out, and let the - // discriminator be computed separately. 
- auto *ConstDiscN = dyn_cast(ConstDisc); - if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue())) -return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc); - - // If there's no address discriminator, use XZR directly. - if (!AddrDisc) -AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64); - - return std::make_tuple( - DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64), - AddrDisc); -} - void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) { SDLoc DL(N); // IntrinsicID is operand #0 @@ -1530,13 +1497,11 @@ void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) { unsigned AUTKeyC = cast(AUTKey)->getZExtValue(); AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64); - SDValue AUTAddrDisc, AUTConstDisc; - std::tie(AUTConstDisc, AUTAddrDisc) = - extractPtrauthBlendDiscriminators(AUTDisc, CurDAG); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64); SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AArch64::X16, Val, SDValue()); - SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)}; + SDValue Ops[] = {AUTKey, Zero, AUTDisc, X16Copy.getValue(1)}; SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops); ReplaceNode(N, AUT); @@ -1557,19 +1522,13 @@ void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) { AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64); PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64); - SDValue AUTAddrDisc, AUTConstDisc; - std::tie(AUTConstDisc, AUTAddrDisc) = - extractPtrauthBlendDiscriminators(AUTDisc, CurDAG); - - SDValue PACAddrDisc, PACConstDisc; - std::tie(PACConstDisc, PACAddrDisc) = - extractPtrauthBlendDiscriminators(PACDisc, CurDAG); + SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i64); SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AArch64::X16, Val, SDValue()); - SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc,PACKey, - PACConstDisc, PACAddrDisc, X16Copy.getValue(1)}; + SDValue Ops[] = { + AUTKey, 
Zero, AUTDisc, PACKey, Zero, PACDisc, X16Copy.getValue(1)}; SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops); ReplaceNode(N, AUTPAC); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c8eb9f3dd01ad..d7b835c8acba7 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3241,10 +3241,20 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( case AArch64::MOVT_TIZ_PSEUDO: return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true); + case AArch64::AUT: +fixupBlendComponents(MI, BB, MI.getOperand(1), MI.getOperand(2), +
[llvm-branch-commits] [llvm] [AArch64][PAC] Introduce AArch64::PAC pseudo instruction (PR #146488)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/146488 >From ba9d8965de86e63cce18fc9c2d0fe9484f172e1f Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Sat, 28 Jun 2025 10:50:46 +0300 Subject: [PATCH] [AArch64][PAC] Introduce AArch64::PAC pseudo instruction Introduce a pseudo instruction to be selected instead of a pair of `MOVKXi` and `PAC[DI][AB]` carrying address and immediate modifiers as separate operands. The new pseudo instruction is expanded in AsmPrinter, so that MOVKXi is emitted immediately before `PAC[DI][AB]`. This way, an attacker cannot control the immediate modifier used to sign the value, even if address modifier can be substituted. To simplify the instruction selection, select AArch64::PAC pseudo using TableGen pattern and post-process its $AddrDisc operand by custom inserter hook - this eliminates duplication of the logic for DAGISel and GlobalISel. Furthermore, this improves cross-BB analysis in case of DAGISel. --- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 32 +++ .../Target/AArch64/AArch64ISelLowering.cpp| 74 +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 7 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 21 +- llvm/test/CodeGen/AArch64/ptrauth-isel.ll | 205 ++ 5 files changed, 338 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/ptrauth-isel.ll diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index dd10050592190..f34217a3a8133 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -171,6 +171,9 @@ class AArch64AsmPrinter : public AsmPrinter { // Emit the sequence for AUT or AUTPAC. void emitPtrauthAuthResign(const MachineInstr *MI); + // Emit the sequence for PAC. + void emitPtrauthSign(const MachineInstr *MI); + // Emit the sequence to compute the discriminator. // // ScratchReg should be x16/x17. 
@@ -2173,6 +2176,31 @@ void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { OutStreamer->emitLabel(EndSym); } +void AArch64AsmPrinter::emitPtrauthSign(const MachineInstr *MI) { + Register Val = MI->getOperand(1).getReg(); + auto Key = (AArch64PACKey::ID)MI->getOperand(2).getImm(); + uint64_t Disc = MI->getOperand(3).getImm(); + Register AddrDisc = MI->getOperand(4).getReg(); + bool AddrDiscKilled = MI->getOperand(4).isKill(); + + // Compute aut discriminator into x17 + assert(isUInt<16>(Disc)); + Register DiscReg = emitPtrauthDiscriminator( + Disc, AddrDisc, AArch64::X17, /*MayUseAddrAsScratch=*/AddrDiscKilled); + bool IsZeroDisc = DiscReg == AArch64::XZR; + unsigned Opc = getPACOpcodeForKey(Key, IsZeroDisc); + + // paciza x16 ; if IsZeroDisc + // pacia x16, x17 ; if !IsZeroDisc + MCInst PACInst; + PACInst.setOpcode(Opc); + PACInst.addOperand(MCOperand::createReg(Val)); + PACInst.addOperand(MCOperand::createReg(Val)); + if (!IsZeroDisc) +PACInst.addOperand(MCOperand::createReg(DiscReg)); + EmitToStreamer(*OutStreamer, PACInst); +} + void AArch64AsmPrinter::emitPtrauthBranch(const MachineInstr *MI) { bool IsCall = MI->getOpcode() == AArch64::BLRA; unsigned BrTarget = MI->getOperand(0).getReg(); @@ -2867,6 +2895,10 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { emitPtrauthAuthResign(MI); return; + case AArch64::PAC: +emitPtrauthSign(MI); +return; + case AArch64::LOADauthptrstatic: LowerLOADauthptrstatic(*MI); return; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index fb8bd81c033af..c8eb9f3dd01ad 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3073,6 +3073,75 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr &MI, return BB; } +// Helper function to find the instruction that defined a virtual register. +// If unable to find such instruction, returns nullptr. 
+static MachineInstr *stripVRegCopies(const MachineRegisterInfo &MRI, + Register Reg) { + while (Reg.isVirtual()) { +MachineInstr *DefMI = MRI.getVRegDef(Reg); +assert(DefMI && "Virtual register definition not found"); +unsigned Opcode = DefMI->getOpcode(); + +if (Opcode == AArch64::COPY) { + Reg = DefMI->getOperand(1).getReg(); + // Vreg is defined by copying from physreg. + if (Reg.isPhysical()) +return DefMI; + continue; +} +if (Opcode == AArch64::SUBREG_TO_REG) { + Reg = DefMI->getOperand(2).getReg(); + continue; +} + +return DefMI; + } + return nullptr; +} + +void AArch64TargetLowering::fixupBlendComponents( +MachineInstr &MI, MachineBasicBlock *BB, MachineOperand &IntDiscOp, +MachineOperand &AddrDiscOp, const TargetRegisterClass *AddrDiscRC) const { + const TargetInstrInfo *TII = Subtarge
[llvm-branch-commits] [mlir] [mlir] NFC - refactor id builder and avoid leaking impl details (PR #146922)
llvmbot wrote: @llvm/pr-subscribers-mlir-gpu @llvm/pr-subscribers-mlir Author: Nicolas Vasilache (nicolasvasilache) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/146922.diff 3 Files Affected: - (modified) mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h (+14-17) - (modified) mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp (+6-27) - (modified) mlir/lib/Dialect/GPU/TransformOps/Utils.cpp (+107-69) ``diff diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index 111c67638efc8..de512ded59fec 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -28,27 +28,24 @@ namespace transform { namespace gpu { /// Helper type for functions that generate ids for the mapping of a scf.forall. -/// Operates on both 1) an "original" basis that represents the individual -/// thread and block ids and 2) a "scaled" basis that represents grouped ids -/// (e.g. block clusters, warpgroups and warps). -/// The mapping of ids is done in the "scaled" basis (i.e. when mapping to warps -/// a division by 32 occurs). -/// The predication is in the "original" basis using the "active" quantities -/// (`activeMappingSizes`, `availableMappingSizes` and `activeIdOps`). struct IdBuilderResult { - // Ops used to replace the forall induction variables. + /// Error message, if not empty then building the ids failed. + std::string errorMsg; + /// Values used to replace the forall induction variables. SmallVector mappingIdOps; - // Available mapping sizes used to predicate the forall body when they are - // larger than the predicate mapping sizes. - SmallVector availableMappingSizes; - // Actual mapping sizes used to predicate the forall body when they are - // smaller than the available mapping sizes. 
- SmallVector activeMappingSizes; - // Ops used to predicate the forall body when activeMappingSizes is smaller - // than the available mapping sizes. - SmallVector activeIdOps; + /// Values used to predicate the forall body when activeMappingSizes is + /// smaller than the available mapping sizes. + SmallVector predicateOps; }; +inline raw_ostream &operator<<(raw_ostream &os, const IdBuilderResult &res) { + llvm::interleaveComma(res.mappingIdOps, os << "mappingIdOps: "); + os << "\n"; + llvm::interleaveComma(res.predicateOps, os << "predicateOps: "); + os << "\n"; + return os; +} + /// Common gpu id builder type, allows the configuration of lowering for various /// mapping schemes. Takes: /// - A rewriter with insertion point set before the forall op to rewrite. diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index 20d1c94409238..63f87d9b5877e 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -491,6 +491,10 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl( IdBuilderResult builderResult = gpuIdBuilder.idBuilder(rewriter, loc, forallMappingSizes, originalBasis); + if (!builderResult.errorMsg.empty()) +return definiteFailureHelper(transformOp, forallOp, builderResult.errorMsg); + + LLVM_DEBUG(DBGS() << builderResult); // Step 4. Map the induction variables to the mappingIdOps, this may involve // a permutation. @@ -501,7 +505,7 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl( forallMappingAttrs.getArrayRef().take_front(forallOp.getRank( { auto mappingAttr = cast(dim); Value peIdOp = mappingIdOps[mappingAttr.getRelativeIndex()]; -LDBG("map: " << iv << " to" << peIdOp); +LDBG("map: " << iv << " to " << peIdOp); bvm.map(iv, peIdOp); } @@ -510,32 +514,7 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl( // originalBasis and no predication occurs. 
Value predicate; if (originalBasisWasProvided) { -SmallVector activeMappingSizes = builderResult.activeMappingSizes; -SmallVector availableMappingSizes = -builderResult.availableMappingSizes; -SmallVector activeIdOps = builderResult.activeIdOps; -LDBG("activeMappingSizes: " << llvm::interleaved(activeMappingSizes)); -LDBG("availableMappingSizes: " - << llvm::interleaved(availableMappingSizes)); -LDBG("activeIdOps: " << llvm::interleaved(activeIdOps)); -for (auto [activeId, activeMappingSize, availableMappingSize] : - llvm::zip_equal(activeIdOps, activeMappingSizes, - availableMappingSizes)) { - if (activeMappingSize > availableMappingSize) { -return definiteFailureHelper( -transformOp, forallOp, -"Trying to map to fewer GPU threads than loop iterations but " -"overprovisioning is not yet supported. " -"Try additional tiling of the before mapping
[llvm-branch-commits] [mlir] [mlir] NFC - refactor id builder and avoid leaking impl details (PR #146922)
https://github.com/nicolasvasilache created https://github.com/llvm/llvm-project/pull/146922 None >From c88aee740d5d944364e79600bf3c01493a1c3fee Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 18:32:59 +0200 Subject: [PATCH] [mlir] NFC - refactor id builder and avoid leaking impl details --- .../mlir/Dialect/GPU/TransformOps/Utils.h | 31 ++- .../GPU/TransformOps/GPUTransformOps.cpp | 33 +--- mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 176 +++--- 3 files changed, 127 insertions(+), 113 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index 111c67638efc8..de512ded59fec 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -28,27 +28,24 @@ namespace transform { namespace gpu { /// Helper type for functions that generate ids for the mapping of a scf.forall. -/// Operates on both 1) an "original" basis that represents the individual -/// thread and block ids and 2) a "scaled" basis that represents grouped ids -/// (e.g. block clusters, warpgroups and warps). -/// The mapping of ids is done in the "scaled" basis (i.e. when mapping to warps -/// a division by 32 occurs). -/// The predication is in the "original" basis using the "active" quantities -/// (`activeMappingSizes`, `availableMappingSizes` and `activeIdOps`). struct IdBuilderResult { - // Ops used to replace the forall induction variables. + /// Error message, if not empty then building the ids failed. + std::string errorMsg; + /// Values used to replace the forall induction variables. SmallVector mappingIdOps; - // Available mapping sizes used to predicate the forall body when they are - // larger than the predicate mapping sizes. - SmallVector availableMappingSizes; - // Actual mapping sizes used to predicate the forall body when they are - // smaller than the available mapping sizes. 
- SmallVector activeMappingSizes; - // Ops used to predicate the forall body when activeMappingSizes is smaller - // than the available mapping sizes. - SmallVector activeIdOps; + /// Values used to predicate the forall body when activeMappingSizes is + /// smaller than the available mapping sizes. + SmallVector predicateOps; }; +inline raw_ostream &operator<<(raw_ostream &os, const IdBuilderResult &res) { + llvm::interleaveComma(res.mappingIdOps, os << "mappingIdOps: "); + os << "\n"; + llvm::interleaveComma(res.predicateOps, os << "predicateOps: "); + os << "\n"; + return os; +} + /// Common gpu id builder type, allows the configuration of lowering for various /// mapping schemes. Takes: /// - A rewriter with insertion point set before the forall op to rewrite. diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index 20d1c94409238..63f87d9b5877e 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -491,6 +491,10 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl( IdBuilderResult builderResult = gpuIdBuilder.idBuilder(rewriter, loc, forallMappingSizes, originalBasis); + if (!builderResult.errorMsg.empty()) +return definiteFailureHelper(transformOp, forallOp, builderResult.errorMsg); + + LLVM_DEBUG(DBGS() << builderResult); // Step 4. Map the induction variables to the mappingIdOps, this may involve // a permutation. @@ -501,7 +505,7 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl( forallMappingAttrs.getArrayRef().take_front(forallOp.getRank( { auto mappingAttr = cast(dim); Value peIdOp = mappingIdOps[mappingAttr.getRelativeIndex()]; -LDBG("map: " << iv << " to" << peIdOp); +LDBG("map: " << iv << " to " << peIdOp); bvm.map(iv, peIdOp); } @@ -510,32 +514,7 @@ static DiagnosedSilenceableFailure rewriteOneForallCommonImpl( // originalBasis and no predication occurs. 
Value predicate; if (originalBasisWasProvided) { -SmallVector activeMappingSizes = builderResult.activeMappingSizes; -SmallVector availableMappingSizes = -builderResult.availableMappingSizes; -SmallVector activeIdOps = builderResult.activeIdOps; -LDBG("activeMappingSizes: " << llvm::interleaved(activeMappingSizes)); -LDBG("availableMappingSizes: " - << llvm::interleaved(availableMappingSizes)); -LDBG("activeIdOps: " << llvm::interleaved(activeIdOps)); -for (auto [activeId, activeMappingSize, availableMappingSize] : - llvm::zip_equal(activeIdOps, activeMappingSizes, - availableMappingSizes)) { - if (activeMappingSize > availableMappingSize) { -return definiteFailureHelper( -transformOp, forallOp, -"Trying to map to fewer GPU threads than loop iterations but " -
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)
https://github.com/bogner edited https://github.com/llvm/llvm-project/pull/145828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)
https://github.com/bogner approved this pull request. Some minor comments but this looks good once they're accounted for. https://github.com/llvm/llvm-project/pull/145828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)
@@ -29,6 +29,9 @@ using namespace llvm::hlsl::rootsig; namespace { +static const llvm::dxbc::RootSignatureVersion DefVersion = +llvm::dxbc::RootSignatureVersion::V1_1; bogner wrote: I don't think this global makes the tests clearer. Maybe throw in a `using llvm::dxbc::RootSignatureVersion::V1_1` and just pass `V1_1` to the Parser constructors if you want to be concise? https://github.com/llvm/llvm-project/pull/145828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)
@@ -605,13 +608,159 @@ TEST_F(ParseHLSLRootSignatureTest, ValidTrailingCommaTest) { hlsl::RootSignatureLexer Lexer(Source, TokLoc); SmallVector Elements; - hlsl::RootSignatureParser Parser(Elements, Lexer, *PP); + hlsl::RootSignatureParser Parser(DefVersion, Elements, Lexer, *PP); + + // Test no diagnostics produced + Consumer->setNoDiag(); + + ASSERT_FALSE(Parser.parse()); + + ASSERT_TRUE(Consumer->isSatisfied()); +} + +TEST_F(ParseHLSLRootSignatureTest, ValidVersion10Test) { + // This test checks that the default values are set correctly + // when parsing with root signature version 1.0 + const llvm::StringLiteral Source = R"cc( +CBV(b0), +SRV(t0), +UAV(u0), +DescriptorTable( + CBV(b1), + SRV(t1), + UAV(u1), + Sampler(s1), +) + )cc"; + + TrivialModuleLoader ModLoader; + auto PP = createPP(Source, ModLoader); + auto TokLoc = SourceLocation(); + + hlsl::RootSignatureLexer Lexer(Source, TokLoc); + SmallVector Elements; + auto Version = llvm::dxbc::RootSignatureVersion::V1_0; + hlsl::RootSignatureParser Parser(Version, Elements, Lexer, *PP); bogner wrote: Similarly, I think it's clearer to just pass the enum directly to the constructor here and below. https://github.com/llvm/llvm-project/pull/145828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)
@@ -55,7 +59,9 @@ // CHECK-SAME: numClauses = 3, visibility = All // CHECK-SAME: ), // CHECK-SAME: Sampler( -// CHECK-SAME: s0, numDescriptors = 4, space = 1, offset = DescriptorTableOffsetAppend, flags = None +// CHECK-SAME: s0, numDescriptors = 4, space = 1, offset = DescriptorTableOffsetAppend, +// CHECK-V1_1-SAME: flags = DescriptorsVolatile +// CHECK-V1_1-SAME: flags = None bogner wrote: This looks like a typo... are these both supposed to be V1_1? Does this test currently pass? https://github.com/llvm/llvm-project/pull/145828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: Backport [LLD][COFF] Disallow importing DllMain from import libraries (#146610) (PR #146699)
@@ -313,6 +313,7 @@ struct Configuration { bool warnDebugInfoUnusable = true; bool warnLongSectionNames = true; bool warnStdcallFixup = true; + bool warnExportedDllMain = true; nikic wrote: This is an ABI break. https://github.com/llvm/llvm-project/pull/146699 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [RelLookupTableConverter] Drop unnamed_addr for GVs in entries to avoid generating GOTPCREL relocations (#146068) (PR #146191)
https://github.com/nikic milestoned https://github.com/llvm/llvm-project/pull/146191 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 692a679 - Revert "[mlgo][regalloc] Fix after PR #131837 (#146297)"
Author: Mircea Trofin Date: 2025-07-03T07:10:55-07:00 New Revision: 692a6797f50ced40e0e7585dab2e9676cc3bba1b URL: https://github.com/llvm/llvm-project/commit/692a6797f50ced40e0e7585dab2e9676cc3bba1b DIFF: https://github.com/llvm/llvm-project/commit/692a6797f50ced40e0e7585dab2e9676cc3bba1b.diff LOG: Revert "[mlgo][regalloc] Fix after PR #131837 (#146297)" This reverts commit 9a6e0688b04f1122012548b5f7d627ed347acfba. Added: Modified: llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt llvm/test/CodeGen/MLRegAlloc/Inputs/reference-prio-log-noml.txt llvm/test/CodeGen/MLRegAlloc/dev-mode-prio-logging.ll Removed: diff --git a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt index b6639b844c888..231f632403d48 100644 --- a/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt +++ b/llvm/test/CodeGen/MLRegAlloc/Inputs/reference-log-noml.txt @@ -16,8 +16,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7265065908432007,0.0, start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.333432674408,0.333432674408,0.333432674408,0.333432674408,0.166716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2714630176778883e-10,0.9760092496871948,0.9760092496871948,0.9760092496871948,2.2714630176778883e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.23831403255462646,0.07943800836801529,0.07943800836801529,0.07943800836801529,0.9912577867507935,0.07069581001996994,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9646825194358826,0.7932539582252502,0.7900793552398682,0.7392857074737549,0.9170634746551514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7428571581840515 
-use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051188573241233826,0.01760609820485115,0.014214384369552135,0.014272669330239296,1.0,0.07243786007165909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.42433658242225647 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.7939082384109497,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7436708807945251 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.014218696393072605,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.4243086874485016 max_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.777910232544 @@ -40,8 +40,8 @@ hint_weights_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0 start_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.333432674408,0.0,0.333432674408,0.333432674408,0.166716337204,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333432674408 end_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.2714630176778883e-10,0.0,0.9760092496871948,0.9760092496871948,2.2714630176778883e-10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.9760092496871948 hottest_bb_freq_by_max: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2404157966375351,0.08013860136270523,0.0,0.08013860136270523,1.0,0.07131929695606232,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.08013860136270523 -liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9646825194358826,0.0,0.7900793552398682,0.7392857074737549,0.9170634746551514,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7932539582252502 -use_def_density: 
0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.051188573241233826,0.01760609820485115,0.0,0.014272669330239296,1.0,0.07243786007165909,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014214384369552135 +liverange_size: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.9647942781448364,0.0,0.7907436490058899,0.7401107549667358,0.9173259735107422,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7939082384109497 +use_def_density: 0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.05123833194375038,0.017619721591472626,0.0,0.014276761561632156,1.0,0.07275574654340744,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.014218696393072605 max_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 min_stage: 0,0,0,0,0,0,0,0,0,1,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 progress: 0.777910232544 @@ -64,8 +64,8 @@ hint_weigh
[llvm-branch-commits] [llvm] [AArch64][PAC] Combine signing with address materialization (PR #130809)
https://github.com/atrosinenko updated https://github.com/llvm/llvm-project/pull/130809 >From a6f9665a83b9002250d5d7c59915d92d173a21e2 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Mon, 10 Mar 2025 15:14:55 +0300 Subject: [PATCH 1/2] [AArch64][PAC] Precommit tests on merging MOVaddr/LOADgotAUTH with PAC* --- .../GlobalISel/ptrauth-constant-in-code.ll| 76 +++ .../AArch64/ptrauth-constant-in-code.ll | 71 + 2 files changed, 147 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-constant-in-code.ll b/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-constant-in-code.ll index 12a3448111fcb..140e29f942a79 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-constant-in-code.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/ptrauth-constant-in-code.ll @@ -78,6 +78,82 @@ define ptr @foo() { ret ptr ptrauth (ptr @g, i32 0) } +;--- finalize-isel.ll + +; RUN: llc < finalize-isel.ll -mtriple aarch64-elf -mattr=+pauth -global-isel=1 \ +; RUN: -verify-machineinstrs -global-isel-abort=1 -stop-after=finalize-isel | \ +; RUN: FileCheck --check-prefixes=ISEL,ISEL-ELF %s +; RUN: llc < finalize-isel.ll -mtriple arm64-apple-ios -mattr=+pauth -global-isel=1 \ +; RUN: -verify-machineinstrs -global-isel-abort=1 -stop-after=finalize-isel | \ +; RUN: FileCheck --check-prefixes=ISEL %s + +@const_table_local = dso_local constant [3 x ptr] [ptr null, ptr null, ptr null] +@const_table_got = constant [3 x ptr] [ptr null, ptr null, ptr null] + +define void @store_signed_const_local(ptr %dest) { +; ISEL-LABEL: name: store_signed_const_local +; ISEL: body: +; ISEL: %0:gpr64common = COPY $x0 +; ISEL-NEXT:%10:gpr64common = MOVaddr target-flags(aarch64-page) @const_table_local + 8, target-flags(aarch64-pageoff, aarch64-nc) @const_table_local + 8 +; ISEL-NEXT:%2:gpr64noip = MOVKXi %0, 1234 +; ISEL-NEXT:%15:gpr64noip = COPY %0 +; ISEL-NEXT:%4:gpr64 = PAC %10, 2, 1234, %15, implicit-def dead $x17 +; ISEL-NEXT:%14:gpr64 = COPY %4 +; ISEL-NEXT:STRXui %14, %0, 0 :: (store (p0) 
into %ir.dest) +; ISEL-NEXT:RET_ReallyLR + %dest.i = ptrtoint ptr %dest to i64 + %discr = call i64 @llvm.ptrauth.blend(i64 %dest.i, i64 1234) + %signed.i = call i64 @llvm.ptrauth.sign(i64 ptrtoint (ptr getelementptr ([2 x ptr], ptr @const_table_local, i32 0, i32 1) to i64), i32 2, i64 %discr) + %signed.ptr = inttoptr i64 %signed.i to ptr + store ptr %signed.ptr, ptr %dest + ret void +} + +define void @store_signed_const_got(ptr %dest) { +; ISEL-ELF-LABEL: name: store_signed_const_got +; ISEL-ELF: body: +; ISEL-ELF: %0:gpr64common = COPY $x0 +; ISEL-ELF-NEXT:%7:gpr64common = LOADgotAUTH target-flags(aarch64-got) @const_table_got +; ISEL-ELF-NEXT:%6:gpr64common = ADDXri %7, 8, 0 +; ISEL-ELF-NEXT:%2:gpr64noip = MOVKXi %0, 1234 +; ISEL-ELF-NEXT:%12:gpr64noip = COPY %0 +; ISEL-ELF-NEXT:%4:gpr64 = PAC %6, 2, 1234, %12, implicit-def dead $x17 +; ISEL-ELF-NEXT:%10:gpr64 = COPY %4 +; ISEL-ELF-NEXT:STRXui %10, %0, 0 :: (store (p0) into %ir.dest) +; ISEL-ELF-NEXT:RET_ReallyLR + %dest.i = ptrtoint ptr %dest to i64 + %discr = call i64 @llvm.ptrauth.blend(i64 %dest.i, i64 1234) + %signed.i = call i64 @llvm.ptrauth.sign(i64 ptrtoint (ptr getelementptr ([2 x ptr], ptr @const_table_got, i32 0, i32 1) to i64), i32 2, i64 %discr) + %signed.ptr = inttoptr i64 %signed.i to ptr + store ptr %signed.ptr, ptr %dest + ret void +} + +define void @store_signed_arg(ptr %dest, ptr %p) { +; ISEL-LABEL: name: store_signed_arg +; ISEL: body: +; ISEL: %0:gpr64common = COPY $x0 +; ISEL-NEXT:%1:gpr64common = COPY $x1 +; ISEL-NEXT:%3:gpr64noip = MOVKXi %0, 1234 +; ISEL-NEXT:%6:gpr64common = ADDXri %1, 8, 0 +; ISEL-NEXT:%12:gpr64noip = COPY %0 +; ISEL-NEXT:%8:gpr64 = PAC %6, 2, 1234, %12, implicit-def dead $x17 +; ISEL-NEXT:%10:gpr64 = COPY %8 +; ISEL-NEXT:STRXui %10, %0, 0 :: (store (p0) into %ir.dest) +; ISEL-NEXT:RET_ReallyLR + %dest.i = ptrtoint ptr %dest to i64 + %discr = call i64 @llvm.ptrauth.blend(i64 %dest.i, i64 1234) + %p.offset = getelementptr [2 x ptr], ptr %p, i32 0, i32 1 + %p.offset.i = 
ptrtoint ptr %p.offset to i64 + %signed.i = call i64 @llvm.ptrauth.sign(i64 %p.offset.i, i32 2, i64 %discr) + %signed.ptr = inttoptr i64 %signed.i to ptr + store ptr %signed.ptr, ptr %dest + ret void +} + +!llvm.module.flags = !{!0} +!0 = !{i32 8, !"ptrauth-elf-got", i32 1} + ;--- ok.ll ; RUN: llc < ok.ll -mtriple aarch64-elf -mattr=+pauth -global-isel=1 \ diff --git a/llvm/test/CodeGen/AArch64/ptrauth-constant-in-code.ll b/llvm/test/CodeGen/AArch64/ptrauth-constant-in-code.ll index 76339a7cc5791..429ff6e5489aa 100644 --- a/llvm/test/CodeGen/AArch64/ptrauth-constant-in-code.ll +++ b/llvm/test/CodeGen/AArch64/ptrauth-constant-in-code.ll @@ -69,6 +69,77 @@ define ptr @foo() { ret ptr ptrauth (ptr @g, i32 0) } +;--- finalize-isel.ll + +; RUN: llc < final
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)
@@ -55,7 +59,9 @@ // CHECK-SAME: numClauses = 3, visibility = All // CHECK-SAME: ), // CHECK-SAME: Sampler( -// CHECK-SAME: s0, numDescriptors = 4, space = 1, offset = DescriptorTableOffsetAppend, flags = None +// CHECK-SAME: s0, numDescriptors = 4, space = 1, offset = DescriptorTableOffsetAppend, +// CHECK-V1_1-SAME: flags = DescriptorsVolatile +// CHECK-V1_1-SAME: flags = None inbelic wrote: Yes, it was a typo. The test was passing; however, this was because I had a bad `check-prefixes` in the command line, so none of the `CHECK:` lines were actually being tested. https://github.com/llvm/llvm-project/pull/145828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)
https://github.com/inbelic edited https://github.com/llvm/llvm-project/pull/145828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)
https://github.com/inbelic edited https://github.com/llvm/llvm-project/pull/145828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [NFC][HLSL][RootSignature] Split up `HLSLRootSignatureUtils` (PR #146124)
https://github.com/inbelic edited https://github.com/llvm/llvm-project/pull/146124 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)
https://github.com/inbelic updated https://github.com/llvm/llvm-project/pull/145828 >From 471a4a556ad0653792e39c99da2423d5e3ed933f Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Fri, 27 Jun 2025 16:39:13 + Subject: [PATCH 01/10] update `setDefaultFlags` --- .../llvm/Frontend/HLSL/HLSLRootSignature.h| 20 +-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h index f552040ab31cc..0579c1b5f9c25 100644 --- a/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h +++ b/llvm/include/llvm/Frontend/HLSL/HLSLRootSignature.h @@ -50,7 +50,14 @@ struct RootDescriptor { dxbc::ShaderVisibility Visibility = dxbc::ShaderVisibility::All; dxbc::RootDescriptorFlags Flags; - void setDefaultFlags() { + void setDefaultFlags(dxbc::RootSignatureVersion Version) { +if (Version == dxbc::RootSignatureVersion::V1_0) { + Flags = dxbc::RootDescriptorFlags::DataVolatile; + return; +} + +assert(Version == llvm::dxbc::RootSignatureVersion::V1_1 && + "Specified an invalid root signature version"); switch (Type) { case DescriptorType::CBuffer: case DescriptorType::SRV: @@ -83,7 +90,16 @@ struct DescriptorTableClause { uint32_t Offset = DescriptorTableOffsetAppend; dxbc::DescriptorRangeFlags Flags; - void setDefaultFlags() { + void setDefaultFlags(dxbc::RootSignatureVersion Version) { +if (Version == dxbc::RootSignatureVersion::V1_0) { + Flags = dxbc::DescriptorRangeFlags::DescriptorsVolatile; + if (Type != ClauseType::Sampler) +Flags |= dxbc::DescriptorRangeFlags::DataVolatile; + return; +} + +assert(Version == dxbc::RootSignatureVersion::V1_1 && + "Specified an invalid root signature version"); switch (Type) { case ClauseType::CBuffer: case ClauseType::SRV: >From af70ea275d057f15b80223c11eb11174764da0ff Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Fri, 27 Jun 2025 16:39:43 + Subject: [PATCH 02/10] update unit testing --- .../Frontend/HLSLRootSignatureDumpTest.cpp| 72 ++- 1 file 
changed, 70 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/Frontend/HLSLRootSignatureDumpTest.cpp b/llvm/unittests/Frontend/HLSLRootSignatureDumpTest.cpp index e090f6bae470f..76ac285735d05 100644 --- a/llvm/unittests/Frontend/HLSLRootSignatureDumpTest.cpp +++ b/llvm/unittests/Frontend/HLSLRootSignatureDumpTest.cpp @@ -17,7 +17,7 @@ TEST(HLSLRootSignatureTest, DescriptorCBVClauseDump) { DescriptorTableClause Clause; Clause.Type = ClauseType::CBuffer; Clause.Reg = {RegisterType::BReg, 0}; - Clause.setDefaultFlags(); + Clause.setDefaultFlags(llvm::dxbc::RootSignatureVersion::V1_1); std::string Out; llvm::raw_string_ostream OS(Out); @@ -93,6 +93,40 @@ TEST(HLSLRootSignatureTest, DescriptorSamplerClauseDump) { EXPECT_EQ(Out, Expected); } +TEST(HLSLRootSignatureTest, DescriptorCBVV10ClauseDump) { + DescriptorTableClause Clause; + Clause.Type = ClauseType::CBuffer; + Clause.Reg = {RegisterType::BReg, 0}; + Clause.setDefaultFlags(llvm::dxbc::RootSignatureVersion::V1_0); + + std::string Out; + llvm::raw_string_ostream OS(Out); + OS << Clause; + OS.flush(); + + std::string Expected = "CBV(b0, numDescriptors = 1, space = 0, " + "offset = DescriptorTableOffsetAppend, " + "flags = DescriptorsVolatile | DataVolatile)"; + EXPECT_EQ(Out, Expected); +} + +TEST(HLSLRootSignatureTest, DescriptorSamplerV10ClauseDump) { + DescriptorTableClause Clause; + Clause.Type = ClauseType::Sampler; + Clause.Reg = {RegisterType::SReg, 0}; + Clause.setDefaultFlags(llvm::dxbc::RootSignatureVersion::V1_0); + + std::string Out; + llvm::raw_string_ostream OS(Out); + OS << Clause; + OS.flush(); + + std::string Expected = "Sampler(s0, numDescriptors = 1, space = 0, offset = " + "DescriptorTableOffsetAppend, " + "flags = DescriptorsVolatile)"; + EXPECT_EQ(Out, Expected); +} + TEST(HLSLRootSignatureTest, DescriptorTableDump) { DescriptorTable Table; Table.NumClauses = 4; @@ -112,7 +146,7 @@ TEST(HLSLRootSignatureTest, RootCBVDump) { RootDescriptor Descriptor; Descriptor.Type = 
DescriptorType::CBuffer; Descriptor.Reg = {RegisterType::BReg, 0}; - Descriptor.setDefaultFlags(); + Descriptor.setDefaultFlags(llvm::dxbc::RootSignatureVersion::V1_1); std::string Out; llvm::raw_string_ostream OS(Out); @@ -125,6 +159,40 @@ TEST(HLSLRootSignatureTest, RootCBVDump) { EXPECT_EQ(Out, Expected); } +TEST(HLSLRootSignatureTest, RootSRV10Dump) { + RootDescriptor Descriptor; + Descriptor.Type = DescriptorType::SRV; + Descriptor.Reg = {RegisterType::TReg, 0}; + Descriptor.setDefaultFlags(llvm::dxbc::RootSignatureVersion::V1_0); + + std::string Out; + llvm::raw_string_ostream OS(Out); + OS << Descriptor; + OS.flush(); + + std::stri
[llvm-branch-commits] [llvm] [BOLT] Improve file handling in NFC-Mode (PR #146513)
https://github.com/paschalis-mpeis updated https://github.com/llvm/llvm-project/pull/146513 >From 625f9ee79af68a121afd92e06d9b4f91007a9c38 Mon Sep 17 00:00:00 2001 From: Paschalis Mpeis Date: Tue, 1 Jul 2025 12:37:31 +0100 Subject: [PATCH 1/4] [BOLT] Improve file handling in NFC-Mode This patch introduce the following improvements: - Catch an exception when the CMakeCache.txt is not present - Bail out gracefully when llvm-bolt did not build successfully the current or previous revision. --- bolt/utils/nfc-check-setup.py | 26 +++--- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/bolt/utils/nfc-check-setup.py b/bolt/utils/nfc-check-setup.py index 7d634d7a88b83..2ff27e5c40b63 100755 --- a/bolt/utils/nfc-check-setup.py +++ b/bolt/utils/nfc-check-setup.py @@ -91,18 +91,26 @@ def main(): source_dir = None # find the repo directory -with open(f"{args.build_dir}/CMakeCache.txt") as f: -for line in f: -m = re.match(r"LLVM_SOURCE_DIR:STATIC=(.*)", line) -if m: -source_dir = m.groups()[0] -if not source_dir: -sys.exit("Source directory is not found") +try: +CMCacheFilename=f"{args.build_dir}/CMakeCache.txt" +with open(CMCacheFilename) as f: +for line in f: +m = re.match(r"LLVM_SOURCE_DIR:STATIC=(.*)", line) +if m: +source_dir = m.groups()[0] +if not source_dir: +raise Exception(f"Source directory not found: '{CMCacheFilename}'") +except Exception as e: +sys.exit(e) # build the current commit subprocess.run( shlex.split("cmake --build . --target llvm-bolt"), cwd=args.build_dir ) + +if not os.path.exists(bolt_path): +sys.exit(f"Failed to build the current revision: '{bolt_path}'") + # rename llvm-bolt os.replace(bolt_path, f"{bolt_path}.new") # memorize the old hash for logging @@ -133,11 +141,15 @@ def main(): subprocess.run(shlex.split(f"git checkout -f {args.cmp_rev}"), cwd=source_dir) # get the parent commit hash for logging new_ref = get_git_ref_or_rev(source_dir) + # build the previous commit subprocess.run( shlex.split("cmake --build . 
--target llvm-bolt"), cwd=args.build_dir ) + # rename llvm-bolt +if not os.path.exists(bolt_path): +sys.exit(f"Failed to build the previous revision: '{bolt_path}'") os.replace(bolt_path, f"{bolt_path}.old") # symlink llvm-bolt-wrapper >From 26e7b9f05f8a365f117f14a0975a232e1ec74202 Mon Sep 17 00:00:00 2001 From: Paschalis Mpeis Date: Tue, 1 Jul 2025 12:50:08 +0100 Subject: [PATCH 2/4] python formatter and nits --- bolt/utils/nfc-check-setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bolt/utils/nfc-check-setup.py b/bolt/utils/nfc-check-setup.py index 2ff27e5c40b63..22e8cc646a1c5 100755 --- a/bolt/utils/nfc-check-setup.py +++ b/bolt/utils/nfc-check-setup.py @@ -92,7 +92,7 @@ def main(): source_dir = None # find the repo directory try: -CMCacheFilename=f"{args.build_dir}/CMakeCache.txt" +CMCacheFilename = f"{args.build_dir}/CMakeCache.txt" with open(CMCacheFilename) as f: for line in f: m = re.match(r"LLVM_SOURCE_DIR:STATIC=(.*)", line) @@ -104,6 +104,7 @@ def main(): sys.exit(e) # build the current commit +print ("NFC-Setup: Building current revision..") subprocess.run( shlex.split("cmake --build . --target llvm-bolt"), cwd=args.build_dir ) @@ -143,6 +144,7 @@ def main(): new_ref = get_git_ref_or_rev(source_dir) # build the previous commit +print ("NFC-Setup: Building previous revision..") subprocess.run( shlex.split("cmake --build . 
--target llvm-bolt"), cwd=args.build_dir ) >From ca36aa02effc6c5e5da140940a5c55d4183e0422 Mon Sep 17 00:00:00 2001 From: Paschalis Mpeis Date: Tue, 1 Jul 2025 12:55:46 +0100 Subject: [PATCH 3/4] code formatter (2) --- bolt/utils/nfc-check-setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bolt/utils/nfc-check-setup.py b/bolt/utils/nfc-check-setup.py index 22e8cc646a1c5..d3248050f16e3 100755 --- a/bolt/utils/nfc-check-setup.py +++ b/bolt/utils/nfc-check-setup.py @@ -104,7 +104,7 @@ def main(): sys.exit(e) # build the current commit -print ("NFC-Setup: Building current revision..") +print("NFC-Setup: Building current revision..") subprocess.run( shlex.split("cmake --build . --target llvm-bolt"), cwd=args.build_dir ) @@ -144,7 +144,7 @@ def main(): new_ref = get_git_ref_or_rev(source_dir) # build the previous commit -print ("NFC-Setup: Building previous revision..") +print("NFC-Setup: Building previous revision..") subprocess.run( shlex.split("cmake --build . --target llvm-bolt"), cwd=args.build_dir ) >From 09363a
[llvm-branch-commits] [llvm] [DirectX] Move the scalarizer pass to before dxil-flatten-arrays (PR #146800)
https://github.com/spall approved this pull request. Nit on the PR description: you didn't move the scalarizerPass; you moved the DXILFlattenArrays pass to be immediately after the scalarizerPass. https://github.com/llvm/llvm-project/pull/146800 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: Backport [LLD][COFF] Disallow importing DllMain from import libraries (#146610) (PR #146699)
mstorsjo wrote: > @rnk @mstorsjo is it ok if we integrate this into the release? Sorry I'm a bit late here, but I have a couple of follow-up comments to the original PR, including potentially changing the public interface (the option name). Plus @nikic's potential ABI concern (which I think might not apply here, but let's sort that out). > @tstellar will there be a 20.1.8? AFAIK there isn't one directly planned, unless very pressing issues are found. https://github.com/llvm/llvm-project/pull/146699 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [mlir] [mlir][GPU][transform] Add gpu_to_rocdl conversion pattern to transfo… (PR #146962)
https://github.com/nicolasvasilache created https://github.com/llvm/llvm-project/pull/146962 …rm dialect Authored-by: Son Tuan Vu >From d8730eb667660782ec1dce6e9cdea020c5821300 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 23:09:00 +0200 Subject: [PATCH] [mlir][GPU][transform] Add gpu_to_rocdl conversion pattern to transform dialect Authored-by: Son Tuan Vu --- .../GPU/TransformOps/GPUTransformOps.td | 14 +++ .../Dialect/GPU/TransformOps/CMakeLists.txt | 1 + .../GPU/TransformOps/GPUTransformOps.cpp | 38 +++ .../llvm-project-overlay/mlir/BUILD.bazel | 2 + 4 files changed, 55 insertions(+) diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td index 36b579485fc04..87423c639945f 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td @@ -54,6 +54,20 @@ def ApplyGPUSubgroupReduceToNVVMConversionPatternsOp : Op]> { + let description = [{ +Collects patterns that convert GPU dialect ops to ROCDL dialect ops. These +patterns require an "LLVMTypeConverter". 
+ }]; + let arguments = (ins StrAttr:$chipset); + let assemblyFormat = [{ +`chipset` `=` $chipset attr-dict + }]; +} + //===--===// // Apply...PatternsOp //===--===// diff --git a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt index b26788f675ce5..e5cc0254f1ffe 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt @@ -24,4 +24,5 @@ add_mlir_dialect_library(MLIRGPUTransformOps # ConversionPatterns MLIRNVGPUToNVVM MLIRGPUToNVVMTransforms + MLIRGPUToROCDLTransforms ) diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index a86fc47947130..b764a72529f8f 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -10,6 +10,7 @@ #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" +#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -42,6 +43,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InterleavedRange.h" +#include "llvm/Support/LogicalResult.h" #include using namespace mlir; @@ -129,6 +131,42 @@ LogicalResult transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp:: return success(); } +void transform::ApplyGPUToROCDLConversionPatternsOp::populatePatterns( +TypeConverter &typeConverter, RewritePatternSet &patterns) { + auto &llvmTypeConverter = static_cast(typeConverter); + populateGpuMemorySpaceAttributeConversions( + llvmTypeConverter, [](AddressSpace space) { +switch (space) { +case AddressSpace::Global: + return 1; +case AddressSpace::Workgroup: + return 3; +case AddressSpace::Private: + return 5; +} +llvm_unreachable("unknown address space enum 
value"); +return 0; + }); + FailureOr maybeChipset = + amdgpu::Chipset::parse(getChipset()); + assert(llvm::succeeded(maybeChipset) && "expected valid chipset"); + populateGpuToROCDLConversionPatterns( + llvmTypeConverter, patterns, mlir::gpu::amd::Runtime::HIP, *maybeChipset); +} + +LogicalResult +transform::ApplyGPUToROCDLConversionPatternsOp::verifyTypeConverter( +transform::TypeConverterBuilderOpInterface builder) { + FailureOr maybeChipset = + amdgpu::Chipset::parse(getChipset()); + if (failed(maybeChipset)) { +return emitOpError("Invalid chipset name: " + getChipset()); + } + if (builder.getTypeConverterType() != "LLVMTypeConverter") +return emitOpError("expected LLVMTypeConverter"); + return success(); +} + //===--===// // Apply...PatternsOp //===--===//s diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index cc266c2fe3a77..79f2cd5ea71db 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5502,6 +5502,7 @@ cc_library( ":GPUDialect", ":GPUToGPURuntimeTransforms", ":GPUToNVVMTransforms", +":GPUToROCDLTransforms", ":GPUTransformOpsIncGen", ":GPUTransforms", ":IR", @@ -5509,6 +5510,7 @@ cc_libra
[llvm-branch-commits] [llvm] [mlir] [mlir][GPU][transform] Add gpu_to_rocdl conversion pattern to transfo… (PR #146962)
llvmbot wrote: @llvm/pr-subscribers-mlir-gpu Author: Nicolas Vasilache (nicolasvasilache) Changes …rm dialect Authored-by: Son Tuan Vu--- Full diff: https://github.com/llvm/llvm-project/pull/146962.diff 4 Files Affected: - (modified) mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td (+14) - (modified) mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt (+1) - (modified) mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp (+38) - (modified) utils/bazel/llvm-project-overlay/mlir/BUILD.bazel (+2) ``diff diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td index 36b579485fc04..87423c639945f 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td @@ -54,6 +54,20 @@ def ApplyGPUSubgroupReduceToNVVMConversionPatternsOp : Op]> { + let description = [{ +Collects patterns that convert GPU dialect ops to ROCDL dialect ops. These +patterns require an "LLVMTypeConverter". 
+ }]; + let arguments = (ins StrAttr:$chipset); + let assemblyFormat = [{ +`chipset` `=` $chipset attr-dict + }]; +} + //===--===// // Apply...PatternsOp //===--===// diff --git a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt index b26788f675ce5..e5cc0254f1ffe 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt @@ -24,4 +24,5 @@ add_mlir_dialect_library(MLIRGPUTransformOps # ConversionPatterns MLIRNVGPUToNVVM MLIRGPUToNVVMTransforms + MLIRGPUToROCDLTransforms ) diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index a86fc47947130..b764a72529f8f 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -10,6 +10,7 @@ #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" +#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -42,6 +43,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InterleavedRange.h" +#include "llvm/Support/LogicalResult.h" #include using namespace mlir; @@ -129,6 +131,42 @@ LogicalResult transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp:: return success(); } +void transform::ApplyGPUToROCDLConversionPatternsOp::populatePatterns( +TypeConverter &typeConverter, RewritePatternSet &patterns) { + auto &llvmTypeConverter = static_cast(typeConverter); + populateGpuMemorySpaceAttributeConversions( + llvmTypeConverter, [](AddressSpace space) { +switch (space) { +case AddressSpace::Global: + return 1; +case AddressSpace::Workgroup: + return 3; +case AddressSpace::Private: + return 5; +} +llvm_unreachable("unknown address space enum 
value"); +return 0; + }); + FailureOr maybeChipset = + amdgpu::Chipset::parse(getChipset()); + assert(llvm::succeeded(maybeChipset) && "expected valid chipset"); + populateGpuToROCDLConversionPatterns( + llvmTypeConverter, patterns, mlir::gpu::amd::Runtime::HIP, *maybeChipset); +} + +LogicalResult +transform::ApplyGPUToROCDLConversionPatternsOp::verifyTypeConverter( +transform::TypeConverterBuilderOpInterface builder) { + FailureOr maybeChipset = + amdgpu::Chipset::parse(getChipset()); + if (failed(maybeChipset)) { +return emitOpError("Invalid chipset name: " + getChipset()); + } + if (builder.getTypeConverterType() != "LLVMTypeConverter") +return emitOpError("expected LLVMTypeConverter"); + return success(); +} + //===--===// // Apply...PatternsOp //===--===//s diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index cc266c2fe3a77..79f2cd5ea71db 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5502,6 +5502,7 @@ cc_library( ":GPUDialect", ":GPUToGPURuntimeTransforms", ":GPUToNVVMTransforms", +":GPUToROCDLTransforms", ":GPUTransformOpsIncGen", ":GPUTransforms", ":IR", @@ -5509,6 +5510,7 @@ cc_library( ":MemRefDialect", ":NVGPUDialect", ":NVVMDialect", +":ROCDLDialect",
[llvm-branch-commits] [llvm] [mlir] [mlir][GPU][transform] Add gpu_to_rocdl conversion pattern to transfo… (PR #146962)
llvmbot wrote: @llvm/pr-subscribers-mlir Author: Nicolas Vasilache (nicolasvasilache) Changes …rm dialect Authored-by: Son Tuan Vu--- Full diff: https://github.com/llvm/llvm-project/pull/146962.diff 4 Files Affected: - (modified) mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td (+14) - (modified) mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt (+1) - (modified) mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp (+38) - (modified) utils/bazel/llvm-project-overlay/mlir/BUILD.bazel (+2) ``diff diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td index 36b579485fc04..87423c639945f 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td @@ -54,6 +54,20 @@ def ApplyGPUSubgroupReduceToNVVMConversionPatternsOp : Op]> { + let description = [{ +Collects patterns that convert GPU dialect ops to ROCDL dialect ops. These +patterns require an "LLVMTypeConverter". 
+ }]; + let arguments = (ins StrAttr:$chipset); + let assemblyFormat = [{ +`chipset` `=` $chipset attr-dict + }]; +} + //===--===// // Apply...PatternsOp //===--===// diff --git a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt index b26788f675ce5..e5cc0254f1ffe 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt +++ b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt @@ -24,4 +24,5 @@ add_mlir_dialect_library(MLIRGPUTransformOps # ConversionPatterns MLIRNVGPUToNVVM MLIRGPUToNVVMTransforms + MLIRGPUToROCDLTransforms ) diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp index a86fc47947130..b764a72529f8f 100644 --- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp +++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp @@ -10,6 +10,7 @@ #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" #include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h" +#include "mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h" #include "mlir/Conversion/LLVMCommon/TypeConverter.h" #include "mlir/Dialect/AMDGPU/IR/AMDGPUDialect.h" #include "mlir/Dialect/Affine/IR/AffineOps.h" @@ -42,6 +43,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InterleavedRange.h" +#include "llvm/Support/LogicalResult.h" #include using namespace mlir; @@ -129,6 +131,42 @@ LogicalResult transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp:: return success(); } +void transform::ApplyGPUToROCDLConversionPatternsOp::populatePatterns( +TypeConverter &typeConverter, RewritePatternSet &patterns) { + auto &llvmTypeConverter = static_cast(typeConverter); + populateGpuMemorySpaceAttributeConversions( + llvmTypeConverter, [](AddressSpace space) { +switch (space) { +case AddressSpace::Global: + return 1; +case AddressSpace::Workgroup: + return 3; +case AddressSpace::Private: + return 5; +} +llvm_unreachable("unknown address space enum 
value"); +return 0; + }); + FailureOr maybeChipset = + amdgpu::Chipset::parse(getChipset()); + assert(llvm::succeeded(maybeChipset) && "expected valid chipset"); + populateGpuToROCDLConversionPatterns( + llvmTypeConverter, patterns, mlir::gpu::amd::Runtime::HIP, *maybeChipset); +} + +LogicalResult +transform::ApplyGPUToROCDLConversionPatternsOp::verifyTypeConverter( +transform::TypeConverterBuilderOpInterface builder) { + FailureOr maybeChipset = + amdgpu::Chipset::parse(getChipset()); + if (failed(maybeChipset)) { +return emitOpError("Invalid chipset name: " + getChipset()); + } + if (builder.getTypeConverterType() != "LLVMTypeConverter") +return emitOpError("expected LLVMTypeConverter"); + return success(); +} + //===--===// // Apply...PatternsOp //===--===//s diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index cc266c2fe3a77..79f2cd5ea71db 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5502,6 +5502,7 @@ cc_library( ":GPUDialect", ":GPUToGPURuntimeTransforms", ":GPUToNVVMTransforms", +":GPUToROCDLTransforms", ":GPUTransformOpsIncGen", ":GPUTransforms", ":IR", @@ -5509,6 +5510,7 @@ cc_library( ":MemRefDialect", ":NVGPUDialect", ":NVVMDialect", +":ROCDLDialect",
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/146943 >From e2fc2f4d78809d5196719b546fd2a6a06058837f Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 21:26:53 +0200 Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::ForallOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is if the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. Co-authored-by: Oleksandr "Alex" Zinenko --- .../Dialect/GPU/IR/GPUDeviceMappingAttr.td| 18 .../mlir/Dialect/GPU/TransformOps/Utils.h | 15 ++- .../Dialect/SCF/IR/DeviceMappingInterface.td | 45 +++- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td| 12 +++ mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp| 45 .../GPU/TransformOps/GPUTransformOps.cpp | 58 ++ mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 102 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 43 ++-- .../Dialect/GPU/transform-gpu-failing.mlir| 61 +++ mlir/test/Dialect/GPU/transform-gpu.mlir | 81 ++ mlir/test/Dialect/SCF/invalid.mlir| 18 12 files changed, 441 insertions(+), 58 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ + DeclareAttrInterfaceMethods ] > { + let 
parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. + }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. 
static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpIdBuilder : public GpuIdBuilder { GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize, - bool useLinearMapping = false); + bool useLinearMapping = false, + DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; }; @@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuThreadIdBuilder : public GpuIdBuilder {
[llvm-branch-commits] AlwaysInliner: A new inlining algorithm to interleave alloca promotion with inlines. (PR #145613)
aemerson wrote: > ⚠️ undef deprecator found issues in your code. ⚠️ This looks to be just the IR output containing undef, not the input. https://github.com/llvm/llvm-project/pull/145613 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/146943 >From ad456bbf3da7ca290c521a945e950fd1cbf3ca81 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 21:26:53 +0200 Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::ForallOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is if the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. Co-authored-by: Oleksandr "Alex" Zinenko --- .../Dialect/GPU/IR/GPUDeviceMappingAttr.td| 18 .../mlir/Dialect/GPU/TransformOps/Utils.h | 15 ++- .../Dialect/SCF/IR/DeviceMappingInterface.td | 45 +++- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td| 12 +++ mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp| 45 .../GPU/TransformOps/GPUTransformOps.cpp | 62 +++ mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 102 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 43 ++-- .../Dialect/GPU/transform-gpu-failing.mlir| 61 +++ mlir/test/Dialect/GPU/transform-gpu.mlir | 81 ++ mlir/test/Dialect/SCF/invalid.mlir| 18 12 files changed, 444 insertions(+), 59 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ + DeclareAttrInterfaceMethods ] > { + let 
parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. + }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. 
static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpIdBuilder : public GpuIdBuilder { GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize, - bool useLinearMapping = false); + bool useLinearMapping = false, + DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; }; @@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuThreadIdBuilder : public GpuIdBuilder {
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache edited https://github.com/llvm/llvm-project/pull/146943 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/146943 >From 403e4ba3929516ac27d51baf306dda2a043fd305 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 21:26:53 +0200 Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::ForallOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is if the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. Co-authored-by: Oleksandr "Alex" Zinenko --- .../Dialect/GPU/IR/GPUDeviceMappingAttr.td| 18 .../mlir/Dialect/GPU/TransformOps/Utils.h | 15 ++- .../Dialect/SCF/IR/DeviceMappingInterface.td | 45 +++- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td| 12 +++ mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp| 45 .../GPU/TransformOps/GPUTransformOps.cpp | 62 +++ mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 102 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 43 ++-- .../Dialect/GPU/transform-gpu-failing.mlir| 61 +++ mlir/test/Dialect/GPU/transform-gpu.mlir | 81 ++ mlir/test/Dialect/SCF/invalid.mlir| 18 12 files changed, 444 insertions(+), 59 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ + DeclareAttrInterfaceMethods ] > { + let 
parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. + }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. 
static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpIdBuilder : public GpuIdBuilder { GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize, - bool useLinearMapping = false); + bool useLinearMapping = false, + DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; }; @@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuThreadIdBuilder : public GpuIdBuilder {
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache updated https://github.com/llvm/llvm-project/pull/146943 >From 85aa5f8c72801f5a75142a663d6e89e83e63decc Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 21:26:53 +0200 Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::ForallOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is if the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. Co-authored-by: Oleksandr "Alex" Zinenko --- .../Dialect/GPU/IR/GPUDeviceMappingAttr.td| 18 .../mlir/Dialect/GPU/TransformOps/Utils.h | 15 ++- .../Dialect/SCF/IR/DeviceMappingInterface.td | 45 +++- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td| 12 +++ mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp| 45 .../GPU/TransformOps/GPUTransformOps.cpp | 62 +++ mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 102 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 43 ++-- .../Dialect/GPU/transform-gpu-failing.mlir| 61 +++ mlir/test/Dialect/GPU/transform-gpu.mlir | 81 ++ mlir/test/Dialect/SCF/invalid.mlir| 18 12 files changed, 444 insertions(+), 59 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ + DeclareAttrInterfaceMethods ] > { + let 
parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. + }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. 
static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpIdBuilder : public GpuIdBuilder { GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize, - bool useLinearMapping = false); + bool useLinearMapping = false, + DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; }; @@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuThreadIdBuilder : public GpuIdBuilder {
[llvm-branch-commits] [lld] release/20.x: Backport [LLD][COFF] Disallow importing DllMain from import libraries (#146610) (PR #146699)
@@ -313,6 +313,7 @@ struct Configuration { bool warnDebugInfoUnusable = true; bool warnLongSectionNames = true; bool warnStdcallFixup = true; + bool warnExportedDllMain = true; mstorsjo wrote: I don't think this is an installed header though? https://github.com/llvm/llvm-project/pull/146699 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][OpenMP] Issue a warning when parsing future directive spelling (PR #146933)
https://github.com/kparzysz created https://github.com/llvm/llvm-project/pull/146933 OpenMP 6.0 introduced alternative spelling for some directives, with the previous spellings still allowed. Warn the user when a new spelling is encountered with OpenMP version set to an older value. >From 5ad103e08e8a06cfc3708ba83601e073a022bb7e Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 2 Jul 2025 12:49:04 -0500 Subject: [PATCH] [clang][OpenMP] Issue a warning when parsing future directive spelling OpenMP 6.0 introduced alternative spelling for some directives, with the previous spellings still being allowed. Warn the user when a new spelling is encountered with OpenMP version set to an older value. --- clang/include/clang/Basic/DiagnosticGroups.td | 4 +- .../clang/Basic/DiagnosticParseKinds.td | 3 + clang/lib/Parse/ParseOpenMP.cpp | 28 -- .../test/OpenMP/openmp-6-future-spellings.cpp | 55 +++ 4 files changed, 85 insertions(+), 5 deletions(-) create mode 100644 clang/test/OpenMP/openmp-6-future-spellings.cpp diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 36fa3227fd6a6..ace8663b73a4a 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1530,9 +1530,11 @@ def OpenMPPre51Compat : DiagGroup<"pre-openmp-51-compat">; def OpenMP51Ext : DiagGroup<"openmp-51-extensions">; def OpenMPExtensions : DiagGroup<"openmp-extensions">; def OpenMPTargetException : DiagGroup<"openmp-target-exception">; +def OpenMPFuture : DiagGroup<"openmp-future">; def OpenMP : DiagGroup<"openmp", [ SourceUsesOpenMP, OpenMPClauses, OpenMPLoopForm, OpenMPTarget, -OpenMPMapping, OpenMP51Ext, OpenMPExtensions, OpenMPTargetException +OpenMPMapping, OpenMP51Ext, OpenMPExtensions, OpenMPTargetException, +OpenMPFuture ]>; // OpenACC warnings. 
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 6c30da376dafb..87eb2b724b297 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1488,6 +1488,9 @@ def err_omp_multiple_step_or_linear_modifier : Error< "multiple %select{'step size'|'linear modifier'}0 found in linear clause">; def err_omp_deprecate_old_syntax: Error< "old syntax '%0' on '%1' clause was deprecated, use new syntax '%2'">; +def warn_omp_future_directive_spelling: Warning< + "directive spelling '%0' is introduced in a later OpenMP version">, + InGroup; def warn_pragma_expected_colon_r_paren : Warning< "missing ':' or ')' after %0 - ignoring">, InGroup; def err_omp_unknown_directive : Error< diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 5256d08259b60..cb9eb3304c317 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -56,6 +56,21 @@ class DeclDirectiveListParserHelper final { }; } // namespace +static OpenMPDirectiveKind checkOpenMPDirectiveName(Parser &P, +SourceLocation Loc, +OpenMPDirectiveKind Kind, +StringRef Name) { + unsigned Version = P.getLangOpts().OpenMP; + auto [D, VR] = getOpenMPDirectiveKindAndVersions(Name); + assert(D == Kind && "Directive kind mismatch"); + // Ignore the case Version > VR.Max: In OpenMP 6.0 all prior spellings + // are explicitly allowed. 
+ if (Version < VR.Min) +P.Diag(Loc, diag::warn_omp_future_directive_spelling) << Name; + + return Kind; +} + static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) { static const DirectiveNameParser DirParser; @@ -65,7 +80,10 @@ static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) { if (Tok.isAnnotation()) return OMPD_unknown; - S = DirParser.consume(S, P.getPreprocessor().getSpelling(Tok)); + std::string Concat = P.getPreprocessor().getSpelling(Tok); + SourceLocation Loc = Tok.getLocation(); + + S = DirParser.consume(S, Concat); if (S == nullptr) return OMPD_unknown; @@ -73,15 +91,17 @@ static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) { OpenMPDirectiveKind DKind = S->Value; Tok = P.getPreprocessor().LookAhead(0); if (!Tok.isAnnotation()) { - S = DirParser.consume(S, P.getPreprocessor().getSpelling(Tok)); + std::string TS = P.getPreprocessor().getSpelling(Tok); + S = DirParser.consume(S, TS); if (S == nullptr) -return DKind; +return checkOpenMPDirectiveName(P, Loc, DKind, Concat); + Concat += ' ' + TS; P.ConsumeToken(); } } assert(S && "Should have exited early"); - return S->Value; + return checkOpenMPDirectiveName(P, Loc, S->Value, Concat); } static DeclarationName parseOpenMPReductionId(Parser &P) { diff --git a/clang/t
[llvm-branch-commits] [clang] [clang][OpenMP] Issue a warning when parsing future directive spelling (PR #146933)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Krzysztof Parzyszek (kparzysz) Changes OpenMP 6.0 introduced alternative spelling for some directives, with the previous spellings still allowed. Warn the user when a new spelling is encountered with OpenMP version set to an older value. --- Full diff: https://github.com/llvm/llvm-project/pull/146933.diff 4 Files Affected: - (modified) clang/include/clang/Basic/DiagnosticGroups.td (+3-1) - (modified) clang/include/clang/Basic/DiagnosticParseKinds.td (+3) - (modified) clang/lib/Parse/ParseOpenMP.cpp (+24-4) - (added) clang/test/OpenMP/openmp-6-future-spellings.cpp (+55) ``diff diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 36fa3227fd6a6..ace8663b73a4a 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1530,9 +1530,11 @@ def OpenMPPre51Compat : DiagGroup<"pre-openmp-51-compat">; def OpenMP51Ext : DiagGroup<"openmp-51-extensions">; def OpenMPExtensions : DiagGroup<"openmp-extensions">; def OpenMPTargetException : DiagGroup<"openmp-target-exception">; +def OpenMPFuture : DiagGroup<"openmp-future">; def OpenMP : DiagGroup<"openmp", [ SourceUsesOpenMP, OpenMPClauses, OpenMPLoopForm, OpenMPTarget, -OpenMPMapping, OpenMP51Ext, OpenMPExtensions, OpenMPTargetException +OpenMPMapping, OpenMP51Ext, OpenMPExtensions, OpenMPTargetException, +OpenMPFuture ]>; // OpenACC warnings. 
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 6c30da376dafb..87eb2b724b297 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -1488,6 +1488,9 @@ def err_omp_multiple_step_or_linear_modifier : Error< "multiple %select{'step size'|'linear modifier'}0 found in linear clause">; def err_omp_deprecate_old_syntax: Error< "old syntax '%0' on '%1' clause was deprecated, use new syntax '%2'">; +def warn_omp_future_directive_spelling: Warning< + "directive spelling '%0' is introduced in a later OpenMP version">, + InGroup; def warn_pragma_expected_colon_r_paren : Warning< "missing ':' or ')' after %0 - ignoring">, InGroup; def err_omp_unknown_directive : Error< diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp index 5256d08259b60..cb9eb3304c317 100644 --- a/clang/lib/Parse/ParseOpenMP.cpp +++ b/clang/lib/Parse/ParseOpenMP.cpp @@ -56,6 +56,21 @@ class DeclDirectiveListParserHelper final { }; } // namespace +static OpenMPDirectiveKind checkOpenMPDirectiveName(Parser &P, +SourceLocation Loc, +OpenMPDirectiveKind Kind, +StringRef Name) { + unsigned Version = P.getLangOpts().OpenMP; + auto [D, VR] = getOpenMPDirectiveKindAndVersions(Name); + assert(D == Kind && "Directive kind mismatch"); + // Ignore the case Version > VR.Max: In OpenMP 6.0 all prior spellings + // are explicitly allowed. 
+ if (Version < VR.Min) +P.Diag(Loc, diag::warn_omp_future_directive_spelling) << Name; + + return Kind; +} + static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) { static const DirectiveNameParser DirParser; @@ -65,7 +80,10 @@ static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) { if (Tok.isAnnotation()) return OMPD_unknown; - S = DirParser.consume(S, P.getPreprocessor().getSpelling(Tok)); + std::string Concat = P.getPreprocessor().getSpelling(Tok); + SourceLocation Loc = Tok.getLocation(); + + S = DirParser.consume(S, Concat); if (S == nullptr) return OMPD_unknown; @@ -73,15 +91,17 @@ static OpenMPDirectiveKind parseOpenMPDirectiveKind(Parser &P) { OpenMPDirectiveKind DKind = S->Value; Tok = P.getPreprocessor().LookAhead(0); if (!Tok.isAnnotation()) { - S = DirParser.consume(S, P.getPreprocessor().getSpelling(Tok)); + std::string TS = P.getPreprocessor().getSpelling(Tok); + S = DirParser.consume(S, TS); if (S == nullptr) -return DKind; +return checkOpenMPDirectiveName(P, Loc, DKind, Concat); + Concat += ' ' + TS; P.ConsumeToken(); } } assert(S && "Should have exited early"); - return S->Value; + return checkOpenMPDirectiveName(P, Loc, S->Value, Concat); } static DeclarationName parseOpenMPReductionId(Parser &P) { diff --git a/clang/test/OpenMP/openmp-6-future-spellings.cpp b/clang/test/OpenMP/openmp-6-future-spellings.cpp new file mode 100644 index 0..642ed3502d475 --- /dev/null +++ b/clang/test/OpenMP/openmp-6-future-spellings.cpp @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=52 -ferror-limit 100 -o - %s + +// expected-warning@+1 {{directive spelling 'begin declare_target' is introduced in a later OpenMP version}
[llvm-branch-commits] [clang] [llvm] [NFC][HLSL][RootSignature] Split up `HLSLRootSignatureUtils` (PR #146124)
https://github.com/joaosaffran approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/146124 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang][OpenMP] Issue a warning when parsing future directive spelling (PR #146933)
https://github.com/alexey-bataev approved this pull request. https://github.com/llvm/llvm-project/pull/146933 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Update `setDefaultFlags` to account for Root Signature Version (PR #145828)
https://github.com/joaosaffran approved this pull request. https://github.com/llvm/llvm-project/pull/145828 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/20.x: Backport [LLD][COFF] Disallow importing DllMain from import libraries (#146610) (PR #146699)
aganea wrote: @rnk @mstorsjo is it ok if we integrate this into the release? @tstellar will there be a 20.1.8? https://github.com/llvm/llvm-project/pull/146699 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][do concurrent] Extend `getAllocaBlock()` and emit yields correctly (PR #146853)
https://github.com/tblah approved this pull request. LGTM, thanks https://github.com/llvm/llvm-project/pull/146853 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Improve exception handling in NFC-Mode (PR #146513)
https://github.com/paschalis-mpeis edited https://github.com/llvm/llvm-project/pull/146513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Improve exception handling in NFC-Mode (PR #146513)
paschalis-mpeis wrote: Force-pushed to rebase since the parent PR now has a `--create-wrapper` flag. In the latest patch, `switch_back` is a function called whenever something goes wrong after checking out the previous revision, i.e.: - building the old binary fails, or - setting up the wrapper fails. I also delete llvm-bolt at the start, since we rebuild it for the current revision anyway. https://github.com/llvm/llvm-project/pull/146513 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][NFC] Update nfc-check-setup.py guidance (PR #146659)
@@ -156,9 +158,8 @@ def main(): os.replace(bolt_path, f"{bolt_path}.old") print( -f"Build directory {args.build_dir} is ready to run BOLT tests, e.g.\n" -"\tbin/llvm-lit -sv tools/bolt/test\nor\n" -"\tbin/llvm-lit -sv tools/bolttests" +f"Build directory {args.build_dir} is ready for NFC-Mode comparison " +"between the two revisions." paschalis-mpeis wrote: Will do, thanks! Setting up the wrapper now stays under a flag, so I'll reintroduce this example when I rebase the patch. https://github.com/llvm/llvm-project/pull/146659 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [PHIElimination] Reuse existing COPY in predecessor basic block (Take Two) (PR #146806)
mikaelholmen wrote: > @mikaelholmen @mstorsjo @macurtis-amd @sjoerdmeijer @sushgokh We've decided > that it's best to revert the original PR (see #146850), sorry for wasting > your time. This PR is trying to reintroduce it with fixes to the issues > you've presented. Can I humbly ask you to test this commit one final time? I've re-tested the cases I've reported problems for with this patch on top of our downstream compiler based on trunk version c79fcfee41 and they still work. https://github.com/llvm/llvm-project/pull/146806 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 8763227 - Revert "[win][aarch64] Always reserve frame pointers for Arm64 Windows (#146582)"
Author: David Spickett Date: 2025-07-03T10:02:43+01:00 New Revision: 8763227d14aae0994e33e34ffc1948ca95c1efcb URL: https://github.com/llvm/llvm-project/commit/8763227d14aae0994e33e34ffc1948ca95c1efcb DIFF: https://github.com/llvm/llvm-project/commit/8763227d14aae0994e33e34ffc1948ca95c1efcb.diff LOG: Revert "[win][aarch64] Always reserve frame pointers for Arm64 Windows (#146582)" This reverts commit a74c7d877637f31ff25308969ef7ca6ed94aacc5. Added: Modified: clang/lib/Driver/ToolChains/CommonArgs.cpp clang/test/Driver/frame-pointer-elim.c llvm/lib/Target/AArch64/AArch64FrameLowering.cpp llvm/lib/Target/AArch64/AArch64FrameLowering.h llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp llvm/test/CodeGen/AArch64/regress-w29-reserved-with-fp.ll llvm/test/CodeGen/AArch64/win-sve.ll llvm/test/CodeGen/AArch64/wincfi-missing-seh-directives.ll llvm/test/CodeGen/AArch64/wineh-frame5.mir llvm/test/CodeGen/AArch64/wineh-frame7.mir Removed: diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 2fcf9b28dc746..070901f037823 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -174,13 +174,7 @@ static bool mustUseNonLeafFramePointerForTarget(const llvm::Triple &Triple) { // even if new frame records are not created. static bool mustMaintainValidFrameChain(const llvm::opt::ArgList &Args, const llvm::Triple &Triple) { - switch (Triple.getArch()) { - default: -return false; - case llvm::Triple::arm: - case llvm::Triple::armeb: - case llvm::Triple::thumb: - case llvm::Triple::thumbeb: + if (Triple.isARM() || Triple.isThumb()) { // For 32-bit Arm, the -mframe-chain=aapcs and -mframe-chain=aapcs+leaf // options require the frame pointer register to be reserved (or point to a // new AAPCS-compilant frame record), even with-fno-omit-frame-pointer. 
@@ -189,13 +183,8 @@ static bool mustMaintainValidFrameChain(const llvm::opt::ArgList &Args, return V != "none"; } return false; - - case llvm::Triple::aarch64: -// Arm64 Windows requires that the frame chain is valid, as there is no -// way to indicate during a stack walk that a frame has used the frame -// pointer as a general purpose register. -return Triple.isOSWindows(); } + return false; } // True if a target-specific option causes -fno-omit-frame-pointer to also diff --git a/clang/test/Driver/frame-pointer-elim.c b/clang/test/Driver/frame-pointer-elim.c index 0dd7eb0c738db..f64ff6efc7261 100644 --- a/clang/test/Driver/frame-pointer-elim.c +++ b/clang/test/Driver/frame-pointer-elim.c @@ -4,8 +4,6 @@ // KEEP-NON-LEAF: "-mframe-pointer=non-leaf" // KEEP-NONE-NOT: warning: argument unused // KEEP-NONE: "-mframe-pointer=none" -// KEEP-RESERVED-NOT: warning: argument unused -// KEEP-RESERVED: "-mframe-pointer=reserved" // On Linux x86, omit frame pointer when optimization is enabled. // RUN: %clang -### --target=i386-linux -S -fomit-frame-pointer %s 2>&1 | \ @@ -217,9 +215,5 @@ // RUN: %clang -### --target=aarch64-none-elf -S -O1 -fno-omit-frame-pointer %s 2>&1 | \ // RUN: FileCheck --check-prefix=KEEP-NON-LEAF %s -// AArch64 Windows requires that the frame pointer be reserved -// RUN: %clang -### --target=aarch64-pc-windows-msvc -S -fomit-frame-pointer %s 2>&1 | \ -// RUN: FileCheck --check-prefix=KEEP-RESERVED %s - void f0() {} void f1() { f0(); } diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 3ef7e5265c724..6f1ce5bdbe286 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -518,27 +518,6 @@ bool AArch64FrameLowering::hasFPImpl(const MachineFunction &MF) const { return false; } -/// Should the Frame Pointer be reserved for the current function? 
-bool AArch64FrameLowering::isFPReserved(const MachineFunction &MF) const { - const TargetMachine &TM = MF.getTarget(); - const Triple &TT = TM.getTargetTriple(); - - // These OSes require the frame chain is valid, even if the current frame does - // not use a frame pointer. - if (TT.isOSDarwin() || TT.isOSWindows()) -return true; - - // If the function has a frame pointer, it is reserved. - if (hasFP(MF)) -return true; - - // Frontend has requested to preserve the frame pointer. - if (TM.Options.FramePointerIsReserved(MF)) -return true; - - return false; -} - /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is /// not required, we reserve argument space for call sites in the function /// immediately on entry to the current function. This eliminates the need for diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch6
[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)
https://github.com/jmorse commented: Tentative LGTM, noting that you're planning on updating docs in this PR too. I've no familiarity with the Python code being modified, alas. https://github.com/llvm/llvm-project/pull/143594 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)
https://github.com/jmorse edited https://github.com/llvm/llvm-project/pull/143594 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)
@@ -59,6 +65,52 @@ cl::opt DebugifyLevel( raw_ostream &dbg() { return Quiet ? nulls() : errs(); } +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +// These maps refer to addresses in this instance of LLVM, so we can reuse them +// everywhere - therefore, we store them at file scope. +static SymbolizedAddressMap SymbolizedAddrs; +static AddressSet UnsymbolizedAddrs; + +std::string symbolizeStackTrace(const Instruction *I) { + // We flush the set of unsymbolized addresses at the latest possible moment, + // i.e. now. + if (!UnsymbolizedAddrs.empty()) { +sys::symbolizeAddresses(UnsymbolizedAddrs, SymbolizedAddrs); +UnsymbolizedAddrs.clear(); + } + auto OriginStackTraces = I->getDebugLoc().getOriginStackTraces(); + std::string Result; + raw_string_ostream OS(Result); + for (size_t TraceIdx = 0; TraceIdx < OriginStackTraces.size(); ++TraceIdx) { +if (TraceIdx != 0) + OS << "\n"; +auto &[Depth, StackTrace] = OriginStackTraces[TraceIdx]; +unsigned VirtualFrameNo = 0; +for (int Frame = 0; Frame < Depth; ++Frame) { + assert(SymbolizedAddrs.contains(StackTrace[Frame]) && + "Expected each address to have been symbolized."); + for (std::string &SymbolizedFrame : SymbolizedAddrs[StackTrace[Frame]]) { +OS << right_justify(formatv("#{0}", VirtualFrameNo++).str(), std::log10(Depth) + 2) + << ' ' << SymbolizedFrame << '\n'; + } +} + } + return Result; +} +void collectStackAddresses(Instruction &I) { + auto &OriginStackTraces = I.getDebugLoc().getOriginStackTraces(); jmorse wrote: Major nit; would we be able to name the type here rather than `auto`? It'll make the resulting code a lot easier to localise and dissect for future readers. (The next `auto` makes sense of course). https://github.com/llvm/llvm-project/pull/143594 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DLCov] Origin-Tracking: Add debugify support (PR #143594)
@@ -59,6 +65,52 @@ cl::opt DebugifyLevel( raw_ostream &dbg() { return Quiet ? nulls() : errs(); } +#if LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN +// These maps refer to addresses in this instance of LLVM, so we can reuse them +// everywhere - therefore, we store them at file scope. jmorse wrote: I feel the term "instance of LLVM" could be more precise: can we use the word "process" here? All other ambiguities and difficulties of fixed addresses are brought to mind with the word process. https://github.com/llvm/llvm-project/pull/143594 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][do concurrent] Extned `getAllocaBlock()` and emit yields correctly (PR #146853)
https://github.com/ergawy created https://github.com/llvm/llvm-project/pull/146853 Handles some loose ends in `do concurrent` reduction declarations. This PR extends `getAllocaBlock` to handle declare ops, and also emit `fir.yield` in all regions. >From caabbde941c1c870a850d4ed85d1b81d5e5d0759 Mon Sep 17 00:00:00 2001 From: ergawy Date: Thu, 3 Jul 2025 05:45:40 -0500 Subject: [PATCH] [flang][do concurrent] Extned `getAllocaBlock()` and emit yields correctly Handles some loose ends in `do concurrent` reduction declarations. This PR extends `getAllocaBlock` to handle declare ops, and also emit `fir.yield` in all regions. --- .../lib/Lower/Support/ReductionProcessor.cpp | 4 ++- flang/lib/Optimizer/Builder/FIRBuilder.cpp| 3 ++ .../test/HLFIR/fir-reduction-alloca-block.fir | 31 +++ .../do_concurrent_reduce_allocatable.f90 | 22 + 4 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 flang/test/HLFIR/fir-reduction-alloca-block.fir create mode 100644 flang/test/Lower/do_concurrent_reduce_allocatable.f90 diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp index 539d5cd37c2ea..14b2c9836748f 100644 --- a/flang/lib/Lower/Support/ReductionProcessor.cpp +++ b/flang/lib/Lower/Support/ReductionProcessor.cpp @@ -529,7 +529,9 @@ static void createReductionAllocAndInitRegions( converter, loc, type, initValue, initBlock, reductionDecl.getInitializerAllocArg(), reductionDecl.getInitializerMoldArg(), reductionDecl.getCleanupRegion(), -DeclOperationKind::Reduction); +DeclOperationKind::Reduction, /*sym=*/nullptr, +/*cannotHaveLowerBounds=*/false, +/*isDoConcurrent*/ std::is_same_v); } if (fir::isa_trivial(ty)) { diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index b5cabdb830e5c..acd5a88a2582d 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -286,6 +286,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() { 
if (auto firLocalOp = getRegion().getParentOfType()) return &getRegion().front(); + if (auto firLocalOp = getRegion().getParentOfType()) +return &getRegion().front(); + return getEntryBlock(); } diff --git a/flang/test/HLFIR/fir-reduction-alloca-block.fir b/flang/test/HLFIR/fir-reduction-alloca-block.fir new file mode 100644 index 0..75857cfbe01d3 --- /dev/null +++ b/flang/test/HLFIR/fir-reduction-alloca-block.fir @@ -0,0 +1,31 @@ +// Tests that `fir.local` ops are able to provide an alloca block when required. + +// RUN: fir-opt %s -convert-hlfir-to-fir | FileCheck %s + +fir.declare_reduction @add_reduction_byref_box_heap_UxUxf32 : !fir.ref>>> alloc { + %0 = fir.alloca !fir.box>> + fir.yield(%0 : !fir.ref>>>) +} init { +^bb0(%arg0: !fir.ref>>>, %arg1: !fir.ref>>>): + %cst = arith.constant 0.00e+00 : f32 + %0 = fir.load %arg1 : !fir.ref>>> + hlfir.assign %cst to %0 : f32, !fir.box>> + fir.yield(%arg1 : !fir.ref>>>) +} combiner { +^bb0(%arg0: !fir.ref>>>, %arg1: !fir.ref>>>): + fir.yield(%arg0 : !fir.ref>>>) +} + +// CHECK-LABEL: fir.declare_reduction @add_reduction_byref_box_heap_UxUxf32 : !fir.ref>>> alloc { +// CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box>> +// CHECK: fir.yield(%[[VAL_0]] : !fir.ref>>>) + +// CHECK-LABEL: } init { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>>, %[[VAL_1:.*]]: !fir.ref>>>): +// CHECK: %[[VAL_2:.*]] = fir.alloca !fir.box>> +// CHECK: fir.yield(%[[VAL_1]] : !fir.ref>>>) + +// CHECK-LABEL: } combiner { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>>, %[[VAL_1:.*]]: !fir.ref>>>): +// CHECK: fir.yield(%[[VAL_0]] : !fir.ref>>>) +// CHECK: } diff --git a/flang/test/Lower/do_concurrent_reduce_allocatable.f90 b/flang/test/Lower/do_concurrent_reduce_allocatable.f90 new file mode 100644 index 0..873fd10dd1b97 --- /dev/null +++ b/flang/test/Lower/do_concurrent_reduce_allocatable.f90 @@ -0,0 +1,22 @@ +! 
RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s + +subroutine do_concurrent_allocatable + integer :: i + real, allocatable, dimension(:,:) :: x + + do concurrent (i = 1:10) reduce(+: x) + end do +end subroutine + +! CHECK: fir.declare_reduction @[[RED_OP:.*]] : ![[RED_TYPE:.*]] alloc { +! CHECK: %[[ALLOC:.*]] = fir.alloca +! CHECK: fir.yield(%[[ALLOC]] : ![[RED_TYPE]]) +! CHECK: } init { +! CHECK: ^bb0(%{{.*}}: ![[RED_TYPE]], %[[RED_ARG:.*]]: ![[RED_TYPE]]): +! CHECK: fir.yield(%[[RED_ARG]] : !{{.*}}) +! CHECK: } combiner { +! CHECK: ^bb0(%[[COMB_RES:.*]]: ![[RED_TYPE]], %{{.*}}: ![[RED_TYPE]]): +! CHECK: fir.yield(%[[COMB_RES]] : !{{.*}}) +! CHECK: } cleanup { +! CHECK: fir.yield +! CHECK: } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailma
[llvm-branch-commits] [llvm] [PHIElimination] Reuse existing COPY in predecessor basic block (Take Two) (PR #146806)
guy-david wrote: @mikaelholmen @mstorsjo @macurtis-amd @sjoerdmeijer @sushgokh We've decided that it's best to revert the original PR (see https://github.com/llvm/llvm-project/pull/146850), sorry for wasting your time. This PR is trying to reintroduce it with fixes to the issues you've presented. Can I humbly ask you to test this commit one final time? https://github.com/llvm/llvm-project/pull/146806 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [flang] [flang][do concurrent] Extned `getAllocaBlock()` and emit yields correctly (PR #146853)
llvmbot wrote: @llvm/pr-subscribers-flang-fir-hlfir Author: Kareem Ergawy (ergawy) Changes Handles some loose ends in `do concurrent` reduction declarations. This PR extends `getAllocaBlock` to handle declare ops, and also emit `fir.yield` in all regions. --- Full diff: https://github.com/llvm/llvm-project/pull/146853.diff 4 Files Affected: - (modified) flang/lib/Lower/Support/ReductionProcessor.cpp (+3-1) - (modified) flang/lib/Optimizer/Builder/FIRBuilder.cpp (+3) - (added) flang/test/HLFIR/fir-reduction-alloca-block.fir (+31) - (added) flang/test/Lower/do_concurrent_reduce_allocatable.f90 (+22) ``diff diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp index 539d5cd37c2ea..14b2c9836748f 100644 --- a/flang/lib/Lower/Support/ReductionProcessor.cpp +++ b/flang/lib/Lower/Support/ReductionProcessor.cpp @@ -529,7 +529,9 @@ static void createReductionAllocAndInitRegions( converter, loc, type, initValue, initBlock, reductionDecl.getInitializerAllocArg(), reductionDecl.getInitializerMoldArg(), reductionDecl.getCleanupRegion(), -DeclOperationKind::Reduction); +DeclOperationKind::Reduction, /*sym=*/nullptr, +/*cannotHaveLowerBounds=*/false, +/*isDoConcurrent*/ std::is_same_v); } if (fir::isa_trivial(ty)) { diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index b5cabdb830e5c..acd5a88a2582d 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -286,6 +286,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() { if (auto firLocalOp = getRegion().getParentOfType()) return &getRegion().front(); + if (auto firLocalOp = getRegion().getParentOfType()) +return &getRegion().front(); + return getEntryBlock(); } diff --git a/flang/test/HLFIR/fir-reduction-alloca-block.fir b/flang/test/HLFIR/fir-reduction-alloca-block.fir new file mode 100644 index 0..75857cfbe01d3 --- /dev/null +++ 
b/flang/test/HLFIR/fir-reduction-alloca-block.fir @@ -0,0 +1,31 @@ +// Tests that `fir.local` ops are able to provide an alloca block when required. + +// RUN: fir-opt %s -convert-hlfir-to-fir | FileCheck %s + +fir.declare_reduction @add_reduction_byref_box_heap_UxUxf32 : !fir.ref>>> alloc { + %0 = fir.alloca !fir.box>> + fir.yield(%0 : !fir.ref>>>) +} init { +^bb0(%arg0: !fir.ref>>>, %arg1: !fir.ref>>>): + %cst = arith.constant 0.00e+00 : f32 + %0 = fir.load %arg1 : !fir.ref>>> + hlfir.assign %cst to %0 : f32, !fir.box>> + fir.yield(%arg1 : !fir.ref>>>) +} combiner { +^bb0(%arg0: !fir.ref>>>, %arg1: !fir.ref>>>): + fir.yield(%arg0 : !fir.ref>>>) +} + +// CHECK-LABEL: fir.declare_reduction @add_reduction_byref_box_heap_UxUxf32 : !fir.ref>>> alloc { +// CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box>> +// CHECK: fir.yield(%[[VAL_0]] : !fir.ref>>>) + +// CHECK-LABEL: } init { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>>, %[[VAL_1:.*]]: !fir.ref>>>): +// CHECK: %[[VAL_2:.*]] = fir.alloca !fir.box>> +// CHECK: fir.yield(%[[VAL_1]] : !fir.ref>>>) + +// CHECK-LABEL: } combiner { +// CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>>, %[[VAL_1:.*]]: !fir.ref>>>): +// CHECK: fir.yield(%[[VAL_0]] : !fir.ref>>>) +// CHECK: } diff --git a/flang/test/Lower/do_concurrent_reduce_allocatable.f90 b/flang/test/Lower/do_concurrent_reduce_allocatable.f90 new file mode 100644 index 0..873fd10dd1b97 --- /dev/null +++ b/flang/test/Lower/do_concurrent_reduce_allocatable.f90 @@ -0,0 +1,22 @@ +! RUN: %flang_fc1 -emit-hlfir -o - %s | FileCheck %s + +subroutine do_concurrent_allocatable + integer :: i + real, allocatable, dimension(:,:) :: x + + do concurrent (i = 1:10) reduce(+: x) + end do +end subroutine + +! CHECK: fir.declare_reduction @[[RED_OP:.*]] : ![[RED_TYPE:.*]] alloc { +! CHECK: %[[ALLOC:.*]] = fir.alloca +! CHECK: fir.yield(%[[ALLOC]] : ![[RED_TYPE]]) +! CHECK: } init { +! CHECK: ^bb0(%{{.*}}: ![[RED_TYPE]], %[[RED_ARG:.*]]: ![[RED_TYPE]]): +! CHECK: fir.yield(%[[RED_ARG]] : !{{.*}}) +! 
CHECK: } combiner { +! CHECK: ^bb0(%[[COMB_RES:.*]]: ![[RED_TYPE]], %{{.*}}: ![[RED_TYPE]]): +! CHECK: fir.yield(%[[COMB_RES]] : !{{.*}}) +! CHECK: } cleanup { +! CHECK: fir.yield +! CHECK: } `` https://github.com/llvm/llvm-project/pull/146853 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [PHIElimination] Reuse existing COPY in predecessor basic block (Take Two) (PR #146806)
https://github.com/guy-david edited https://github.com/llvm/llvm-project/pull/146806 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [PHIElimination] Reuse existing COPY in predecessor basic block (Take Two) (PR #146806)
sushgokh wrote: > @mikaelholmen @mstorsjo @macurtis-amd @sjoerdmeijer @sushgokh We've decided > that it's best to revert the original PR (see #146850), sorry for wasting > your time. This PR is trying to reintroduce it with fixes to the issues > you've presented. Can I humbly ask you to test this commit one final time? SHA f5c62ee0fa04 + this PR was passing for us. Now, trunk + this PR has again started failing. https://github.com/llvm/llvm-project/pull/146806 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 6ac62f9 - fix building error
Author: shore Date: 2025-07-04T09:55:18+08:00 New Revision: 6ac62f931e55eb02e546d67c8ea28d9a672f7fd1 URL: https://github.com/llvm/llvm-project/commit/6ac62f931e55eb02e546d67c8ea28d9a672f7fd1 DIFF: https://github.com/llvm/llvm-project/commit/6ac62f931e55eb02e546d67c8ea28d9a672f7fd1.diff LOG: fix building error Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/test/lit.cfg.py Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index fd679a9933cf0..6f4e93bffe198 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4563,6 +4563,7 @@ static SrcStatus getNegStatus(Register Reg, SrcStatus S, default: llvm_unreachable("unexpected SrcStatus"); } + llvm_unreachable("unexpected SrcStatus"); } static std::optional> diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index bd6e37c848d8c..fddade78df9b6 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -474,7 +474,7 @@ def have_cxx_shared_library(): print("could not exec llvm-readobj") return False -readobj_out = readobj_cmd.stdout.read().decode("ascii") +readobj_out = readobj_cmd.stdout.read().decode("utf-8") readobj_cmd.wait() regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)") ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [DirectX][Draft] validate registers are bound to root signature (PR #146785)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/146785 >From 76d633d2b2b70ae6eaa1e7c40ef09e5f6ef9ae74 Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Thu, 26 Jun 2025 19:28:01 + Subject: [PATCH 1/5] refactoring --- .../lib/Target/DirectX/DXContainerGlobals.cpp | 9 ++-- llvm/lib/Target/DirectX/DXILRootSignature.cpp | 12 ++--- llvm/lib/Target/DirectX/DXILRootSignature.h | 45 ++- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index 9c38901f6821f..fa27c4665cfbe 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -160,18 +160,17 @@ void DXContainerGlobals::addRootSignature(Module &M, assert(MMI.EntryPropertyVec.size() == 1); - auto &RSA = getAnalysis(); + auto &RSA = getAnalysis().getRSInfo(); const Function *EntryFunction = MMI.EntryPropertyVec[0].Entry; - const auto &FuncRs = RSA.find(EntryFunction); + const auto &RS = RSA.getDescForFunction(EntryFunction); - if (FuncRs == RSA.end()) + if (!RS ) return; - const RootSignatureDesc &RS = FuncRs->second; SmallString<256> Data; raw_svector_ostream OS(Data); - RS.write(OS); + RS->write(OS); Constant *Constant = ConstantDataArray::getString(M.getContext(), Data, /*AddNull*/ false); diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp b/llvm/lib/Target/DirectX/DXILRootSignature.cpp index 29e78fcce5262..4094df160ef6f 100644 --- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp +++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp @@ -554,9 +554,9 @@ analyzeModule(Module &M) { AnalysisKey RootSignatureAnalysis::Key; -SmallDenseMap -RootSignatureAnalysis::run(Module &M, ModuleAnalysisManager &AM) { - return analyzeModule(M); +RootSignatureBindingInfo RootSignatureAnalysis::run(Module &M, +ModuleAnalysisManager &AM) { + return RootSignatureBindingInfo(analyzeModule(M)); } //===--===// @@ -564,8 +564,7 @@ 
RootSignatureAnalysis::run(Module &M, ModuleAnalysisManager &AM) { PreservedAnalyses RootSignatureAnalysisPrinter::run(Module &M, ModuleAnalysisManager &AM) { - SmallDenseMap &RSDMap = - AM.getResult(M); + RootSignatureBindingInfo &RSDMap = AM.getResult(M); OS << "Root Signature Definitions" << "\n"; @@ -636,7 +635,8 @@ PreservedAnalyses RootSignatureAnalysisPrinter::run(Module &M, //===--===// bool RootSignatureAnalysisWrapper::runOnModule(Module &M) { - FuncToRsMap = analyzeModule(M); + FuncToRsMap = std::make_unique( + RootSignatureBindingInfo(analyzeModule(M))); return false; } diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.h b/llvm/lib/Target/DirectX/DXILRootSignature.h index b45cebc15fd39..fef933811f840 100644 --- a/llvm/lib/Target/DirectX/DXILRootSignature.h +++ b/llvm/lib/Target/DirectX/DXILRootSignature.h @@ -33,16 +33,43 @@ enum class RootSignatureElementKind { CBV = 5, DescriptorTable = 6, }; + +class RootSignatureBindingInfo { + private: +SmallDenseMap FuncToRsMap; + + public: + using iterator = +SmallDenseMap::iterator; + + RootSignatureBindingInfo () = default; + RootSignatureBindingInfo(SmallDenseMap Map) : FuncToRsMap(Map) {}; + + iterator find(const Function *F) { return FuncToRsMap.find(F); } + + iterator end() { return FuncToRsMap.end(); } + + std::optional getDescForFunction(const Function* F) { +const auto FuncRs = find(F); +if (FuncRs == end()) + return std::nullopt; + +return FuncRs->second; + } + +}; + class RootSignatureAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; static AnalysisKey Key; public: - RootSignatureAnalysis() = default; - using Result = SmallDenseMap; +RootSignatureAnalysis() = default; - SmallDenseMap + using Result = RootSignatureBindingInfo; + + RootSignatureBindingInfo run(Module &M, ModuleAnalysisManager &AM); }; @@ -52,20 +79,16 @@ class RootSignatureAnalysis : public AnalysisInfoMixin { /// passes which run through the legacy pass manager. 
class RootSignatureAnalysisWrapper : public ModulePass { private: - SmallDenseMap FuncToRsMap; + std::unique_ptr FuncToRsMap; public: static char ID; + using Result = RootSignatureBindingInfo; RootSignatureAnalysisWrapper() : ModulePass(ID) {} - using iterator = - SmallDenseMap::iterator; - - iterator find(const Function *F) { return FuncToRsMap.find(F); } - - iterator end() { return FuncToRsMap.end(); } - + RootSignatureBindingInfo& getRSInfo() {return *FuncToRsMap;} + bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override; >From 75
[llvm-branch-commits] [clang] [llvm] [DirectX][Draft] validate registers are bound to root signature (PR #146785)
https://github.com/joaosaffran updated https://github.com/llvm/llvm-project/pull/146785 >From 76d633d2b2b70ae6eaa1e7c40ef09e5f6ef9ae74 Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Thu, 26 Jun 2025 19:28:01 + Subject: [PATCH 1/5] refactoring --- .../lib/Target/DirectX/DXContainerGlobals.cpp | 9 ++-- llvm/lib/Target/DirectX/DXILRootSignature.cpp | 12 ++--- llvm/lib/Target/DirectX/DXILRootSignature.h | 45 ++- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp index 9c38901f6821f..fa27c4665cfbe 100644 --- a/llvm/lib/Target/DirectX/DXContainerGlobals.cpp +++ b/llvm/lib/Target/DirectX/DXContainerGlobals.cpp @@ -160,18 +160,17 @@ void DXContainerGlobals::addRootSignature(Module &M, assert(MMI.EntryPropertyVec.size() == 1); - auto &RSA = getAnalysis(); + auto &RSA = getAnalysis().getRSInfo(); const Function *EntryFunction = MMI.EntryPropertyVec[0].Entry; - const auto &FuncRs = RSA.find(EntryFunction); + const auto &RS = RSA.getDescForFunction(EntryFunction); - if (FuncRs == RSA.end()) + if (!RS ) return; - const RootSignatureDesc &RS = FuncRs->second; SmallString<256> Data; raw_svector_ostream OS(Data); - RS.write(OS); + RS->write(OS); Constant *Constant = ConstantDataArray::getString(M.getContext(), Data, /*AddNull*/ false); diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp b/llvm/lib/Target/DirectX/DXILRootSignature.cpp index 29e78fcce5262..4094df160ef6f 100644 --- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp +++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp @@ -554,9 +554,9 @@ analyzeModule(Module &M) { AnalysisKey RootSignatureAnalysis::Key; -SmallDenseMap -RootSignatureAnalysis::run(Module &M, ModuleAnalysisManager &AM) { - return analyzeModule(M); +RootSignatureBindingInfo RootSignatureAnalysis::run(Module &M, +ModuleAnalysisManager &AM) { + return RootSignatureBindingInfo(analyzeModule(M)); } //===--===// @@ -564,8 +564,7 @@ 
RootSignatureAnalysis::run(Module &M, ModuleAnalysisManager &AM) { PreservedAnalyses RootSignatureAnalysisPrinter::run(Module &M, ModuleAnalysisManager &AM) { - SmallDenseMap &RSDMap = - AM.getResult(M); + RootSignatureBindingInfo &RSDMap = AM.getResult(M); OS << "Root Signature Definitions" << "\n"; @@ -636,7 +635,8 @@ PreservedAnalyses RootSignatureAnalysisPrinter::run(Module &M, //===--===// bool RootSignatureAnalysisWrapper::runOnModule(Module &M) { - FuncToRsMap = analyzeModule(M); + FuncToRsMap = std::make_unique( + RootSignatureBindingInfo(analyzeModule(M))); return false; } diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.h b/llvm/lib/Target/DirectX/DXILRootSignature.h index b45cebc15fd39..fef933811f840 100644 --- a/llvm/lib/Target/DirectX/DXILRootSignature.h +++ b/llvm/lib/Target/DirectX/DXILRootSignature.h @@ -33,16 +33,43 @@ enum class RootSignatureElementKind { CBV = 5, DescriptorTable = 6, }; + +class RootSignatureBindingInfo { + private: +SmallDenseMap FuncToRsMap; + + public: + using iterator = +SmallDenseMap::iterator; + + RootSignatureBindingInfo () = default; + RootSignatureBindingInfo(SmallDenseMap Map) : FuncToRsMap(Map) {}; + + iterator find(const Function *F) { return FuncToRsMap.find(F); } + + iterator end() { return FuncToRsMap.end(); } + + std::optional getDescForFunction(const Function* F) { +const auto FuncRs = find(F); +if (FuncRs == end()) + return std::nullopt; + +return FuncRs->second; + } + +}; + class RootSignatureAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; static AnalysisKey Key; public: - RootSignatureAnalysis() = default; - using Result = SmallDenseMap; +RootSignatureAnalysis() = default; - SmallDenseMap + using Result = RootSignatureBindingInfo; + + RootSignatureBindingInfo run(Module &M, ModuleAnalysisManager &AM); }; @@ -52,20 +79,16 @@ class RootSignatureAnalysis : public AnalysisInfoMixin { /// passes which run through the legacy pass manager. 
class RootSignatureAnalysisWrapper : public ModulePass { private: - SmallDenseMap FuncToRsMap; + std::unique_ptr FuncToRsMap; public: static char ID; + using Result = RootSignatureBindingInfo; RootSignatureAnalysisWrapper() : ModulePass(ID) {} - using iterator = - SmallDenseMap::iterator; - - iterator find(const Function *F) { return FuncToRsMap.find(F); } - - iterator end() { return FuncToRsMap.end(); } - + RootSignatureBindingInfo& getRSInfo() {return *FuncToRsMap;} + bool runOnModule(Module &M) override; void getAnalysisUsage(AnalysisUsage &AU) const override; >From 75
[llvm-branch-commits] [lldb] 567fc97 - Revert "[lldb][RPC] Upstream lldb-rpc-gen tool (#138031)"
Author: Chelsea Cassanova Date: 2025-07-03T15:52:35-07:00 New Revision: 567fc97be4881ce3656e45c0b2cc7e85e08dde3d URL: https://github.com/llvm/llvm-project/commit/567fc97be4881ce3656e45c0b2cc7e85e08dde3d DIFF: https://github.com/llvm/llvm-project/commit/567fc97be4881ce3656e45c0b2cc7e85e08dde3d.diff LOG: Revert "[lldb][RPC] Upstream lldb-rpc-gen tool (#138031)" This reverts commit 9bfb347ea0a0a260eb505921dfc0cb824a6ced5d. Added: Modified: lldb/cmake/modules/LLDBConfig.cmake lldb/test/CMakeLists.txt lldb/test/Shell/helper/toolchain.py lldb/test/Shell/lit.site.cfg.py.in lldb/tools/CMakeLists.txt Removed: lldb/test/Shell/RPC/Generator/Inputs/SBDummy.h lldb/test/Shell/RPC/Generator/Tests/CheckRPCGenToolByproducts.test lldb/test/Shell/RPC/Generator/lit.local.cfg lldb/tools/lldb-rpc/CMakeLists.txt lldb/tools/lldb-rpc/LLDBRPCGeneration.cmake lldb/tools/lldb-rpc/LLDBRPCHeaders.cmake lldb/tools/lldb-rpc/lldb-rpc-gen/CMakeLists.txt lldb/tools/lldb-rpc/lldb-rpc-gen/RPCCommon.cpp lldb/tools/lldb-rpc/lldb-rpc-gen/RPCCommon.h lldb/tools/lldb-rpc/lldb-rpc-gen/lldb-rpc-gen.cpp diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index f674c29682160..8c30b6e09d2c7 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -323,6 +323,4 @@ else() set(LLDB_CAN_USE_DEBUGSERVER OFF) endif() -set(LLDB_BUILD_LLDBRPC ON CACHE BOOL "") - include(LLDBGenerateConfig) diff --git a/lldb/test/CMakeLists.txt b/lldb/test/CMakeLists.txt index 7cf239c7f95ab..6449ac5a9247f 100644 --- a/lldb/test/CMakeLists.txt +++ b/lldb/test/CMakeLists.txt @@ -132,10 +132,6 @@ if(TARGET lldb-framework) add_lldb_test_dependency(lldb-framework) endif() -if (LLDB_BUILD_LLDBRPC) - add_lldb_test_dependency(lldb-rpc-generate-sources) -endif() - # Add dependencies that are not exported targets when building standalone. 
if(NOT LLDB_BUILT_STANDALONE) add_lldb_test_dependency( @@ -253,8 +249,7 @@ llvm_canonicalize_cmake_booleans( LLDB_TEST_SHELL_DISABLE_REMOTE LLDB_TOOL_LLDB_SERVER_BUILD LLDB_USE_SYSTEM_DEBUGSERVER - LLDB_IS_64_BITS - LLDB_BUILD_LLDBRPC) + LLDB_IS_64_BITS) # Configure the individual test suites. add_subdirectory(API) diff --git a/lldb/test/Shell/RPC/Generator/Inputs/SBDummy.h b/lldb/test/Shell/RPC/Generator/Inputs/SBDummy.h deleted file mode 100644 index e69de29bb2d1d..0 diff --git a/lldb/test/Shell/RPC/Generator/Tests/CheckRPCGenToolByproducts.test b/lldb/test/Shell/RPC/Generator/Tests/CheckRPCGenToolByproducts.test deleted file mode 100644 index 15fcf8fb39c7d..0 --- a/lldb/test/Shell/RPC/Generator/Tests/CheckRPCGenToolByproducts.test +++ /dev/null @@ -1,9 +0,0 @@ -RUN: %lldb-rpc-gen --output-dir=%t %S/../Inputs/SBDummy.h - -RUN: ls %t | FileCheck %s - -# We're just making sure that the tool emits the class names, -# methods and skipped methods file in the output directory. -CHECK: SBAPI.def -CHECK: SBClasses.def -CHECK: SkippedMethods.txt diff --git a/lldb/test/Shell/RPC/Generator/lit.local.cfg b/lldb/test/Shell/RPC/Generator/lit.local.cfg deleted file mode 100644 index db9494781c00c..0 --- a/lldb/test/Shell/RPC/Generator/lit.local.cfg +++ /dev/null @@ -1,3 +0,0 @@ -# All tests for the tool need lldb-rpc-gen to be built. -if not config.lldb_has_lldbrpc: - config.unsupported = True diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py index 728f6347242f1..42968128f2702 100644 --- a/lldb/test/Shell/helper/toolchain.py +++ b/lldb/test/Shell/helper/toolchain.py @@ -156,16 +156,6 @@ def use_lldb_substitutions(config): extra_args=["platform"], unresolved="ignore", ), -ToolSubst( -"%lldb-rpc-gen", -command=FindTool("lldb-rpc-gen"), -# We need the LLDB build directory root to pass into the tool, not the test build root. 
-extra_args=[ -"-p " + config.lldb_build_directory + "/..", -'--extra-arg="-resource-dir=' + config.clang_resource_dir + '"', -], -unresolved="ignore", -), "lldb-test", "lldb-dap", ToolSubst( diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in index beaa41e6fd379..5be5359217769 100644 --- a/lldb/test/Shell/lit.site.cfg.py.in +++ b/lldb/test/Shell/lit.site.cfg.py.in @@ -33,7 +33,6 @@ config.lldb_build_directory = "@LLDB_TEST_BUILD_DIRECTORY@" config.have_lldb_server = @LLDB_TOOL_LLDB_SERVER_BUILD@ config.lldb_system_debugserver = @LLDB_USE_SYSTEM_DEBUGSERVER@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" -config.lldb_has_lldbrpc = @LLDB_BUILD_LLDBRPC@ # The shell tests use their own module caches. config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@",
[llvm-branch-commits] [llvm] d6c3ae8 - Revert "[AMDGPU] Re-apply: Implement vop3p complex pattern optmization for gi…"
Author: Shoreshen Date: 2025-07-04T09:43:00+08:00 New Revision: d6c3ae82c37987098b409ec46971cb2ee370f8c5 URL: https://github.com/llvm/llvm-project/commit/d6c3ae82c37987098b409ec46971cb2ee370f8c5 DIFF: https://github.com/llvm/llvm-project/commit/d6c3ae82c37987098b409ec46971cb2ee370f8c5.diff LOG: Revert "[AMDGPU] Re-apply: Implement vop3p complex pattern optmization for gi…" This reverts commit db03c27763656948323a50b9706da912c581e6f2. Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll llvm/test/CodeGen/AMDGPU/packed-fp32.ll llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index fd679a9933cf0..b632b16f5c198 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4327,591 +4327,60 @@ AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const { }}; } -enum class SrcStatus { - IS_SAME, - IS_UPPER_HALF, - IS_LOWER_HALF, - IS_UPPER_HALF_NEG, - // This means current op = [op_upper, op_lower] and src = -op_lower. - IS_LOWER_HALF_NEG, - IS_HI_NEG, - // This means current op = [op_upper, op_lower] and src = [op_upper, - // -op_lower]. 
- IS_LO_NEG, - IS_BOTH_NEG, - INVALID, - NEG_START = IS_UPPER_HALF_NEG, - NEG_END = IS_BOTH_NEG, - HALF_START = IS_UPPER_HALF, - HALF_END = IS_LOWER_HALF_NEG -}; -/// Test if the MI is truncating to half, such as `%reg0:n = G_TRUNC %reg1:2n` -static bool isTruncHalf(const MachineInstr *MI, -const MachineRegisterInfo &MRI) { - if (MI->getOpcode() != AMDGPU::G_TRUNC) -return false; - - unsigned DstSize = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits(); - unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits(); - return DstSize * 2 == SrcSize; -} - -/// Test if the MI is logic shift right with half bits, -/// such as `%reg0:2n =G_LSHR %reg1:2n, CONST(n)` -static bool isLshrHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) { - if (MI->getOpcode() != AMDGPU::G_LSHR) -return false; - - Register ShiftSrc; - std::optional ShiftAmt; - if (mi_match(MI->getOperand(0).getReg(), MRI, - m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt { -unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits(); -unsigned Shift = ShiftAmt->Value.getZExtValue(); -return Shift * 2 == SrcSize; - } - return false; -} - -/// Test if the MI is shift left with half bits, -/// such as `%reg0:2n =G_SHL %reg1:2n, CONST(n)` -static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) { - if (MI->getOpcode() != AMDGPU::G_SHL) -return false; - - Register ShiftSrc; - std::optional ShiftAmt; - if (mi_match(MI->getOperand(0).getReg(), MRI, - m_GShl(m_Reg(ShiftSrc), m_GCst(ShiftAmt { -unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits(); -unsigned Shift = ShiftAmt->Value.getZExtValue(); -return Shift * 2 == SrcSize; - } - return false; -} - -/// Test function, if the MI is `%reg0:n, %reg1:n = G_UNMERGE_VALUES %reg2:2n` -static bool isUnmergeHalf(const MachineInstr *MI, - const MachineRegisterInfo &MRI) { - if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES) -return false; - return MI->getNumOperands() == 3 && 
MI->getOperand(0).isDef() && - MI->getOperand(1).isDef() && !MI->getOperand(2).isDef(); -} - -enum class TypeClass { VECTOR_OF_TWO, SCALAR, NONE_OF_LISTED }; - -static TypeClass isVectorOfTwoOrScalar(Register Reg, - const MachineRegisterInfo &MRI) { - LLT OpTy = MRI.getType(Reg); - if (OpTy.isScalar()) -return TypeClass::SCALAR; - if (OpTy.isVector() && OpTy.getNumElements() == 2) -return TypeClass::VECTOR_OF_TWO; - return TypeClass::NONE_OF_LISTED; -} - -static SrcStatus getNegStatus(Register Reg, SrcStatus S, - const MachineRegisterInfo &MRI) { - TypeClass NegType = isVectorOfTwoOrScalar(Reg, MRI); - if (NegType != TypeClass::VECTOR_OF_TWO && NegType != TypeClass::SCALAR) -return SrcStatus::INVALID; - - switch (S) { - case SrcStatus::IS_SAME: -if (NegType == TypeClass::VECTOR_OF_TWO) { - // Vector of 2: - // [SrcHi, SrcLo] = [CurrHi, CurrLo] - // [CurrHi, CurrLo] = neg [OpHi, OpLo](2 x Type) - // [CurrHi, CurrLo] = [-OpHi, -OpLo](2 x Type) - // [SrcHi, SrcLo] = [-OpHi, -OpLo] - return SrcStatus::IS_BOTH_NEG; -} -if (NegType == TypeClass::SCALAR) { - // Scalar: -
[llvm-branch-commits] [llvm] 14a4448 - Revert "Revert "[AMDGPU] Re-apply: Implement vop3p complex pattern optmizatio…"
Author: Shoreshen Date: 2025-07-04T09:43:51+08:00 New Revision: 14a4448afc8e65b8610d78fc66f2695a691a25b3 URL: https://github.com/llvm/llvm-project/commit/14a4448afc8e65b8610d78fc66f2695a691a25b3 DIFF: https://github.com/llvm/llvm-project/commit/14a4448afc8e65b8610d78fc66f2695a691a25b3.diff LOG: Revert "Revert "[AMDGPU] Re-apply: Implement vop3p complex pattern optmizatio…" This reverts commit 5b8304d6b90c42f2d3cf918e5e0f935767866e64. Added: Modified: llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h llvm/test/CodeGen/AMDGPU/GlobalISel/fmul.v2f16.ll llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sdot2.ll llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.udot2.ll llvm/test/CodeGen/AMDGPU/packed-fp32.ll llvm/test/CodeGen/AMDGPU/strict_fsub.f16.ll Removed: diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index b632b16f5c198..fd679a9933cf0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -4327,60 +4327,591 @@ AMDGPUInstructionSelector::selectVOP3NoMods(MachineOperand &Root) const { }}; } -std::pair -AMDGPUInstructionSelector::selectVOP3PModsImpl( - Register Src, const MachineRegisterInfo &MRI, bool IsDOT) const { +enum class SrcStatus { + IS_SAME, + IS_UPPER_HALF, + IS_LOWER_HALF, + IS_UPPER_HALF_NEG, + // This means current op = [op_upper, op_lower] and src = -op_lower. + IS_LOWER_HALF_NEG, + IS_HI_NEG, + // This means current op = [op_upper, op_lower] and src = [op_upper, + // -op_lower]. 
+ IS_LO_NEG, + IS_BOTH_NEG, + INVALID, + NEG_START = IS_UPPER_HALF_NEG, + NEG_END = IS_BOTH_NEG, + HALF_START = IS_UPPER_HALF, + HALF_END = IS_LOWER_HALF_NEG +}; +/// Test if the MI is truncating to half, such as `%reg0:n = G_TRUNC %reg1:2n` +static bool isTruncHalf(const MachineInstr *MI, +const MachineRegisterInfo &MRI) { + if (MI->getOpcode() != AMDGPU::G_TRUNC) +return false; + + unsigned DstSize = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits(); + unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits(); + return DstSize * 2 == SrcSize; +} + +/// Test if the MI is logic shift right with half bits, +/// such as `%reg0:2n =G_LSHR %reg1:2n, CONST(n)` +static bool isLshrHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) { + if (MI->getOpcode() != AMDGPU::G_LSHR) +return false; + + Register ShiftSrc; + std::optional ShiftAmt; + if (mi_match(MI->getOperand(0).getReg(), MRI, + m_GLShr(m_Reg(ShiftSrc), m_GCst(ShiftAmt { +unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits(); +unsigned Shift = ShiftAmt->Value.getZExtValue(); +return Shift * 2 == SrcSize; + } + return false; +} + +/// Test if the MI is shift left with half bits, +/// such as `%reg0:2n =G_SHL %reg1:2n, CONST(n)` +static bool isShlHalf(const MachineInstr *MI, const MachineRegisterInfo &MRI) { + if (MI->getOpcode() != AMDGPU::G_SHL) +return false; + + Register ShiftSrc; + std::optional ShiftAmt; + if (mi_match(MI->getOperand(0).getReg(), MRI, + m_GShl(m_Reg(ShiftSrc), m_GCst(ShiftAmt { +unsigned SrcSize = MRI.getType(MI->getOperand(1).getReg()).getSizeInBits(); +unsigned Shift = ShiftAmt->Value.getZExtValue(); +return Shift * 2 == SrcSize; + } + return false; +} + +/// Test function, if the MI is `%reg0:n, %reg1:n = G_UNMERGE_VALUES %reg2:2n` +static bool isUnmergeHalf(const MachineInstr *MI, + const MachineRegisterInfo &MRI) { + if (MI->getOpcode() != AMDGPU::G_UNMERGE_VALUES) +return false; + return MI->getNumOperands() == 3 && 
MI->getOperand(0).isDef() && + MI->getOperand(1).isDef() && !MI->getOperand(2).isDef(); +} + +enum class TypeClass { VECTOR_OF_TWO, SCALAR, NONE_OF_LISTED }; + +static TypeClass isVectorOfTwoOrScalar(Register Reg, + const MachineRegisterInfo &MRI) { + LLT OpTy = MRI.getType(Reg); + if (OpTy.isScalar()) +return TypeClass::SCALAR; + if (OpTy.isVector() && OpTy.getNumElements() == 2) +return TypeClass::VECTOR_OF_TWO; + return TypeClass::NONE_OF_LISTED; +} + +static SrcStatus getNegStatus(Register Reg, SrcStatus S, + const MachineRegisterInfo &MRI) { + TypeClass NegType = isVectorOfTwoOrScalar(Reg, MRI); + if (NegType != TypeClass::VECTOR_OF_TWO && NegType != TypeClass::SCALAR) +return SrcStatus::INVALID; + + switch (S) { + case SrcStatus::IS_SAME: +if (NegType == TypeClass::VECTOR_OF_TWO) { + // Vector of 2: + // [SrcHi, SrcLo] = [CurrHi, CurrLo] + // [CurrHi, CurrLo] = neg [OpHi, OpLo](2 x Type) + // [CurrHi, CurrLo] = [-OpHi, -OpLo](2 x Type) + // [SrcHi, SrcLo]
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
https://github.com/nicolasvasilache created https://github.com/llvm/llvm-project/pull/146943 …lOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is in the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. >From 02e425b30966f4781fe07d8cf595a1e2b0d41aa3 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 3 Jul 2025 21:26:53 +0200 Subject: [PATCH] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::ForallOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is in the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. 
Co-authored-by: Oleksandr "Alex" Zinenko --- .../Dialect/GPU/IR/GPUDeviceMappingAttr.td| 18 .../mlir/Dialect/GPU/TransformOps/Utils.h | 15 ++- .../Dialect/SCF/IR/DeviceMappingInterface.td | 45 +++- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td| 12 +++ mlir/lib/Dialect/GPU/CMakeLists.txt | 1 + mlir/lib/Dialect/GPU/IR/GPUDialect.cpp| 45 .../GPU/TransformOps/GPUTransformOps.cpp | 58 ++ mlir/lib/Dialect/GPU/TransformOps/Utils.cpp | 100 +- mlir/lib/Dialect/SCF/IR/SCF.cpp | 43 ++-- .../Dialect/GPU/transform-gpu-failing.mlir| 61 +++ mlir/test/Dialect/GPU/transform-gpu.mlir | 81 ++ mlir/test/Dialect/SCF/invalid.mlir| 18 12 files changed, 439 insertions(+), 58 deletions(-) diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ + DeclareAttrInterfaceMethods ] > { + let parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. 
+ }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. stru
[llvm-branch-commits] [mlir] [mlir][SCF][GPU] Add DeviceMaskingAttrInterface support to scf::Foral… (PR #146943)
llvmbot wrote: @llvm/pr-subscribers-mlir-gpu Author: Nicolas Vasilache (nicolasvasilache) Changes …lOp and use it to implement warp specialization. This revision adds DeviceMaskingAttrInterface and extends DeviceMappingArrayAttr to accept a union of DeviceMappingAttrInterface and DeviceMaskingAttrInterface. The first implementation is in the form of a GPUMappingMaskAttr, which can be additionally passed to the scf.forall.mapping attribute to specify a mask on compute resources that should be active. Support is added to GPUTransformOps to take advantage of this information and lower to block/warpgroup/warp/thread specialization when mapped to linear ids. --- Patch is 35.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/146943.diff 12 Files Affected: - (modified) mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td (+18) - (modified) mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h (+10-5) - (modified) mlir/include/mlir/Dialect/SCF/IR/DeviceMappingInterface.td (+44-1) - (modified) mlir/include/mlir/Dialect/SCF/IR/SCFOps.td (+12) - (modified) mlir/lib/Dialect/GPU/CMakeLists.txt (+1) - (modified) mlir/lib/Dialect/GPU/IR/GPUDialect.cpp (+45) - (modified) mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp (+39-19) - (modified) mlir/lib/Dialect/GPU/TransformOps/Utils.cpp (+73-27) - (modified) mlir/lib/Dialect/SCF/IR/SCF.cpp (+37-6) - (modified) mlir/test/Dialect/GPU/transform-gpu-failing.mlir (+61) - (modified) mlir/test/Dialect/GPU/transform-gpu.mlir (+81) - (modified) mlir/test/Dialect/SCF/invalid.mlir (+18) ``diff diff --git a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td index 63f228ca3157f..e8540027e7b77 100644 --- a/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td +++ b/mlir/include/mlir/Dialect/GPU/IR/GPUDeviceMappingAttr.td @@ -252,6 +252,24 @@ def GPULaneMappingAttr }]; } +def GPUMappingMaskAttr : GPU_Attr<"GPUMappingMask", "mask", [ 
+ DeclareAttrInterfaceMethods ] > { + let parameters = (ins "uint64_t":$mask); + let assemblyFormat = "`<` params `>`"; + let description = [{ +Attribute describing how to filter the processing units that a +region is mapped to. + +In the first implementation the masking is a bitfield that specifies for +each processing unit whether it is active or not. + +In the future, we may want to implement this as a symbol to refer to +dynamically defined values. + +Extending op semantics with an operand is deemed too intrusive at this time. + }]; +} + def GPUMemorySpaceMappingAttr : GPU_Attr<"GPUMemorySpaceMapping", "memory_space", [ DeclareAttrInterfaceMethods ] > { let parameters = (ins diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h index de512ded59fec..0a11b8f8d3fa0 100644 --- a/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h +++ b/mlir/include/mlir/Dialect/GPU/TransformOps/Utils.h @@ -78,7 +78,8 @@ struct GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuBlockIdBuilder : public GpuIdBuilder { - GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuBlockIdBuilder(MLIRContext *ctx, bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); }; /// Builder for warpgroup ids used to map scf.forall to reindexed warpgroups. @@ -88,7 +89,8 @@ struct GpuBlockIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpgroupIdBuilder : public GpuIdBuilder { GpuWarpgroupIdBuilder(MLIRContext *ctx, int64_t warpSize, -bool useLinearMapping = false); +bool useLinearMapping = false, +DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; /// In the future this may be configured by the transformation. 
static constexpr int64_t kNumWarpsPerGroup = 4; @@ -101,7 +103,8 @@ struct GpuWarpgroupIdBuilder : public GpuIdBuilder { /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuWarpIdBuilder : public GpuIdBuilder { GpuWarpIdBuilder(MLIRContext *ctx, int64_t warpSize, - bool useLinearMapping = false); + bool useLinearMapping = false, + DeviceMaskingAttrInterface mask = nullptr); int64_t warpSize = 32; }; @@ -111,7 +114,8 @@ struct GpuWarpIdBuilder : public GpuIdBuilder { /// If `useLinearMapping` is true, the `idBuilder` method returns nD values /// used for indexing rewrites as well as 1D sizes for predicate generation. struct GpuThreadIdBuilder : public GpuIdBuilder { - GpuThreadIdBuilder(MLIRContext *ctx, bool useLinearMapping = false); + GpuThreadIdBuild