[llvm-branch-commits] [flang] [Flang][OpenMP] - When mapping a `fir.boxchar`, map the underlying data pointer as a member (PR #141715)
@@ -285,6 +285,9 @@ bool hasDynamicSize(mlir::Type t) { return true; if (auto rec = mlir::dyn_cast(t)) return hasDynamicSize(rec); + if (auto boxChar = mlir::dyn_cast(t)) { +return characterWithDynamicLen(boxChar.getEleTy()); + } jeanPerier wrote: This `hasDynamicSize` change brings ambiguity to the helper because this helper returns false for pointer-like types (fir.ref, fir.box, ...); it is meant to deal with data types. fir.boxchar is a pointer-like type, not a data type. It is up to the user, I think, to call it with the data type (fir.char<>). https://github.com/llvm/llvm-project/pull/141715 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64] Prepare for split ZPR and PPR area allocation (NFCI) (PR #142391)
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/142391 This patch attempts to refactor AArch64FrameLowering to allow the size of the ZPR and PPR areas to be calculated separately. This will be used by a subsequent patch to support allocating ZPRs and PPRs to separate areas. This patch should be an NFC and is split out to make later functional changes easier to spot. >From 0b09392f5d5e8721125355e0f77653a85d643a6f Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Thu, 8 May 2025 17:38:27 + Subject: [PATCH] [AArch64] Prepare for split ZPR and PPR area allocation (NFCI) This patch attempts to refactor AArch64FrameLowering to allow the size of the ZPR and PPR areas to be calculated separately. This will be used by a subsequent patch to support allocating ZPRs and PPRs to separate areas. This patch should be an NFC and is split out to make later functional changes easier to spot. --- .../Target/AArch64/AArch64FrameLowering.cpp | 305 -- .../lib/Target/AArch64/AArch64FrameLowering.h | 12 +- .../AArch64/AArch64MachineFunctionInfo.h | 47 +-- .../Target/AArch64/AArch64RegisterInfo.cpp| 7 +- 4 files changed, 247 insertions(+), 124 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 643778c742497..e5592a921e192 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -326,7 +326,10 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF, static bool produceCompactUnwindFrame(MachineFunction &MF); static bool needsWinCFI(const MachineFunction &MF); +static StackOffset getZPRStackSize(const MachineFunction &MF); +static StackOffset getPPRStackSize(const MachineFunction &MF); static StackOffset getSVEStackSize(const MachineFunction &MF); +static bool hasSVEStackSize(const MachineFunction &MF); static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB); /// Returns true if a homogeneous 
prolog or epilog code can be emitted @@ -345,7 +348,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog( if (needsWinCFI(MF)) return false; // TODO: SVE is not supported yet. - if (getSVEStackSize(MF)) + if (hasSVEStackSize(MF)) return false; // Bail on stack adjustment needed on return for simplicity. @@ -445,10 +448,36 @@ static unsigned getFixedObjectSize(const MachineFunction &MF, } } -/// Returns the size of the entire SVE stackframe (calleesaves + spills). +static unsigned getStackHazardSize(const MachineFunction &MF) { + return MF.getSubtarget().getStreamingHazardSize(); +} + +/// Returns the size of the entire ZPR stackframe (calleesaves + spills). +static StackOffset getZPRStackSize(const MachineFunction &MF) { + const AArch64FunctionInfo *AFI = MF.getInfo(); + return StackOffset::getScalable(AFI->getStackSizeZPR()); +} + +/// Returns the size of the entire PPR stackframe (calleesaves + spills). +static StackOffset getPPRStackSize(const MachineFunction &MF) { + const AArch64FunctionInfo *AFI = MF.getInfo(); + return StackOffset::getScalable(AFI->getStackSizePPR()); +} + +/// Returns the size of the entire SVE stackframe (PPRs + ZPRs). static StackOffset getSVEStackSize(const MachineFunction &MF) { + return getZPRStackSize(MF) + getPPRStackSize(MF); +} + +static bool hasSVEStackSize(const MachineFunction &MF) { const AArch64FunctionInfo *AFI = MF.getInfo(); - return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); + return AFI->getStackSizeZPR() > 0 || AFI->getStackSizePPR() > 0; +} + +/// Returns true if PPRs are spilled as ZPRs. 
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) { + return MF.getSubtarget().getRegisterInfo()->getSpillSize( + AArch64::PPRRegClass) == 16; } bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { @@ -476,7 +505,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { !Subtarget.hasSVE(); return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize || - getSVEStackSize(MF) || LowerQRegCopyThroughMem); + hasSVEStackSize(MF) || LowerQRegCopyThroughMem); } /// hasFPImpl - Return true if the specified function should have a dedicated @@ -1144,7 +1173,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( // When there is an SVE area on the stack, always allocate the // callee-saves and spills/locals separately. - if (getSVEStackSize(MF)) + if (hasSVEStackSize(MF)) return false; return true; @@ -1570,30 +1599,40 @@ static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget().isTargetWindows(); } -static unsigned getStackHazardSize(const MachineFunction &MF) { - return MF.getSubtarget().getStreamingHazardSize(); -} - // Convenience function to determine whether I is an SVE callee save. -static bool IsSVECal
[llvm-branch-commits] [llvm] [AArch64][SME] Support split ZPR and PPR area allocation (PR #142392)
https://github.com/MacDue edited https://github.com/llvm/llvm-project/pull/142392 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64][SME] Support split ZPR and PPR area allocation (PR #142392)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: Benjamin Maxwell (MacDue) Changes For a while we have supported the `-aarch64-stack-hazard-size=` option, which adds "hazard padding" between GPRs and FPR/ZPRs. However, there is currently a hole in this mitigation as PPR and FPR/ZPR accesses to the same area also cause streaming memory hazards (this is noted by `-pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size= `), and the current stack layout places PPRs and ZPRs within the same area. Which looks like: ``` Higher address | callee-saved gpr registers| |-- | | lr,fp (a.k.a. "frame record")| |---| <- fp(=x29) | | |---| | callee-saved fp/simd/SVE regs | |---| |SVE stack objects | |---| | local variables of fixed size | | | | | | | | <- sp | Lower address ``` With this patch the stack (and hazard padding) is rearranged so that hazard padding is placed between the PPRs and ZPRs rather than within the (fixed size) callee-save region. Which looks something like this: ``` Higher address | callee-saved gpr registers| |-- | | lr,fp (a.k.a. "frame record")| |---| <- fp(=x29) |callee-saved PPRs | |PPR stack objects | (These are SVE predicates) |---| | | |---| | callee-saved ZPR regs | (These are SVE vectors) |ZPR stack objects | Note: FPRs are promoted to ZPRs |---| | local variables of fixed size | | | | | | | | <- sp | Lower address ``` This layout is only enabled if: * SplitSVEObjects are enabled (`-aarch64-split-sve-objects`) - (This may be enabled by default in a later patch) * Streaming memory hazards are present - (`-aarch64-stack-hazard-size= ` != 0) * PPRs and FPRs/ZPRs are on the stack * There's no stack realignment or variable-sized objects - This is left as a TODO for now Additionally, any FPR callee-saves that are present will be promoted to ZPRs. This is to prevent stack hazards between FPRs and GRPs in the fixed size callee-save area (which would otherwise require more hazard padding, or moving the FPR callee-saves). 
This layout should resolve the hole in the hazard padding mitigation, and is not intended to change codegen for non-SME code. --- Patch is 172.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142392.diff 8 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+391-133) - (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.h (+2-1) - (modified) llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h (+7) - (added) llvm/test/CodeGen/AArch64/framelayout-split-sve.mir (+526) - (modified) llvm/test/CodeGen/AArch64/spill-fill-zpr-predicates.mir (+7-10) - (added) llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll (+751) - (modified) llvm/test/CodeGen/AArch64/stack-hazard.ll (+542-326) - (modified) llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll (-2) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index e5592a921e192..36775b8dc05e5 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -270,6 +270,11 @@ static cl::opt OrderFrameObjects("aarch64-order-frame-objects", cl::desc("sort stack allocations"), cl::init(true), cl::Hidden); +static cl::opt +SplitSVEObjects("aarch64-split-sve-objects", +cl::desc("Split allocation of ZPR & PPR objects"), +cl::init(false), cl::Hidden); + cl::opt EnableHomogeneousPrologEpilog( "homogeneous-prolog-epilog", cl::Hidden, cl::desc("Emit homogeneous prologue and epilogue for the size " @@ -458,10 +463,13 @@ static StackOffset getZPRStackSize(const MachineFunction &MF) { return StackOffset::getScalable(AFI->getStackSizeZPR()); } -/// Returns the size of the entire PPR stackframe (calleesaves + spills). +/// Returns the size of the entire PPR stackframe (calleesaves + spills + hazard +/// padding). stat
[llvm-branch-commits] [llvm] [AArch64] Prepare for split ZPR and PPR area allocation (NFCI) (PR #142391)
MacDue wrote: This is a stacked PR. Please see the final PR for context. 1. https://github.com/llvm/llvm-project/pull/142390 2. :point_right: https://github.com/llvm/llvm-project/pull/142391 3. https://github.com/llvm/llvm-project/pull/142392 https://github.com/llvm/llvm-project/pull/142391 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64] Prepare for split ZPR and PPR area allocation (NFCI) (PR #142391)
https://github.com/MacDue ready_for_review https://github.com/llvm/llvm-project/pull/142391 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64] Prepare for split ZPR and PPR area allocation (NFCI) (PR #142391)
llvmbot wrote: @llvm/pr-subscribers-backend-aarch64 Author: Benjamin Maxwell (MacDue) Changes This patch attempts to refactor AArch64FrameLowering to allow the size of the ZPR and PPR areas to be calculated separately. This will be used by a subsequent patch to support allocating ZPRs and PPRs to separate areas. This patch should be an NFC and is split out to make later functional changes easier to spot. --- Patch is 34.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142391.diff 4 Files Affected: - (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+209-96) - (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.h (+8-4) - (modified) llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h (+26-21) - (modified) llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp (+4-3) ``diff diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 643778c742497..e5592a921e192 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -326,7 +326,10 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF, static bool produceCompactUnwindFrame(MachineFunction &MF); static bool needsWinCFI(const MachineFunction &MF); +static StackOffset getZPRStackSize(const MachineFunction &MF); +static StackOffset getPPRStackSize(const MachineFunction &MF); static StackOffset getSVEStackSize(const MachineFunction &MF); +static bool hasSVEStackSize(const MachineFunction &MF); static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB); /// Returns true if a homogeneous prolog or epilog code can be emitted @@ -345,7 +348,7 @@ bool AArch64FrameLowering::homogeneousPrologEpilog( if (needsWinCFI(MF)) return false; // TODO: SVE is not supported yet. - if (getSVEStackSize(MF)) + if (hasSVEStackSize(MF)) return false; // Bail on stack adjustment needed on return for simplicity. 
@@ -445,10 +448,36 @@ static unsigned getFixedObjectSize(const MachineFunction &MF, } } -/// Returns the size of the entire SVE stackframe (calleesaves + spills). +static unsigned getStackHazardSize(const MachineFunction &MF) { + return MF.getSubtarget().getStreamingHazardSize(); +} + +/// Returns the size of the entire ZPR stackframe (calleesaves + spills). +static StackOffset getZPRStackSize(const MachineFunction &MF) { + const AArch64FunctionInfo *AFI = MF.getInfo(); + return StackOffset::getScalable(AFI->getStackSizeZPR()); +} + +/// Returns the size of the entire PPR stackframe (calleesaves + spills). +static StackOffset getPPRStackSize(const MachineFunction &MF) { + const AArch64FunctionInfo *AFI = MF.getInfo(); + return StackOffset::getScalable(AFI->getStackSizePPR()); +} + +/// Returns the size of the entire SVE stackframe (PPRs + ZPRs). static StackOffset getSVEStackSize(const MachineFunction &MF) { + return getZPRStackSize(MF) + getPPRStackSize(MF); +} + +static bool hasSVEStackSize(const MachineFunction &MF) { const AArch64FunctionInfo *AFI = MF.getInfo(); - return StackOffset::getScalable((int64_t)AFI->getStackSizeSVE()); + return AFI->getStackSizeZPR() > 0 || AFI->getStackSizePPR() > 0; +} + +/// Returns true if PPRs are spilled as ZPRs. 
+static bool arePPRsSpilledAsZPR(const MachineFunction &MF) { + return MF.getSubtarget().getRegisterInfo()->getSpillSize( + AArch64::PPRRegClass) == 16; } bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { @@ -476,7 +505,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const { !Subtarget.hasSVE(); return !(MFI.hasCalls() || hasFP(MF) || NumBytes > RedZoneSize || - getSVEStackSize(MF) || LowerQRegCopyThroughMem); + hasSVEStackSize(MF) || LowerQRegCopyThroughMem); } /// hasFPImpl - Return true if the specified function should have a dedicated @@ -1144,7 +1173,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump( // When there is an SVE area on the stack, always allocate the // callee-saves and spills/locals separately. - if (getSVEStackSize(MF)) + if (hasSVEStackSize(MF)) return false; return true; @@ -1570,30 +1599,40 @@ static bool isTargetWindows(const MachineFunction &MF) { return MF.getSubtarget().isTargetWindows(); } -static unsigned getStackHazardSize(const MachineFunction &MF) { - return MF.getSubtarget().getStreamingHazardSize(); -} - // Convenience function to determine whether I is an SVE callee save. -static bool IsSVECalleeSave(MachineBasicBlock::iterator I) { +static bool IsZPRCalleeSave(MachineBasicBlock::iterator I) { switch (I->getOpcode()) { default: return false; - case AArch64::PTRUE_C_B: case AArch64::LD1B_2Z_IMM: case AArch64::ST1B_2Z_IMM: case AArch64::STR_ZXI: - case AArch64::STR_PXI: case AArch64::LDR_ZXI: - case AArch64::LDR_PXI: - case A
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
@@ -1904,6 +1904,20 @@ def atomic_load_64 : let MemoryVT = i64; } +def atomic_load_128_v2i64 : + PatFrag<(ops node:$ptr), + (atomic_load node:$ptr)> { + let IsAtomic = true; + let MemoryVT = v2i64; +} + +def atomic_load_128_v4i32 : + PatFrag<(ops node:$ptr), + (atomic_load node:$ptr)> { + let IsAtomic = true; + let MemoryVT = v4i32; jofrn wrote: The tests that use them must also have the changes from AtomicExpand. https://github.com/llvm/llvm-project/pull/120716 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64][SME] Support split ZPR and PPR area allocation (PR #142392)
https://github.com/MacDue ready_for_review https://github.com/llvm/llvm-project/pull/142392 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/20.x: [RelLookupTableConverter] Drop unnamed_addr to avoid generating GOTPCREL relocations (#142304) (PR #142311)
https://github.com/nikic approved this pull request. https://github.com/llvm/llvm-project/pull/142311 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 1ccec5e - Revert "[Clang] Improve infrastructure for libstdc++ workarounds (#141977)"
Author: cor3ntin Date: 2025-06-02T18:58:25+02:00 New Revision: 1ccec5e96b2ec873e5a6da60eb9036cf00a94414 URL: https://github.com/llvm/llvm-project/commit/1ccec5e96b2ec873e5a6da60eb9036cf00a94414 DIFF: https://github.com/llvm/llvm-project/commit/1ccec5e96b2ec873e5a6da60eb9036cf00a94414.diff LOG: Revert "[Clang] Improve infrastructure for libstdc++ workarounds (#141977)" This reverts commit 6f7268e204ed0a2c014324828e0582d90e8fcd04. Added: Modified: clang/include/clang/Lex/Preprocessor.h clang/lib/Lex/PPExpressions.cpp clang/lib/Sema/SemaDeclCXX.cpp clang/lib/Sema/SemaExceptionSpec.cpp clang/lib/Sema/SemaInit.cpp clang/lib/Sema/SemaTemplate.cpp clang/lib/Sema/SemaTemplateInstantiateDecl.cpp clang/test/SemaCXX/libstdcxx_common_type_hack.cpp clang/test/SemaCXX/libstdcxx_explicit_init_list_hack.cpp clang/test/SemaCXX/libstdcxx_pair_swap_hack.cpp Removed: diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index cab398ad4a865..f2dfd3a349b8b 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -129,12 +129,6 @@ enum class EmbedResult { Empty = 2,// Corresponds to __STDC_EMBED_EMPTY__ }; -struct CXXStandardLibraryVersionInfo { - enum Library { Unknown, LibStdCXX }; - Library Lib; - unsigned Version; -}; - /// Engages in a tight little dance with the lexer to efficiently /// preprocess tokens. /// @@ -2712,15 +2706,6 @@ class Preprocessor { return IsFileLexer(CurLexer.get(), CurPPLexer); } - //======// - // Standard Library Identification - std::optional CXXStandardLibraryVersion; - -public: - std::optional getStdLibCxxVersion(); - bool NeedsStdLibCxxWorkaroundBefore(unsigned FixedVersion); - -private: //======// // Caching stuff. 
void CachingLex(Token &Result); diff --git a/clang/lib/Lex/PPExpressions.cpp b/clang/lib/Lex/PPExpressions.cpp index 118d4ae6791d1..cf7e32bee2e71 100644 --- a/clang/lib/Lex/PPExpressions.cpp +++ b/clang/lib/Lex/PPExpressions.cpp @@ -979,49 +979,3 @@ Preprocessor::EvaluateDirectiveExpression(IdentifierInfo *&IfNDefMacro, return EvaluateDirectiveExpression(IfNDefMacro, Tok, EvaluatedDefined, CheckForEoD); } - -static std::optional -getCXXStandardLibraryVersion(Preprocessor &PP, StringRef MacroName, - CXXStandardLibraryVersionInfo::Library Lib) { - MacroInfo *Macro = PP.getMacroInfo(PP.getIdentifierInfo(MacroName)); - - if (!Macro || Macro->getNumTokens() != 1 || !Macro->isObjectLike()) -return std::nullopt; - - const Token &RevisionDateTok = Macro->getReplacementToken(0); - - bool Invalid = false; - llvm::SmallVector Buffer; - llvm::StringRef RevisionDate = - PP.getSpelling(RevisionDateTok, Buffer, &Invalid); - if (!Invalid) { -unsigned Value; -// We don't use NumericParser to avoid diagnostics -if (!RevisionDate.consumeInteger(10, Value)) - return CXXStandardLibraryVersionInfo{Lib, Value}; - } - return CXXStandardLibraryVersionInfo{CXXStandardLibraryVersionInfo::Unknown, - 0}; -} - -std::optional Preprocessor::getStdLibCxxVersion() { - if (!CXXStandardLibraryVersion) -CXXStandardLibraryVersion = getCXXStandardLibraryVersion( -*this, "__GLIBCXX__", CXXStandardLibraryVersionInfo::LibStdCXX); - if (!CXXStandardLibraryVersion) -return std::nullopt; - - if (CXXStandardLibraryVersion->Lib == - CXXStandardLibraryVersionInfo::LibStdCXX) -return CXXStandardLibraryVersion->Version; - return std::nullopt; -} - -bool Preprocessor::NeedsStdLibCxxWorkaroundBefore(unsigned FixedVersion) { - assert(FixedVersion >= 2000'00'00 && FixedVersion <= 2100'00'00 && - "invalid value for __GLIBCXX__"); - std::optional Ver = getStdLibCxxVersion(); - if (!Ver) -return false; - return *Ver < FixedVersion; -} diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 
55e078f3180a2..4a735992cec68 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -13198,7 +13198,6 @@ NamedDecl *Sema::BuildUsingDeclaration( if (getLangOpts().CPlusPlus14 && II && II->isStr("gets") && CurContext->isStdNamespace() && isa(LookupContext) && -PP.NeedsStdLibCxxWorkaroundBefore(2016'12'21) && getSourceManager().isInSystemHeader(UsingLoc)) return nullptr; UsingValidatorCCC CCC(HasTypenameKeyword, IsInstantiation, SS.getScopeRep(), diff --git a/clang/lib/Sema/SemaExceptionSpec.cpp b/clang/lib/Sema/SemaExceptionSpec.cpp index c692f824da422..c83eab53891ca 100644 --- a/clang/lib/Sema/SemaExceptionSpec.cpp +++ b/clang/lib/Sema/SemaExceptionSpec.cpp
[llvm-branch-commits] AArch64: Relax x16/x17 constraint on AUT in certain cases. (PR #132857)
https://github.com/atrosinenko commented: @pcc Thank you for the updates! By the way, have you considered splitting `emitPtrauthAuthResign` into "auth" and "resign" parts? On one hand, this could decrease the number of arguments passed to a single function. On the other hand, passing "on_success" label between functions may render this simplification meaningless. https://github.com/llvm/llvm-project/pull/132857 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AArch64: Relax x16/x17 constraint on AUT in certain cases. (PR #132857)
@@ -1951,9 +1952,23 @@ let Predicates = [HasPAuth] in { let Uses = [X16]; } + def AUTxMxN : Pseudo<(outs GPR64:$AuthVal, GPR64common:$Scratch), + (ins GPR64:$Val, i32imm:$Key, +i64imm:$Disc, GPR64:$AddrDisc), + []>, Sched<[WriteI, ReadI]> { +let Constraints = "$AuthVal = $Val"; atrosinenko wrote: [nit] Constraint string can be passed as an optional argument of `Pseudo<...>`. https://github.com/llvm/llvm-project/pull/132857 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AArch64: Relax x16/x17 constraint on AUT in certain cases. (PR #132857)
@@ -168,7 +168,12 @@ class AArch64AsmPrinter : public AsmPrinter { void emitPtrauthTailCallHardening(const MachineInstr *TC); // Emit the sequence for AUT or AUTPAC. - void emitPtrauthAuthResign(const MachineInstr *MI); + void emitPtrauthAuthResign(Register AUTVal, AArch64PACKey::ID AUTKey, + uint64_t AUTDisc, + const MachineOperand *AUTAddrDisc, + Register Scratch, + std::optional PACKey, + uint64_t PACDisc, unsigned PACAddrDisc); atrosinenko wrote: ```suggestion uint64_t PACDisc, Register PACAddrDisc); ``` https://github.com/llvm/llvm-project/pull/132857 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AArch64: Relax x16/x17 constraint on AUT in certain cases. (PR #132857)
@@ -140,16 +157,18 @@ define i64 @test_resign_blend_and_const(i64 %arg, i64 %arg1) { ; CHECKED-LABEL: test_resign_blend_and_const: ; CHECKED: %bb.0: ; CHECKED-NEXT:mov x16, x0 -; CHECKED-NEXT:mov x17, x1 -; CHECKED-NEXT:movk x17, #12345, lsl #48 -; CHECKED-NEXT:autda x16, x17 +; CHECKED-ELF-NEXT:movk x1, #12345, lsl #48 +; CHECKED-ELF-NEXT:autda x16, x1 +; CHECKED-DARWIN-NEXT: mov x17, x1 +; CHECKED-DARWIN-NEXT: movk x17, #12345, lsl #48 +; CHECKED-DARWIN-NEXT: autda x16, x17 ; CHECKED-NEXT:mov x17, x16 ; CHECKED-NEXT:xpacd x17 ; CHECKED-NEXT:cmp x16, x17 -; CHECKED-NEXT:b.eq [[L]]auth_success_1 +; CHECKED-NEXT:b.eq [[L]]auth_success_[[N2:[0-9]+]] ; CHECKED-NEXT:mov x16, x17 ; CHECKED-NEXT:b [[L]]resign_end_1 -; CHECKED-NEXT: Lauth_success_1: +; CHECKED-NEXT: Lauth_success_[[N2]]: atrosinenko wrote: [nit] Not necessary? https://github.com/llvm/llvm-project/pull/132857 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AArch64: Relax x16/x17 constraint on AUT in certain cases. (PR #132857)
https://github.com/atrosinenko edited https://github.com/llvm/llvm-project/pull/132857 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AArch64: Relax x16/x17 constraint on AUT in certain cases. (PR #132857)
@@ -2064,8 +2069,12 @@ void AArch64AsmPrinter::emitPtrauthTailCallHardening(const MachineInstr *TC) { /*ShouldTrap=*/true, /*OnFailure=*/nullptr); } -void AArch64AsmPrinter::emitPtrauthAuthResign(const MachineInstr *MI) { - const bool IsAUTPAC = MI->getOpcode() == AArch64::AUTPAC; +void AArch64AsmPrinter::emitPtrauthAuthResign( +Register AUTVal, AArch64PACKey::ID AUTKey, uint64_t AUTDisc, +const MachineOperand *AUTAddrDisc, Register Scratch, +std::optional PACKey, uint64_t PACDisc, +unsigned PACAddrDisc) { atrosinenko wrote: ```suggestion Register PACAddrDisc) { ``` https://github.com/llvm/llvm-project/pull/132857 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AArch64: Relax x16/x17 constraint on AUT in certain cases. (PR #132857)
@@ -1866,7 +1871,6 @@ Register AArch64AsmPrinter::emitPtrauthDiscriminator(uint16_t Disc, Register AddrDisc, Register ScratchReg, bool MayUseAddrAsScratch) { - assert(ScratchReg == AArch64::X16 || ScratchReg == AArch64::X17); atrosinenko wrote: Maybe `... || !STI->isX16X17Safer()`? https://github.com/llvm/llvm-project/pull/132857 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] AArch64: Relax x16/x17 constraint on AUT in certain cases. (PR #132857)
@@ -621,10 +687,10 @@ define i64 @test_resign_da_constdisc(i64 %arg, i64 %arg1) { ; CHECKED-NEXT:mov x17, x16 ; CHECKED-NEXT:xpacd x17 ; CHECKED-NEXT:cmp x16, x17 -; CHECKED-NEXT:b.eq [[L]]auth_success_7 +; CHECKED-NEXT:b.eq [[L]]auth_success_[[N1:[0-9]+]] ; CHECKED-NEXT:mov x16, x17 ; CHECKED-NEXT:b [[L]]resign_end_6 -; CHECKED-NEXT: Lauth_success_7: +; CHECKED-NEXT: Lauth_success_[[N1]]: atrosinenko wrote: [nit] Not necessary? https://github.com/llvm/llvm-project/pull/132857 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] Fix error recovery while PTU cleanup (#127467) (PR #142445)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/142445 Backport 3b4c51bb3243a02526313c51207a674139b67a00 Requested by: @anutosh491 >From 4c50edbf427a7ebac4d5a9bf8249203bb112c1a7 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Mon, 2 Jun 2025 20:14:28 +0530 Subject: [PATCH] [clang-repl] Fix error recovery while PTU cleanup (#127467) Fixes #123300 What is seen ``` clang-repl> int x = 42; clang-repl> auto capture = [&]() { return x * 2; }; In file included from <<< inputs >>>:1: input_line_4:1:17: error: non-local lambda expression cannot have a capture-default 1 | auto capture = [&]() { return x * 2; }; | ^ zsh: segmentation fault clang-repl --Xcc="-v" (lldb) bt * thread #1, queue = 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1, address=0x8) * frame #0: 0x000107b4f8b8 libclang-cpp.19.1.dylib`clang::IncrementalParser::CleanUpPTU(clang::PartialTranslationUnit&) + 988 frame #1: 0x000107b4f1b4 libclang-cpp.19.1.dylib`clang::IncrementalParser::ParseOrWrapTopLevelDecl() + 416 frame #2: 0x000107b4fb94 libclang-cpp.19.1.dylib`clang::IncrementalParser::Parse(llvm::StringRef) + 612 frame #3: 0x000107b52fec libclang-cpp.19.1.dylib`clang::Interpreter::ParseAndExecute(llvm::StringRef, clang::Value*) + 180 frame #4: 0x00013498 clang-repl`main + 3560 frame #5: 0x00018d39a0e0 dyld`start + 2360 ``` Though the error is justified, we shouldn't be interested in exiting through a segfault in such cases. The issue is that empty named decls weren't being taken care of resulting into this assert https://github.com/llvm/llvm-project/blob/c1a229252617ed58f943bf3f4698bd8204ee0f04/clang/include/clang/AST/DeclarationName.h#L503 Can also be seen when the example is attempted through xeus-cpp-lite.  
(cherry picked from commit 3b4c51bb3243a02526313c51207a674139b67a00) --- clang/lib/Interpreter/IncrementalParser.cpp | 2 +- clang/test/Interpreter/lambda.cpp | 15 +-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp index e43cea1baf43a..1d223e230669c 100644 --- a/clang/lib/Interpreter/IncrementalParser.cpp +++ b/clang/lib/Interpreter/IncrementalParser.cpp @@ -175,7 +175,7 @@ void IncrementalParser::CleanUpPTU(TranslationUnitDecl *MostRecentTU) { // FIXME: We should de-allocate MostRecentTU for (Decl *D : MostRecentTU->decls()) { auto *ND = dyn_cast(D); -if (!ND) +if (!ND || ND->getDeclName().isEmpty()) continue; // Check if we need to clean up the IdResolver chain. if (ND->getDeclName().getFETokenInfo() && !D->getLangOpts().ObjC && diff --git a/clang/test/Interpreter/lambda.cpp b/clang/test/Interpreter/lambda.cpp index df75274a050b2..7e5e1057e4c9e 100644 --- a/clang/test/Interpreter/lambda.cpp +++ b/clang/test/Interpreter/lambda.cpp @@ -1,7 +1,8 @@ // REQUIRES: host-supports-jit // UNSUPPORTED: system-aix // RUN: cat %s | clang-repl | FileCheck %s -// RUN: cat %s | clang-repl -Xcc -O2 | FileCheck %s +// RUN: cat %s | clang-repl -Xcc -Xclang -Xcc -verify -Xcc -O2 | FileCheck %s + extern "C" int printf(const char *, ...); auto l1 = []() { printf("ONE\n"); return 42; }; @@ -14,4 +15,14 @@ auto r2 = l2(); auto r3 = l2(); // CHECK: TWO -%quit +// Verify non-local lambda capture error is correctly reported +int x = 42; + +// expected-error {{non-local lambda expression cannot have a capture-default}} +auto capture = [&]() { return x * 2; }; + +// Ensure interpreter continues and x is still valid +printf("x = %d\n", x); +// CHECK: x = 42 + +%quit \ No newline at end of file ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] Fix error recovery while PTU cleanup (#127467) (PR #142445)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/142445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] Fix error recovery while PTU cleanup (#127467) (PR #142445)
llvmbot wrote: @vgvassilev What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/142445 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-repl] Fix error recovery while PTU cleanup (#127467) (PR #142445)
llvmbot wrote: @llvm/pr-subscribers-clang Author: None (llvmbot) Changes Backport 3b4c51bb3243a02526313c51207a674139b67a00 Requested by: @anutosh491 --- Full diff: https://github.com/llvm/llvm-project/pull/142445.diff 2 Files Affected: - (modified) clang/lib/Interpreter/IncrementalParser.cpp (+1-1) - (modified) clang/test/Interpreter/lambda.cpp (+13-2) ``diff diff --git a/clang/lib/Interpreter/IncrementalParser.cpp b/clang/lib/Interpreter/IncrementalParser.cpp index e43cea1baf43a..1d223e230669c 100644 --- a/clang/lib/Interpreter/IncrementalParser.cpp +++ b/clang/lib/Interpreter/IncrementalParser.cpp @@ -175,7 +175,7 @@ void IncrementalParser::CleanUpPTU(TranslationUnitDecl *MostRecentTU) { // FIXME: We should de-allocate MostRecentTU for (Decl *D : MostRecentTU->decls()) { auto *ND = dyn_cast(D); -if (!ND) +if (!ND || ND->getDeclName().isEmpty()) continue; // Check if we need to clean up the IdResolver chain. if (ND->getDeclName().getFETokenInfo() && !D->getLangOpts().ObjC && diff --git a/clang/test/Interpreter/lambda.cpp b/clang/test/Interpreter/lambda.cpp index df75274a050b2..7e5e1057e4c9e 100644 --- a/clang/test/Interpreter/lambda.cpp +++ b/clang/test/Interpreter/lambda.cpp @@ -1,7 +1,8 @@ // REQUIRES: host-supports-jit // UNSUPPORTED: system-aix // RUN: cat %s | clang-repl | FileCheck %s -// RUN: cat %s | clang-repl -Xcc -O2 | FileCheck %s +// RUN: cat %s | clang-repl -Xcc -Xclang -Xcc -verify -Xcc -O2 | FileCheck %s + extern "C" int printf(const char *, ...); auto l1 = []() { printf("ONE\n"); return 42; }; @@ -14,4 +15,14 @@ auto r2 = l2(); auto r3 = l2(); // CHECK: TWO -%quit +// Verify non-local lambda capture error is correctly reported +int x = 42; + +// expected-error {{non-local lambda expression cannot have a capture-default}} +auto capture = [&]() { return x * 2; }; + +// Ensure interpreter continues and x is still valid +printf("x = %d\n", x); +// CHECK: x = 42 + +%quit \ No newline at end of file `` https://github.com/llvm/llvm-project/pull/142445 
___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64][SME] Support split ZPR and PPR area allocation (PR #142392)
MacDue wrote: This is a stacked PR. See previous PRs below: 1. https://github.com/llvm/llvm-project/pull/142390 2. https://github.com/llvm/llvm-project/pull/142391 3. :point_right: https://github.com/llvm/llvm-project/pull/142392 https://github.com/llvm/llvm-project/pull/142392 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Cost model for minimumnum/maximumnum (PR #141946)
@@ -742,6 +744,23 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, break; case Intrinsic::copysign: return NElts * getFullRateInstrCost(); + case Intrinsic::minimumnum: + case Intrinsic::maximumnum: { +// Instruction + 2 canonicalizes. For cases that need type promotion, we the +// promotion takes the place of the canonicalize. Pierre-vh wrote: ```suggestion // Instruction + 2 canonicalizes. For cases that need type promotion, the // promotion takes the place of the canonicalize. ``` https://github.com/llvm/llvm-project/pull/141946 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)
@@ -281,6 +281,13 @@ class GCNTTIImpl final : public BasicTTIImplBase { void collectKernelLaunchBounds( const Function &F, SmallVectorImpl> &LB) const override; + + enum class KnownIEEEMode { Unknown, On, Off }; + + /// Return KnownIEEEMode::On if we know if the use context can assume Pierre-vh wrote: ```suggestion /// \returns KnownIEEEMode::On if we know if the use context can assume ``` https://github.com/llvm/llvm-project/pull/141945 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)
https://github.com/Pierre-vh approved this pull request. unrelated to this patch but `KnownIEEEMode::Unknown` looks weird, maybe it should just be named `IEEEMode`? https://github.com/llvm/llvm-project/pull/141945 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Add cost model tests for minimumnum/maximumnum (PR #141904)
https://github.com/Pierre-vh approved this pull request. https://github.com/llvm/llvm-project/pull/141904 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Cost model for minimumnum/maximumnum (PR #141946)
https://github.com/Pierre-vh approved this pull request. https://github.com/llvm/llvm-project/pull/141946 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Move fpenvIEEEMode into TTI (PR #141945)
https://github.com/Pierre-vh edited https://github.com/llvm/llvm-project/pull/141945 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Reduce cost of f64 copysign (PR #141944)
@@ -741,8 +743,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::copysign: return NElts * getFullRateInstrCost(); case Intrinsic::canonicalize: { -assert(SLT != MVT::f64); -InstRate = getFullRateInstrCost(); +InstRate = +SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost(); break; } case Intrinsic::uadd_sat: Pierre-vh wrote: are those cases below fine with handling f64 now? https://github.com/llvm/llvm-project/pull/141944 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] AMDGPU: Reduce cost of f64 copysign (PR #141944)
@@ -741,8 +743,8 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, case Intrinsic::copysign: return NElts * getFullRateInstrCost(); case Intrinsic::canonicalize: { -assert(SLT != MVT::f64); -InstRate = getFullRateInstrCost(); +InstRate = +SLT == MVT::f64 ? get64BitInstrCost(CostKind) : getFullRateInstrCost(); break; } case Intrinsic::uadd_sat: arsenm wrote: They are only integer intrinsics https://github.com/llvm/llvm-project/pull/141944 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][PromoteMem2Reg] Move IncomingVals, IncomingLocs, Worklist into class (PR #142468)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Vitaly Buka (vitalybuka) Changes `IncomingVals`, `IncomingLocs`, `Worklist` into class members. They are all DFS state related, as `Visited`. But visited is already a class member. On it's own the patch has no value, but it simplify stuff in the next patch. --- Full diff: https://github.com/llvm/llvm-project/pull/142468.diff 1 Files Affected: - (modified) llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (+36-23) ``diff diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 62995e57b917c..9ddcbd516e00a 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -392,6 +392,15 @@ struct PromoteMem2Reg { /// number. SmallVector BBNumPreds; + /// The state of incoming values for the current DFS step. + RenamePassData::ValVector IncomingVals; + + /// The state of incoming locations for the current DFS step. + RenamePassData::LocationVector IncomingLocs; + + // DFS work stack. + SmallVector WorkList; + /// Whether the function has the no-signed-zeros-fp-math attribute set. bool NoSignedZeros = false; @@ -423,10 +432,7 @@ struct PromoteMem2Reg { void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, const SmallPtrSetImpl &DefBlocks, SmallPtrSetImpl &LiveInBlocks); - void RenamePass(BasicBlock *BB, BasicBlock *Pred, - RenamePassData::ValVector IncVals, - RenamePassData::LocationVector IncLocs, - std::vector &Worklist); + void RenamePass(BasicBlock *BB, BasicBlock *Pred); bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version); /// Delete dbg.assigns that have been demoted to dbg.values. 
@@ -438,6 +444,20 @@ struct PromoteMem2Reg { DVR->eraseFromParent(); DVRAssignsToDelete.clear(); } + + void pushToWorklist(BasicBlock *BB, BasicBlock *Pred, + RenamePassData::ValVector IncVals, + RenamePassData::LocationVector IncLocs) { +WorkList.emplace_back(BB, Pred, std::move(IncVals), std::move(IncLocs)); + } + + RenamePassData popFromWorklist() { +RenamePassData R = std::move(WorkList.back()); +WorkList.pop_back(); +IncomingVals = std::move(R.Values); +IncomingLocs = std::move(R.Locations); +return R; + } }; } // end anonymous namespace @@ -849,29 +869,26 @@ void PromoteMem2Reg::run() { // Set the incoming values for the basic block to be null values for all of // the alloca's. We do this in case there is a load of a value that has not // been stored yet. In this case, it will get this null value. - RenamePassData::ValVector Values(Allocas.size()); + IncomingVals.assign(Allocas.size(), nullptr); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) -Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); +IncomingVals[i] = UndefValue::get(Allocas[i]->getAllocatedType()); // When handling debug info, treat all incoming values as if they have unknown // locations until proven otherwise. - RenamePassData::LocationVector Locations(Allocas.size()); + IncomingLocs.assign(Allocas.size(), {}); // The renamer uses the Visited set to avoid infinite loops. Visited.resize(F.getMaxBlockNumber()); // Walks all basic blocks in the function performing the SSA rename algorithm // and inserting the phi nodes we marked as necessary - std::vector RenamePassWorkList; - RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values), - std::move(Locations)); + pushToWorklist(&F.front(), nullptr, std::move(IncomingVals), + std::move(IncomingLocs)); do { -RenamePassData RPD = std::move(RenamePassWorkList.back()); -RenamePassWorkList.pop_back(); +RenamePassData RPD = popFromWorklist(); // RenamePass may add new worklist entries. 
-RenamePass(RPD.BB, RPD.Pred, std::move(RPD.Values), - std::move(RPD.Locations), RenamePassWorkList); - } while (!RenamePassWorkList.empty()); +RenamePass(RPD.BB, RPD.Pred); + } while (!WorkList.empty()); // Remove the allocas themselves from the function. for (Instruction *A : Allocas) { @@ -1096,10 +1113,7 @@ static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL, /// /// IncomingVals indicates what value each Alloca contains on exit from the /// predecessor block Pred. -void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, -RenamePassData::ValVector IncomingVals, -RenamePassData::LocationVector IncomingLocs, -std::vector &Worklist) { +void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) { // If we are inserting any phi nodes into this BB, the
[llvm-branch-commits] [BOLT] Expose external entry count for functions (PR #141674)
llvmbot wrote: @llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) Changes Record the number of function invocations from external code - code outside the binary, which may include JIT code and DSOs. The purpose of it is to exclude external entry counts from call graph flow conservation analysis. It is known that external entries increase CG flow imbalance. Test Plan: updated shrinkwrapping.test --- Full diff: https://github.com/llvm/llvm-project/pull/141674.diff 9 Files Affected: - (modified) bolt/include/bolt/Core/BinaryFunction.h (+12) - (modified) bolt/include/bolt/Profile/DataReader.h (+3) - (modified) bolt/include/bolt/Profile/ProfileYAMLMapping.h (+2) - (modified) bolt/lib/Core/BinaryFunction.cpp (+2) - (modified) bolt/lib/Profile/DataAggregator.cpp (+1) - (modified) bolt/lib/Profile/DataReader.cpp (+6) - (modified) bolt/lib/Profile/YAMLProfileReader.cpp (+1) - (modified) bolt/lib/Profile/YAMLProfileWriter.cpp (+1) - (modified) bolt/test/X86/shrinkwrapping.test (+2) ``diff diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 6f3b5923d3ef4..54187b32968a5 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -388,6 +388,10 @@ class BinaryFunction { /// The profile data for the number of times the function was executed. uint64_t ExecutionCount{COUNT_NO_PROFILE}; + /// Profile data for the number of times this function was entered from + /// external code (DSO, JIT, etc). + uint64_t ExternEntryCount{0}; + /// Profile match ratio. float ProfileMatchRatio{0.0f}; @@ -1864,6 +1868,10 @@ class BinaryFunction { return *this; } + /// Set the profile data for the number of times the function was entered from + /// external code (DSO/JIT). + void setExternEntryCount(uint64_t Count) { ExternEntryCount = Count; } + /// Adjust execution count for the function by a given \p Count. The value /// \p Count will be subtracted from the current function count. 
/// @@ -1891,6 +1899,10 @@ class BinaryFunction { /// Return COUNT_NO_PROFILE if there's no profile info. uint64_t getExecutionCount() const { return ExecutionCount; } + /// Return the profile information about the number of times the function was + /// entered from external code (DSO/JIT). + uint64_t getExternEntryCount() const { return ExternEntryCount; } + /// Return the raw profile information about the number of branch /// executions corresponding to this function. uint64_t getRawSampleCount() const { return RawSampleCount; } diff --git a/bolt/include/bolt/Profile/DataReader.h b/bolt/include/bolt/Profile/DataReader.h index 80031f8f6ef4a..9bc61ec83364f 100644 --- a/bolt/include/bolt/Profile/DataReader.h +++ b/bolt/include/bolt/Profile/DataReader.h @@ -109,6 +109,9 @@ struct FuncBranchData { /// Total execution count for the function. int64_t ExecutionCount{0}; + /// Total entry count from external code for the function. + uint64_t ExternEntryCount{0}; + /// Indicate if the data was used. 
bool Used{false}; diff --git a/bolt/include/bolt/Profile/ProfileYAMLMapping.h b/bolt/include/bolt/Profile/ProfileYAMLMapping.h index a8d9a15311d94..41e2bd1651efd 100644 --- a/bolt/include/bolt/Profile/ProfileYAMLMapping.h +++ b/bolt/include/bolt/Profile/ProfileYAMLMapping.h @@ -206,6 +206,7 @@ struct BinaryFunctionProfile { uint32_t Id{0}; llvm::yaml::Hex64 Hash{0}; uint64_t ExecCount{0}; + uint64_t ExternEntryCount{0}; std::vector Blocks; std::vector InlineTree; bool Used{false}; @@ -218,6 +219,7 @@ template <> struct MappingTraits { YamlIO.mapRequired("fid", BFP.Id); YamlIO.mapRequired("hash", BFP.Hash); YamlIO.mapRequired("exec", BFP.ExecCount); +YamlIO.mapOptional("extern", BFP.ExternEntryCount, 0); YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks); YamlIO.mapOptional("blocks", BFP.Blocks, std::vector()); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 851fa36a6b4b7..68477f778470c 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -476,6 +476,8 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) { OS << "\n Sample Count: " << RawSampleCount; OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f); } + if (ExternEntryCount) +OS << "\n Extern Entry Count: " << ExternEntryCount; if (opts::PrintDynoStats && !getLayout().block_empty()) { OS << '\n'; diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp index 4b7a9fd912869..7d62dadff887a 100644 --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -2289,6 +2289,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC, YamlBF.Id = BF->getFunctionNumber(); YamlBF.Hash = BAT->getBFHash(FuncAddress); YamlBF.Ex
[llvm-branch-commits] [BOLT] Expose external entry count for functions (PR #141674)
https://github.com/aaupov ready_for_review https://github.com/llvm/llvm-project/pull/141674 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [UBSan][Ignorelist] Expanding =sanitize to global. (PR #142456)
https://github.com/qinkunbao updated https://github.com/llvm/llvm-project/pull/142456 >From c0d1cb94966a24e12525bb5c1d863e43cd13e3c2 Mon Sep 17 00:00:00 2001 From: Qinkun Bao Date: Mon, 2 Jun 2025 19:10:16 + Subject: [PATCH] Change the test. Created using spr 1.3.6 --- ...relist.test => asan-global-ignorelist.test} | 18 +++--- 1 file changed, 11 insertions(+), 7 deletions(-) rename clang/test/CodeGen/{ubsan-global-ignorelist.test => asan-global-ignorelist.test} (60%) diff --git a/clang/test/CodeGen/ubsan-global-ignorelist.test b/clang/test/CodeGen/asan-global-ignorelist.test similarity index 60% rename from clang/test/CodeGen/ubsan-global-ignorelist.test rename to clang/test/CodeGen/asan-global-ignorelist.test index 8d7812217f331..3ec2847a8a4e9 100644 --- a/clang/test/CodeGen/ubsan-global-ignorelist.test +++ b/clang/test/CodeGen/asan-global-ignorelist.test @@ -1,11 +1,11 @@ // RUN: rm -rf %t // RUN: split-file %s %t -// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE -// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-0.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE -// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-1.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE -// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-2.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE -// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-3.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=address %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE +// RUN: %clang -target 
x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=address -fsanitize-ignorelist=%t/order-0.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=address -fsanitize-ignorelist=%t/order-1.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=address -fsanitize-ignorelist=%t/order-2.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=address -fsanitize-ignorelist=%t/order-3.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE // The same type can appear multiple times within an ignorelist. Any ``=sanitize`` type // entries enable sanitizer instrumentation, even if it was ignored by entries before. @@ -13,18 +13,22 @@ // precedence. //--- order-0.ignorelist +[address] global:global_array //--- order-1.ignorelist +[address] global:global_array global:global_array=sanitize //--- order-2.ignorelist +[address] global:* global:global_array=sanitize global:global_array //--- order-3.ignorelist +[address] global:* global:global_array=sanitize global:global* @@ -34,6 +38,6 @@ global:*array=sanitize unsigned global_array[100] = {-1}; // CHECK-LABEL: define dso_local i32 @test -// IGNORE-NOT: call void @__ubsan_handle_out_of_bounds -// SANITIZE: call void @__ubsan_handle_out_of_bounds +// IGNORE-NOT: call void @__asan_report_load4 +// SANITIZE: call void @__asan_report_load4 int test(int i) { return global_array[i]; } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [UBSan][Ignorelist] Expanding =sanitize to global. (PR #142456)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Qinkun Bao (qinkunbao) Changes See https://github.com/llvm/llvm-project/issues/139128 If multiple entries match the source, than the latest entry takes the precedence.` --- Full diff: https://github.com/llvm/llvm-project/pull/142456.diff 1 Files Affected: - (added) clang/test/CodeGen/ubsan-global-ignorelist.test (+39) ``diff diff --git a/clang/test/CodeGen/ubsan-global-ignorelist.test b/clang/test/CodeGen/ubsan-global-ignorelist.test new file mode 100644 index 0..8d7812217f331 --- /dev/null +++ b/clang/test/CodeGen/ubsan-global-ignorelist.test @@ -0,0 +1,39 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t + +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-0.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-1.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-2.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-3.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE + +// The same type can appear multiple times within an ignorelist. Any ``=sanitize`` type +// entries enable sanitizer instrumentation, even if it was ignored by entries before. +// If multiple entries match the source, then the latest entry takes the +// precedence. 
+ +//--- order-0.ignorelist +global:global_array + +//--- order-1.ignorelist +global:global_array +global:global_array=sanitize + +//--- order-2.ignorelist +global:* +global:global_array=sanitize +global:global_array + +//--- order-3.ignorelist +global:* +global:global_array=sanitize +global:global* +global:*array=sanitize + +//--- test.c +unsigned global_array[100] = {-1}; + +// CHECK-LABEL: define dso_local i32 @test +// IGNORE-NOT: call void @__ubsan_handle_out_of_bounds +// SANITIZE: call void @__ubsan_handle_out_of_bounds +int test(int i) { return global_array[i]; } `` https://github.com/llvm/llvm-project/pull/142456 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Sanitizer][Ignorelist] Expanding =sanitize to mainfile. (PR #142472)
https://github.com/vitalybuka approved this pull request. https://github.com/llvm/llvm-project/pull/142472 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Sanitizer][Ignorelist] Expanding =sanitize to global. (PR #142456)
https://github.com/vitalybuka approved this pull request. https://github.com/llvm/llvm-project/pull/142456 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][PromoteMem2Reg] Don't handle the first successor out of order (PR #142464)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Vitaly Buka (vitalybuka) Changes Additionally handler successors in regular order, so that two successors cases are handled exactly as before, so we need to less tests. Additionally reverse order here results in more natural 'phi' arguments list order. --- Full diff: https://github.com/llvm/llvm-project/pull/142464.diff 2 Files Affected: - (modified) llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (+12-14) - (modified) llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll (+2-2) ``diff diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index d84b07bd1457c..62995e57b917c 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -1215,24 +1215,22 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, } // 'Recurse' to our successors. - succ_iterator I = succ_begin(BB), E = succ_end(BB); - if (I == E) -return; // Keep track of the successors so we don't visit the same successor twice SmallPtrSet VisitedSuccs; - // Handle the first successor after the rest, to mimic legacy behaviour. - // FIXME: Handle them in regular order. - VisitedSuccs.insert(*I); - ++I; - - for (; I != E; ++I) -if (VisitedSuccs.insert(*I).second) - Worklist.emplace_back(*I, BB, IncomingVals, IncomingLocs); - - Worklist.emplace_back(*succ_begin(BB), BB, std::move(IncomingVals), -std::move(IncomingLocs)); + for (BasicBlock *S : reverse(successors(BB))) +if (VisitedSuccs.insert(S).second) { + if (VisitedSuccs.size() == 1) { +// Let the first successor to own allocated arrays. +Worklist.emplace_back(S, BB, std::move(IncomingVals), + std::move(IncomingLocs)); + } else { +// Other successors have to make a copy. 
+Worklist.emplace_back(S, BB, Worklist.back().Values, + Worklist.back().Locations); + } +} } void llvm::PromoteMemToReg(ArrayRef Allocas, DominatorTree &DT, diff --git a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll index b532c81556738..ef414885bf809 100644 --- a/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll +++ b/llvm/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll @@ -215,7 +215,7 @@ define ptr @_Z3fooRSt6vectorIiSaIiEE(ptr %X) { ; IC_SROA-NEXT:[[TMP27:%.*]] = getelementptr i8, ptr [[__FIRST_ADDR_I_I_SROA_0_0]], i32 4 ; IC_SROA-NEXT:br label [[BB18_I_I]] ; IC_SROA: bb18.i.i: -; IC_SROA-NEXT:[[__FIRST_ADDR_I_I_SROA_0_1:%.*]] = phi ptr [ [[TMP27]], [[BB17_I_I]] ], [ [[__FIRST_ADDR_I_I_SROA_0_0]], [[BB13_I_I]] ] +; IC_SROA-NEXT:[[__FIRST_ADDR_I_I_SROA_0_1:%.*]] = phi ptr [ [[__FIRST_ADDR_I_I_SROA_0_0]], [[BB13_I_I]] ], [ [[TMP27]], [[BB17_I_I]] ] ; IC_SROA-NEXT:[[TMP28:%.*]] = load i32, ptr [[__FIRST_ADDR_I_I_SROA_0_1]], align 4 ; IC_SROA-NEXT:[[TMP29:%.*]] = icmp eq i32 [[TMP28]], 42 ; IC_SROA-NEXT:br i1 [[TMP29]], label [[BB20_I_I:%.*]], label [[BB21_I_I:%.*]] @@ -225,7 +225,7 @@ define ptr @_Z3fooRSt6vectorIiSaIiEE(ptr %X) { ; IC_SROA-NEXT:[[TMP30:%.*]] = getelementptr i8, ptr [[__FIRST_ADDR_I_I_SROA_0_1]], i32 4 ; IC_SROA-NEXT:br label [[BB22_I_I]] ; IC_SROA: bb22.i.i: -; IC_SROA-NEXT:[[__FIRST_ADDR_I_I_SROA_0_2:%.*]] = phi ptr [ [[TMP30]], [[BB21_I_I]] ], [ [[__FIRST_ADDR_I_I_SROA_0_0]], [[BB13_I_I]] ] +; IC_SROA-NEXT:[[__FIRST_ADDR_I_I_SROA_0_2:%.*]] = phi ptr [ [[__FIRST_ADDR_I_I_SROA_0_0]], [[BB13_I_I]] ], [ [[TMP30]], [[BB21_I_I]] ] ; IC_SROA-NEXT:[[TMP31:%.*]] = load i32, ptr [[__FIRST_ADDR_I_I_SROA_0_2]], align 4 ; IC_SROA-NEXT:[[TMP32:%.*]] = icmp eq i32 [[TMP31]], 42 ; IC_SROA-NEXT:br i1 [[TMP32]], label [[BB24_I_I:%.*]], label [[BB25_I_I:%.*]] `` https://github.com/llvm/llvm-project/pull/142464 ___ llvm-branch-commits mailing list 
llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][PromoteMem2Reg] Don't handle the first successor out of order (PR #142464)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/142464 Additionally, handle successors in regular order, so that two-successor cases are handled exactly as before and fewer tests need to be updated. Additionally, the reverse order here results in a more natural 'phi' argument list order. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/142474 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/142474 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][PromoteMem2Reg] Move IncomingVals, IncomingLocs, Worklist into class (PR #142468)
vitalybuka wrote: > LGTM with context of the follow up. By itself, this patch is probably making > things worse Yes, Incoming* here are better as parameters. I will wait to land this until #142474 is accepted. https://github.com/llvm/llvm-project/pull/142468 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [HLSL] Add descriptor table metadata parsing (PR #142492)
llvmbot wrote: @llvm/pr-subscribers-backend-directx @llvm/pr-subscribers-llvm-binary-utilities Author: None (joaosaffran) Changes - adds parsing from metadata into dxcontainer binary - adds validations as described in the spec - adds testing scenarios Closes: #[126640](https://github.com/llvm/llvm-project/issues/126640) --- Patch is 20.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/142492.diff 8 Files Affected: - (modified) llvm/include/llvm/BinaryFormat/DXContainerConstants.def (+11) - (modified) llvm/lib/Target/DirectX/DXILRootSignature.cpp (+201-5) - (modified) llvm/lib/Target/DirectX/DXILRootSignature.h (+2-1) - (added) llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-Invalid-Flag.ll (+20) - (added) llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-Invalid-RangeType.ll (+20) - (added) llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable-Invalid-RegisterSpace.ll (+20) - (added) llvm/test/CodeGen/DirectX/ContainerData/RootSignature-DescriptorTable.ll (+48) - (modified) llvm/test/CodeGen/DirectX/ContainerData/RootSignature-Parameters.ll (+20-2) ``diff diff --git a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def index 501ef0c31cdd0..fa66450c563c4 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def +++ b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def @@ -98,6 +98,17 @@ DESCRIPTOR_RANGE_FLAG(16, DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS) #undef DESCRIPTOR_RANGE_FLAG #endif // DESCRIPTOR_RANGE_FLAG +// DESCRIPTOR_RANGE(value, name). 
+#ifdef DESCRIPTOR_RANGE + DESCRIPTOR_RANGE(4, ERROR) + DESCRIPTOR_RANGE(0, SRV) + DESCRIPTOR_RANGE(1, UAV) + DESCRIPTOR_RANGE(2, CBV) + DESCRIPTOR_RANGE(3, Sampler) +DESCRIPTOR_RANGE(0, NONE) +#undef DESCRIPTOR_RANGE +#endif // DESCRIPTOR_RANGE + #ifdef ROOT_PARAMETER ROOT_PARAMETER(0, DescriptorTable) diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp b/llvm/lib/Target/DirectX/DXILRootSignature.cpp index 878272537d366..33a74d71027aa 100644 --- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp +++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp @@ -12,6 +12,7 @@ //===--===// #include "DXILRootSignature.h" #include "DirectX.h" +#include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" @@ -27,6 +28,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -166,6 +168,90 @@ static bool parseRootDescriptors(LLVMContext *Ctx, return false; } +static bool parseDescriptorRange(LLVMContext *Ctx, + mcdxbc::RootSignatureDesc &RSD, + mcdxbc::DescriptorTable &Table, + MDNode *RangeDescriptorNode) { + + if (RangeDescriptorNode->getNumOperands() != 6) +return reportError(Ctx, "Invalid format for Descriptor Range"); + + dxbc::RTS0::v2::DescriptorRange Range; + + std::optional ElementText = + extractMdStringValue(RangeDescriptorNode, 0); + + if (!ElementText.has_value()) +return reportError(Ctx, "Descriptor Range, first element is not a string."); + + Range.RangeType = + StringSwitch(*ElementText) + .Case("CBV", llvm::to_underlying(dxbc::DescriptorRangeType::CBV)) + .Case("SRV", llvm::to_underlying(dxbc::DescriptorRangeType::SRV)) + .Case("UAV", llvm::to_underlying(dxbc::DescriptorRangeType::UAV)) + .Case("Sampler", +llvm::to_underlying(dxbc::DescriptorRangeType::Sampler)) + .Default(llvm::to_underlying(dxbc::DescriptorRangeType::ERROR)); + + if (std::optional Val = 
extractMdIntValue(RangeDescriptorNode, 1)) +Range.NumDescriptors = *Val; + else +return reportError(Ctx, "Invalid value for Number of Descriptor in Range"); + + if (std::optional Val = extractMdIntValue(RangeDescriptorNode, 2)) +Range.BaseShaderRegister = *Val; + else +return reportError(Ctx, "Invalid value for BaseShaderRegister"); + + if (std::optional Val = extractMdIntValue(RangeDescriptorNode, 3)) +Range.RegisterSpace = *Val; + else +return reportError(Ctx, "Invalid value for RegisterSpace"); + + if (std::optional Val = extractMdIntValue(RangeDescriptorNode, 4)) +Range.OffsetInDescriptorsFromTableStart = *Val; + else +return reportError(Ctx, + "Invalid value for OffsetInDescriptorsFromTableStart"); + + if (std::optional Val = extractMdIntValue(RangeDescriptorNode, 5)) +Range.Flags = *Val; + else +return reportError(Ctx, "Invalid value for Descriptor Range Flags"); + + Table.Ranges.push_back(Range); + return false; +} + +static bool parseDescri
[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate token-pasted function decl names (#142337) (PR #142482)
https://github.com/HazardyKnusperkeks approved this pull request. https://github.com/llvm/llvm-project/pull/142482 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [HLSL] Add descriptor table metadata parsing (PR #142492)
https://github.com/joaosaffran unassigned https://github.com/llvm/llvm-project/pull/142492 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [HLSL] Add descriptor table metadata parsing (PR #142492)
https://github.com/joaosaffran created https://github.com/llvm/llvm-project/pull/142492 - adds parsing from metadata into dxcontainer binary - adds validations as described in the spec - adds testing scenarios Closes: #[126640](https://github.com/llvm/llvm-project/issues/126640) >From 3e6b07e119988058defd305199ad3e08d424eebd Mon Sep 17 00:00:00 2001 From: joaosaffran Date: Mon, 2 Jun 2025 19:36:32 + Subject: [PATCH 1/4] add parsing --- .../BinaryFormat/DXContainerConstants.def | 11 ++ llvm/lib/Target/DirectX/DXILRootSignature.cpp | 176 ++ llvm/lib/Target/DirectX/DXILRootSignature.h | 3 +- 3 files changed, 189 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def index 501ef0c31cdd0..fa66450c563c4 100644 --- a/llvm/include/llvm/BinaryFormat/DXContainerConstants.def +++ b/llvm/include/llvm/BinaryFormat/DXContainerConstants.def @@ -98,6 +98,17 @@ DESCRIPTOR_RANGE_FLAG(16, DESCRIPTORS_STATIC_KEEPING_BUFFER_BOUNDS_CHECKS) #undef DESCRIPTOR_RANGE_FLAG #endif // DESCRIPTOR_RANGE_FLAG +// DESCRIPTOR_RANGE(value, name). 
+#ifdef DESCRIPTOR_RANGE + DESCRIPTOR_RANGE(4, ERROR) + DESCRIPTOR_RANGE(0, SRV) + DESCRIPTOR_RANGE(1, UAV) + DESCRIPTOR_RANGE(2, CBV) + DESCRIPTOR_RANGE(3, Sampler) +DESCRIPTOR_RANGE(0, NONE) +#undef DESCRIPTOR_RANGE +#endif // DESCRIPTOR_RANGE + #ifdef ROOT_PARAMETER ROOT_PARAMETER(0, DescriptorTable) diff --git a/llvm/lib/Target/DirectX/DXILRootSignature.cpp b/llvm/lib/Target/DirectX/DXILRootSignature.cpp index 878272537d366..a14a9fdce5cbd 100644 --- a/llvm/lib/Target/DirectX/DXILRootSignature.cpp +++ b/llvm/lib/Target/DirectX/DXILRootSignature.cpp @@ -12,6 +12,7 @@ //===--===// #include "DXILRootSignature.h" #include "DirectX.h" +#include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/DXILMetadataAnalysis.h" @@ -27,6 +28,7 @@ #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include #include #include #include @@ -166,6 +168,88 @@ static bool parseRootDescriptors(LLVMContext *Ctx, return false; } +static bool parseDescriptorRange(LLVMContext *Ctx, + mcdxbc::RootSignatureDesc &RSD, + mcdxbc::DescriptorTable &Table, + MDNode *RangeDescriptorNode) { + + if (RangeDescriptorNode->getNumOperands() != 6) +return reportError(Ctx, "Invalid format for Descriptor Range"); + + dxbc::RTS0::v2::DescriptorRange Range; + + std::optional ElementText = + extractMdStringValue(RangeDescriptorNode, 0); + + if (!ElementText.has_value()) +return reportError(Ctx, "Descriptor Range, first element is not a string."); + + Range.RangeType = + StringSwitch(*ElementText) + .Case("CBV", llvm::to_underlying(dxbc::DescriptorRangeType::CBV)) + .Case("SRV", llvm::to_underlying(dxbc::DescriptorRangeType::SRV)) + .Case("UAV", llvm::to_underlying(dxbc::DescriptorRangeType::UAV)) + .Case("Sampler", +llvm::to_underlying(dxbc::DescriptorRangeType::Sampler)) + .Default(llvm::to_underlying(dxbc::DescriptorRangeType::ERROR)); + + if (std::optional Val = 
extractMdIntValue(RangeDescriptorNode, 1)) +Range.NumDescriptors = *Val; + else +return reportError(Ctx, "Invalid value for Number of Descriptor in Range"); + + if (std::optional Val = extractMdIntValue(RangeDescriptorNode, 2)) +Range.BaseShaderRegister = *Val; + else +return reportError(Ctx, "Invalid value for BaseShaderRegister"); + + if (std::optional Val = extractMdIntValue(RangeDescriptorNode, 3)) +Range.RegisterSpace = *Val; + else +return reportError(Ctx, "Invalid value for RegisterSpace"); + + if (std::optional Val = extractMdIntValue(RangeDescriptorNode, 4)) +Range.OffsetInDescriptorsFromTableStart = *Val; + else +return reportError(Ctx, + "Invalid value for OffsetInDescriptorsFromTableStart"); + + if (std::optional Val = extractMdIntValue(RangeDescriptorNode, 5)) +Range.Flags = *Val; + else +return reportError(Ctx, "Invalid value for Descriptor Range Flags"); + + Table.Ranges.push_back(Range); + return false; +} + +static bool parseDescriptorTable(LLVMContext *Ctx, + mcdxbc::RootSignatureDesc &RSD, + MDNode *DescriptorTableNode) { + if (DescriptorTableNode->getNumOperands() < 2) +return reportError(Ctx, "Invalid format for Descriptor Table"); + + dxbc::RTS0::v1::RootParameterHeader Header; + if (std::optional Val = extractMdIntValue(DescriptorTableNode, 1)) +Header.ShaderVisibility = *Val; + else +return reportError(Ctx, "Invalid value for ShaderVisibility"); + + mcdxbc::Descri
[llvm-branch-commits] [clang] [UBSan][Ignorelist] Expanding =sanitize to global. (PR #142456)
https://github.com/qinkunbao updated https://github.com/llvm/llvm-project/pull/142456 >From c0d1cb94966a24e12525bb5c1d863e43cd13e3c2 Mon Sep 17 00:00:00 2001 From: Qinkun Bao Date: Mon, 2 Jun 2025 19:10:16 + Subject: [PATCH 1/2] Change the test. Created using spr 1.3.6 --- ...relist.test => asan-global-ignorelist.test} | 18 +++--- 1 file changed, 11 insertions(+), 7 deletions(-) rename clang/test/CodeGen/{ubsan-global-ignorelist.test => asan-global-ignorelist.test} (60%) diff --git a/clang/test/CodeGen/ubsan-global-ignorelist.test b/clang/test/CodeGen/asan-global-ignorelist.test similarity index 60% rename from clang/test/CodeGen/ubsan-global-ignorelist.test rename to clang/test/CodeGen/asan-global-ignorelist.test index 8d7812217f331..3ec2847a8a4e9 100644 --- a/clang/test/CodeGen/ubsan-global-ignorelist.test +++ b/clang/test/CodeGen/asan-global-ignorelist.test @@ -1,11 +1,11 @@ // RUN: rm -rf %t // RUN: split-file %s %t -// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE -// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-0.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE -// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-1.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE -// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-2.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE -// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=bounds -fsanitize-ignorelist=%t/order-3.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=address %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE +// RUN: %clang -target 
x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=address -fsanitize-ignorelist=%t/order-0.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=address -fsanitize-ignorelist=%t/order-1.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=address -fsanitize-ignorelist=%t/order-2.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE +// RUN: %clang -target x86_64-unknown-linux-gnu -S -emit-llvm -fsanitize=address -fsanitize-ignorelist=%t/order-3.ignorelist %t/test.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE // The same type can appear multiple times within an ignorelist. Any ``=sanitize`` type // entries enable sanitizer instrumentation, even if it was ignored by entries before. @@ -13,18 +13,22 @@ // precedence. //--- order-0.ignorelist +[address] global:global_array //--- order-1.ignorelist +[address] global:global_array global:global_array=sanitize //--- order-2.ignorelist +[address] global:* global:global_array=sanitize global:global_array //--- order-3.ignorelist +[address] global:* global:global_array=sanitize global:global* @@ -34,6 +38,6 @@ global:*array=sanitize unsigned global_array[100] = {-1}; // CHECK-LABEL: define dso_local i32 @test -// IGNORE-NOT: call void @__ubsan_handle_out_of_bounds -// SANITIZE: call void @__ubsan_handle_out_of_bounds +// IGNORE-NOT: call void @__asan_report_load4 +// SANITIZE: call void @__asan_report_load4 int test(int i) { return global_array[i]; } >From 0ae18c201b4a78aef9ed3df0528026bf7e6f2a67 Mon Sep 17 00:00:00 2001 From: Qinkun Bao Date: Mon, 2 Jun 2025 19:45:30 + Subject: [PATCH 2/2] Add implementation Created using spr 1.3.6 --- clang/lib/Basic/NoSanitizeList.cpp| 2 +- .../test/CodeGen/asan-global-ignorelist.test | 22 +++ 2 files changed, 9 insertions(+), 15 deletions(-) diff --git 
a/clang/lib/Basic/NoSanitizeList.cpp b/clang/lib/Basic/NoSanitizeList.cpp index ba36f78175422..24d2276f50ddf 100644 --- a/clang/lib/Basic/NoSanitizeList.cpp +++ b/clang/lib/Basic/NoSanitizeList.cpp @@ -44,7 +44,7 @@ bool NoSanitizeList::containsPrefix(SanitizerMask Mask, StringRef Prefix, bool NoSanitizeList::containsGlobal(SanitizerMask Mask, StringRef GlobalName, StringRef Category) const { - return SSCL->inSection(Mask, "global", GlobalName, Category); + return containsPrefix(Mask, "global", GlobalName, Category); } bool NoSanitizeList::containsType(SanitizerMask Mask, StringRef MangledTypeName, diff --git a/clang/test/CodeGen/asan-global-ignorelist.test b/clang/test/CodeGen/asan-global-ignorelist.test index 3ec2847a8a4e9..2bb37c100c067 100644 --- a/clang/test/CodeGen/asan-global-ignorelist.test +++ b/clang/test/CodeGen/asan-global-ignorelist.test @@ -1
[llvm-branch-commits] [NFC][PromoteMem2Reg] Move IncomingVals, IncomingLocs, Worklist into class (PR #142468)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/142468 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [Sanitizer][Ignorelist] Expanding =sanitize to global. (PR #142456)
https://github.com/qinkunbao edited https://github.com/llvm/llvm-project/pull/142456 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][PromoteMem2Reg] move (PR #142468)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/142468 `IncomingVals`, `IncomingLocs`, `Worklist` into class members. They are all related to DFS state, like `Visited`, which is already a class member. On its own the patch has no value, but it simplifies the next patch. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][PromoteMem2Reg] Move IncomingVals, IncomingLocs, Worklist into class (PR #142468)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/142468 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFCI][PromoteMem2Reg] Don't handle the first successor out of order (PR #142464)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/142464 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AArch64][SME] Support split ZPR and PPR area allocation (PR #142392)
efriedma-quic wrote: In the implementation you're interested in, is there not a hazard between PPRs and GPRs? What's the interaction between this and aarch64-enable-zpr-predicate-spills? https://github.com/llvm/llvm-project/pull/142392 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [UBSan][Ignorelist] Expanding =sanitize to global. (PR #142456)
https://github.com/qinkunbao created https://github.com/llvm/llvm-project/pull/142456 See https://github.com/llvm/llvm-project/issues/139128 If multiple entries match the source, then the latest entry takes precedence. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 0c04af9 - Revert "[MLIR][XeGPU] Add unroll patterns and blocking pass for XeGPU [2/N] (…"
Author: Chao Chen Date: 2025-06-02T14:14:03-05:00 New Revision: 0c04af9c9748947bf65b411b8c41c90cf6ba16ed URL: https://github.com/llvm/llvm-project/commit/0c04af9c9748947bf65b411b8c41c90cf6ba16ed DIFF: https://github.com/llvm/llvm-project/commit/0c04af9c9748947bf65b411b8c41c90cf6ba16ed.diff LOG: Revert "[MLIR][XeGPU] Add unroll patterns and blocking pass for XeGPU [2/N] (…" This reverts commit 0210750d5a5b4cfc8d2b6a9e94ace24d31d65ddc. Added: Modified: mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h mlir/lib/Dialect/XeGPU/Transforms/CMakeLists.txt mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp mlir/lib/Dialect/XeGPU/Utils/CMakeLists.txt mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp Removed: mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp mlir/test/Dialect/XeGPU/xegpu-blocking.mlir diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td index 84c1dc1373ee5..032ce5bc18334 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -295,17 +295,11 @@ def XeGPU_LayoutAttr : XeGPUAttr<"Layout", "layout"> { } LayoutAttr dropSgLayoutAndData() { - // avoid every field of the attribute is nullptr, which may lead to segment fault - if (!getInstData() && !getLaneLayout()) -return nullptr; return LayoutAttr::get(getContext(), nullptr, nullptr, getInstData(), getLaneLayout(), getLaneData(), getOrder()); } LayoutAttr dropInstData() { - // avoid every field of the attribute is nullptr, which may lead to segment fault - if (!getSgLayout() && !getLaneLayout()) -return nullptr; return LayoutAttr::get(getContext(), getSgLayout(), getSgData(), nullptr, getLaneLayout(), getLaneData(), getOrder()); } diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td 
b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td index 8bdf19ac0e47d..6f585f9ceb29b 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/Passes.td @@ -45,17 +45,4 @@ def XeGPUWgToSgDistribute : Pass<"xegpu-wg-to-sg-distribute"> { "gpu::GPUDialect", "index::IndexDialect"]; } -def XeGPUBlocking: Pass<"xegpu-blocking"> { - let summary = "Block XeGPU ops into smaller size."; - let description = [{ -This pass partitions operations that process large shapes into multiple -operations on smaller shapes, as specified by the inst_data in the layout -attribute. This enables each resulting operation to be efficiently mapped -to a hardware instruction. - }]; - let dependentDialects = [ - "memref::MemRefDialect", "xegpu::XeGPUDialect", "vector::VectorDialect" - ]; -} - #endif // MLIR_DIALECT_XEGPU_TRANSFORMS_PASSES_TD diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h index f9327d63869c0..3616fa614e7f9 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h +++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h @@ -13,12 +13,6 @@ namespace mlir { class VectorType; -class OpOperand; -class OpResult; -class OpBuilder; -class ValueRange; -class TypeConverter; - namespace xegpu { class LayoutAttr; class TensorDescType; @@ -56,59 +50,6 @@ FailureOr getDistributedVectorType(xegpu::TensorDescType tdescTy); FailureOr getDistributedVectorType(VectorType originalType, LayoutAttr layout); -/// Return the attribute name for the OpOperand to attach LayoutAttr -std::string getLayoutName(const OpOperand &operand); - -/// Return the attribute name for the OpResult to attach LayoutAttr -std::string getLayoutName(const OpResult result); - -/// Retrieves the LayoutAttr associated with a given Value. For TensorDescType -/// values, the LayoutAttr is extracted from the TensorDescType itself. 
For -/// other values, it is obtained from the attributes of the defining operation. -/// Returns nullptr if no LayoutAttr is found. -LayoutAttr getLayoutAttr(const Value value); - -/// Retrieves the LayoutAttr associated with a given OpOperand. It will -/// first check the operand_layout_{id} of the owner operation. If not found, -/// it will check the operand itself and its defining op. -LayoutAttr getLayoutAttr(const OpOperand &opr); - -/// Sets the LayoutAttr for a given OpOperand or OpResult by attaching -/// it to the owner's dictionary attributes -template || - std::is_same_v>> -void setLayoutAttr(const T &operandOrResult, const LayoutAttr layout)
[llvm-branch-commits] [Sanitizer][Ignorelist] Expanding =sanitize to mainfile. (PR #142472)
https://github.com/qinkunbao edited https://github.com/llvm/llvm-project/pull/142472 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Sanitizer][Ignorelist] Expanding =sanitize to mainfile. (PR #142472)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Qinkun Bao (qinkunbao) Changes See https://github.com/llvm/llvm-project/issues/139128 If multiple entries match the source, than the latest entry takes the precedence. --- Full diff: https://github.com/llvm/llvm-project/pull/142472.diff 2 Files Affected: - (modified) clang/lib/Basic/NoSanitizeList.cpp (+1-1) - (modified) clang/test/CodeGen/sanitize-ignorelist-mainfile.c (+16-5) ``diff diff --git a/clang/lib/Basic/NoSanitizeList.cpp b/clang/lib/Basic/NoSanitizeList.cpp index 24d2276f50ddf..96f79fb2a2a29 100644 --- a/clang/lib/Basic/NoSanitizeList.cpp +++ b/clang/lib/Basic/NoSanitizeList.cpp @@ -64,7 +64,7 @@ bool NoSanitizeList::containsFile(SanitizerMask Mask, StringRef FileName, bool NoSanitizeList::containsMainFile(SanitizerMask Mask, StringRef FileName, StringRef Category) const { - return SSCL->inSection(Mask, "mainfile", FileName, Category); + return containsPrefix(Mask, "mainfile", FileName, Category); } bool NoSanitizeList::containsLocation(SanitizerMask Mask, SourceLocation Loc, diff --git a/clang/test/CodeGen/sanitize-ignorelist-mainfile.c b/clang/test/CodeGen/sanitize-ignorelist-mainfile.c index 419e0417c1d82..599ff8e597c98 100644 --- a/clang/test/CodeGen/sanitize-ignorelist-mainfile.c +++ b/clang/test/CodeGen/sanitize-ignorelist-mainfile.c @@ -1,8 +1,10 @@ /// Test mainfile in a sanitizer special case list. 
// RUN: rm -rf %t && split-file %s %t -// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=address,alignment %t/a.c -o - | FileCheck %s --check-prefixes=CHECK,DEFAULT +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=address,alignment %t/a.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE // RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=address,alignment -fsanitize-ignorelist=%t/a.list %t/a.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE // RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=address,alignment -fsanitize-ignorelist=%t/b.list %t/a.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=address,alignment -fsanitize-ignorelist=%t/c.list %t/a.c -o - | FileCheck %s --check-prefixes=CHECK,SANITIZE +// RUN: %clang_cc1 -emit-llvm -triple x86_64 -fsanitize=address,alignment -fsanitize-ignorelist=%t/d.list %t/a.c -o - | FileCheck %s --check-prefixes=CHECK,IGNORE //--- a.list mainfile:*a.c @@ -14,6 +16,15 @@ mainfile:*a.c [alignment] mainfile:*.c +//--- c.list +mainfile:*a.c +mainfile:*a.c=sanitize + +//--- d.list +mainfile:*a.c +mainfile:*a.c=sanitize +mainfile:*a.c + //--- a.h int global_h; @@ -30,12 +41,12 @@ int foo(void *x) { return load(x); } -// DEFAULT: @___asan_gen_{{.*}} = {{.*}} c"global_h\00" -// DEFAULT: @___asan_gen_{{.*}} = {{.*}} c"global_c\00" +// SANITIZE: @___asan_gen_{{.*}} = {{.*}} c"global_h\00" +// SANITIZE: @___asan_gen_{{.*}} = {{.*}} c"global_c\00" // IGNORE-NOT: @___asan_gen_ // CHECK-LABEL: define {{.*}}@load( -// DEFAULT: call void @__ubsan_handle_type_mismatch_v1_abort( -// DEFAULT: call void @__asan_report_load4( +// SANITIZE: call void @__ubsan_handle_type_mismatch_v1_abort( +// SANITIZE: call void @__asan_report_load4( // IGNORE-NOT:call void @__ubsan_handle_type_mismatch_v1_abort( // IGNORE-NOT:call void @__asan_report_load4( `` https://github.com/llvm/llvm-project/pull/142472 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Sanitizer][Ignorelist] Expanding =sanitize to global. (PR #142472)
https://github.com/qinkunbao created https://github.com/llvm/llvm-project/pull/142472 See https://github.com/llvm/llvm-project/issues/139128 If multiple entries match the source, then the latest entry takes precedence. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
@@ -281,18 +281,44 @@ struct AllocaInfo { } }; +template class VectorWithUndo { + SmallVector Vals; + SmallVector, 8> Undo; + +public: + void undo(size_t S) { +while (S < Undo.size()) { + Vals[Undo.back().first] = Undo.back().second; + Undo.pop_back(); +} + } + + void assign(size_t Sz, const T &Val) { Vals.assign(Sz, Val); } + + size_t size() const { return Undo.size(); } + + const T &operator[](size_t Idx) const { return Vals[Idx]; } + + void set(size_t Idx, const T &Val) { +if (Vals[Idx] == Val) + return; +Undo.emplace_back(Idx, Vals[Idx]); +Vals[Idx] = Val; + } + + void init(size_t Idx, const T &Val) { Vals[Idx] = Val; } fmayer wrote: would it make sense to add asserts that this is not called after `set` or `undo`? https://github.com/llvm/llvm-project/pull/142474 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][PromoteMem2Reg] Move IncomingVals, IncomingLocs, Worklist into class (PR #142468)
https://github.com/fmayer approved this pull request. LGTM with context of the follow up. By itself, this patch is probably making things worse https://github.com/llvm/llvm-project/pull/142468 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [llvm] [HLSL][RootSignature] Implement validation of resource ranges for `RootDescriptors` (PR #140962)
https://github.com/inbelic updated https://github.com/llvm/llvm-project/pull/140962 >From bcc056ea5c753c3b1fa83d214c6bd14e90d9ee25 Mon Sep 17 00:00:00 2001 From: Finn Plummer Date: Wed, 21 May 2025 00:12:04 + Subject: [PATCH 1/4] [HLSL][RootSignature] Plug into the thing --- .../clang/Basic/DiagnosticSemaKinds.td| 5 + clang/include/clang/Sema/SemaHLSL.h | 2 + clang/lib/Sema/SemaHLSL.cpp | 105 ++ .../RootSignature-resource-ranges-err.hlsl| 26 + .../RootSignature-resource-ranges.hlsl| 22 .../llvm/Frontend/HLSL/HLSLRootSignature.h| 9 +- llvm/lib/Frontend/HLSL/HLSLRootSignature.cpp | 2 + 7 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 clang/test/SemaHLSL/RootSignature-resource-ranges-err.hlsl create mode 100644 clang/test/SemaHLSL/RootSignature-resource-ranges.hlsl diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f0bd5a1174020..b1026e733ec37 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -12967,6 +12967,11 @@ def err_hlsl_expect_arg_const_int_one_or_neg_one: Error< def err_invalid_hlsl_resource_type: Error< "invalid __hlsl_resource_t type attributes">; +def err_hlsl_resource_range_overlap: Error< + "resource ranges %select{t|u|b|s}0[%1;%2] and %select{t|u|b|s}3[%4;%5] " + "overlap within space = %6 and visibility = " + "%select{All|Vertex|Hull|Domain|Geometry|Pixel|Amplification|Mesh}7">; + // Layout randomization diagnostics. 
def err_non_designated_init_used : Error< "a randomized struct can only be initialized with a designated initializer">; diff --git a/clang/include/clang/Sema/SemaHLSL.h b/clang/include/clang/Sema/SemaHLSL.h index 15182bb27bbdf..a161236d8baa0 100644 --- a/clang/include/clang/Sema/SemaHLSL.h +++ b/clang/include/clang/Sema/SemaHLSL.h @@ -119,6 +119,8 @@ class SemaHLSL : public SemaBase { bool IsCompAssign); void emitLogicalOperatorFixIt(Expr *LHS, Expr *RHS, BinaryOperatorKind Opc); + // Returns true when D is invalid and a diagnostic was produced + bool handleRootSignatureDecl(HLSLRootSignatureDecl *D, SourceLocation Loc); void handleRootSignatureAttr(Decl *D, const ParsedAttr &AL); void handleNumThreadsAttr(Decl *D, const ParsedAttr &AL); void handleWaveSizeAttr(Decl *D, const ParsedAttr &AL); diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index e6daa67fcee95..e46cca89db5a4 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -39,6 +39,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" +#include "llvm/Frontend/HLSL/HLSLRootSignature.h" #include "llvm/Support/Casting.h" #include "llvm/Support/DXILABI.h" #include "llvm/Support/ErrorHandling.h" @@ -951,6 +952,108 @@ void SemaHLSL::emitLogicalOperatorFixIt(Expr *LHS, Expr *RHS, << NewFnName << FixItHint::CreateReplacement(FullRange, OS.str()); } +namespace { + +// A resource range overlaps with another resource range if they have: +// - equivalent ResourceClass (SRV, UAV, CBuffer, Sampler) +// - equivalent resource space +// - overlapping visbility +class ResourceRanges { +public: + // KeyT: 32-lsb denotes resource space, and 32-msb denotes resource type enum + using KeyT = uint64_t; + + static const unsigned NumVisEnums = + (unsigned)llvm::hlsl::rootsig::ShaderVisibility::NumEnums; + +private: + llvm::hlsl::rootsig::ResourceRange::IMap::Allocator Allocator; + + // Denotes a mapping of a unique combination of ResourceClass 
and register + // space to a ResourceRange + using MapT = llvm::SmallDenseMap; + + // Denotes a mapping for each unique visibility + MapT RangeMaps[NumVisEnums]; + + constexpr static KeyT getKey(const llvm::hlsl::rootsig::RangeInfo &Info) { +uint64_t SpacePacked = (uint64_t)Info.Space; +uint64_t ClassPacked = (uint64_t)llvm::to_underlying(Info.Class); +return (ClassPacked << 32) | SpacePacked; + } + +public: + // Returns std::nullopt if there was no collision. Otherwise, it will + // return the RangeInfo of the collision + std::optional + addRange(const llvm::hlsl::rootsig::RangeInfo &Info) { +MapT &VisRangeMap = RangeMaps[llvm::to_underlying(Info.Vis)]; +auto [It, _] = VisRangeMap.insert( +{getKey(Info), llvm::hlsl::rootsig::ResourceRange(Allocator)}); +auto Res = It->second.insert(Info); +if (Res.has_value()) + return Res; + +MutableArrayRef Maps = +Info.Vis == llvm::hlsl::rootsig::ShaderVisibility::All +? MutableArrayRef{RangeMaps}.drop_front() +: MutableArrayRef{RangeMaps}.take_front(); + +for (MapT &CurMap : Maps) { + auto CurIt = CurMap.find(getKey(Info)); + if (CurIt != CurMap.end()) +if (auto Overlapping = CurIt->second.getOverlapping(Info)) +
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Vitaly Buka (vitalybuka) Changes When BasicBlock has a large number of allocas, and successors, we had to copy entire IncomingVals and IncomingLocs vectors for successors. Additional changes in IncomingVals and IncomingLocs are infrequent (only Load/Store into alloc affect arrays). Given the nature of DFS traversal, instead of copying the entire vector, we can keep track of the changes and undo all changes done by successors. --- Full diff: https://github.com/llvm/llvm-project/pull/142474.diff 1 Files Affected: - (modified) llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp (+50-36) ``diff diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index 9ddcbd516e00a..3220f57aeeade 100644 --- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -281,18 +281,44 @@ struct AllocaInfo { } }; +template class VectorWithUndo { + SmallVector Vals; + SmallVector, 8> Undo; + +public: + void undo(size_t S) { +while (S < Undo.size()) { + Vals[Undo.back().first] = Undo.back().second; + Undo.pop_back(); +} + } + + void assign(size_t Sz, const T &Val) { Vals.assign(Sz, Val); } + + size_t size() const { return Undo.size(); } + + const T &operator[](size_t Idx) const { return Vals[Idx]; } + + void set(size_t Idx, const T &Val) { +if (Vals[Idx] == Val) + return; +Undo.emplace_back(Idx, Vals[Idx]); +Vals[Idx] = Val; + } + + void init(size_t Idx, const T &Val) { Vals[Idx] = Val; } +}; + /// Data package used by RenamePass(). 
struct RenamePassData { - using ValVector = std::vector; - using LocationVector = std::vector; - - RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L) - : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {} + RenamePassData(BasicBlock *B, BasicBlock *P, size_t V, size_t L) + : BB(B), Pred(P), UndoVals(V), UndoLocs(L) {} BasicBlock *BB; BasicBlock *Pred; - ValVector Values; - LocationVector Locations; + + size_t UndoVals; + size_t UndoLocs; }; /// This assigns and keeps a per-bb relative ordering of load/store @@ -393,10 +419,10 @@ struct PromoteMem2Reg { SmallVector BBNumPreds; /// The state of incoming values for the current DFS step. - RenamePassData::ValVector IncomingVals; + VectorWithUndo IncomingVals; /// The state of incoming locations for the current DFS step. - RenamePassData::LocationVector IncomingLocs; + VectorWithUndo IncomingLocs; // DFS work stack. SmallVector WorkList; @@ -445,17 +471,15 @@ struct PromoteMem2Reg { DVRAssignsToDelete.clear(); } - void pushToWorklist(BasicBlock *BB, BasicBlock *Pred, - RenamePassData::ValVector IncVals, - RenamePassData::LocationVector IncLocs) { -WorkList.emplace_back(BB, Pred, std::move(IncVals), std::move(IncLocs)); + void pushToWorklist(BasicBlock *BB, BasicBlock *Pred) { +WorkList.emplace_back(BB, Pred, IncomingVals.size(), IncomingVals.size()); } RenamePassData popFromWorklist() { -RenamePassData R = std::move(WorkList.back()); +RenamePassData R = WorkList.back(); WorkList.pop_back(); -IncomingVals = std::move(R.Values); -IncomingLocs = std::move(R.Locations); +IncomingVals.undo(R.UndoVals); +IncomingLocs.undo(R.UndoLocs); return R; } }; @@ -871,22 +895,20 @@ void PromoteMem2Reg::run() { // been stored yet. In this case, it will get this null value. 
IncomingVals.assign(Allocas.size(), nullptr); for (unsigned i = 0, e = Allocas.size(); i != e; ++i) -IncomingVals[i] = UndefValue::get(Allocas[i]->getAllocatedType()); +IncomingVals.init(i, UndefValue::get(Allocas[i]->getAllocatedType())); // When handling debug info, treat all incoming values as if they have unknown // locations until proven otherwise. IncomingLocs.assign(Allocas.size(), {}); // The renamer uses the Visited set to avoid infinite loops. - Visited.resize(F.getMaxBlockNumber()); + Visited.resize(F.getMaxBlockNumber(), false); + + // Add the entry block to the worklist, with a null predecessor. + pushToWorklist(&F.front(), nullptr); - // Walks all basic blocks in the function performing the SSA rename algorithm - // and inserting the phi nodes we marked as necessary - pushToWorklist(&F.front(), nullptr, std::move(IncomingVals), - std::move(IncomingLocs)); do { RenamePassData RPD = popFromWorklist(); -// RenamePass may add new worklist entries. RenamePass(RPD.BB, RPD.Pred); } while (!WorkList.empty()); @@ -1153,7 +1175,7 @@ void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred) { APN->setHasNoSignedZeros(true); // The currently active variable for this block is now the PHI. -IncomingVals[AllocaNo] = APN; +
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
https://github.com/vitalybuka created https://github.com/llvm/llvm-project/pull/142474 When a BasicBlock has a large number of allocas and successors, we had to copy the entire IncomingVals and IncomingLocs vectors for the successors. Additional changes to IncomingVals and IncomingLocs are infrequent (only loads/stores into an alloca affect the arrays). Given the nature of DFS traversal, instead of copying the entire vector, we can keep track of the changes and undo all changes done by successors. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/142474 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/142474 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/142474 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/142474 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/142474 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate token-pasted function decl names (#142337) (PR #142482)
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/142482 Backport 7bf5862dbfda590282f50b14e6d7d5f990bf1900 Requested by: @owenca >From 6481daabf7354eb69807f3d7ba1bdd1e26406c32 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Mon, 2 Jun 2025 13:35:27 -0700 Subject: [PATCH] [clang-format] Correctly annotate token-pasted function decl names (#142337) Fix #142178 (cherry picked from commit 7bf5862dbfda590282f50b14e6d7d5f990bf1900) --- clang/lib/Format/TokenAnnotator.cpp | 2 ++ clang/unittests/Format/TokenAnnotatorTest.cpp | 7 +++ 2 files changed, 9 insertions(+) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 11b941c5a0411..0c13356ca96de 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3839,6 +3839,8 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, } else { if (Current.isNot(TT_StartOfName) || Current.NestingLevel != 0) return false; +while (Next && Next->startsSequence(tok::hashhash, tok::identifier)) + Next = Next->Next->Next; for (; Next; Next = Next->Next) { if (Next->is(TT_TemplateOpener) && Next->MatchingParen) { Next = Next->MatchingParen; diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 757db66c3e298..602c2d5eba29a 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2187,6 +2187,13 @@ TEST_F(TokenAnnotatorTest, UnderstandsFunctionDeclarationNames) { EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen); + Tokens = annotate("#define FUNC(foo, bar, baz) \\\n" +" auto foo##bar##baz() -> Type {}"); + ASSERT_EQ(Tokens.size(), 23u) << Tokens; + EXPECT_TOKEN(Tokens[11], tok::identifier, TT_FunctionDeclarationName); + EXPECT_TOKEN(Tokens[16], tok::l_paren, TT_FunctionDeclarationLParen); + EXPECT_TOKEN(Tokens[18], tok::arrow, 
TT_TrailingReturnArrow); + Tokens = annotate("int iso_time(time_t);"); ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate token-pasted function decl names (#142337) (PR #142482)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/142482 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate token-pasted function decl names (#142337) (PR #142482)
llvmbot wrote: @HazardyKnusperkeks What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/142482 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [PromoteMem2Reg] Optimize memory usage in PromoteMem2Reg (PR #142474)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/142474 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/20.x: [clang-format] Correctly annotate token-pasted function decl names (#142337) (PR #142482)
llvmbot wrote: @llvm/pr-subscribers-clang-format Author: None (llvmbot) Changes Backport 7bf5862dbfda590282f50b14e6d7d5f990bf1900 Requested by: @owenca --- Full diff: https://github.com/llvm/llvm-project/pull/142482.diff 2 Files Affected: - (modified) clang/lib/Format/TokenAnnotator.cpp (+2) - (modified) clang/unittests/Format/TokenAnnotatorTest.cpp (+7) ``diff diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 11b941c5a0411..0c13356ca96de 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3839,6 +3839,8 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, } else { if (Current.isNot(TT_StartOfName) || Current.NestingLevel != 0) return false; +while (Next && Next->startsSequence(tok::hashhash, tok::identifier)) + Next = Next->Next->Next; for (; Next; Next = Next->Next) { if (Next->is(TT_TemplateOpener) && Next->MatchingParen) { Next = Next->MatchingParen; diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 757db66c3e298..602c2d5eba29a 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2187,6 +2187,13 @@ TEST_F(TokenAnnotatorTest, UnderstandsFunctionDeclarationNames) { EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_FunctionDeclarationLParen); + Tokens = annotate("#define FUNC(foo, bar, baz) \\\n" +" auto foo##bar##baz() -> Type {}"); + ASSERT_EQ(Tokens.size(), 23u) << Tokens; + EXPECT_TOKEN(Tokens[11], tok::identifier, TT_FunctionDeclarationName); + EXPECT_TOKEN(Tokens[16], tok::l_paren, TT_FunctionDeclarationLParen); + EXPECT_TOKEN(Tokens[18], tok::arrow, TT_TrailingReturnArrow); + Tokens = annotate("int iso_time(time_t);"); ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[1], tok::identifier, TT_FunctionDeclarationName); `` 
https://github.com/llvm/llvm-project/pull/142482 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][PromoteMem2Reg] Move IncomingVals, IncomingLocs, Worklist into class (PR #142468)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/142468 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][PromoteMem2Reg] Move IncomingVals, IncomingLocs, Worklist into class (PR #142468)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/142468 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][PromoteMem2Reg] Move IncomingVals, IncomingLocs, Worklist into class (PR #142468)
https://github.com/vitalybuka edited https://github.com/llvm/llvm-project/pull/142468 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][PromoteMem2Reg] Move IncomingVals, IncomingLocs, Worklist into class (PR #142468)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/142468 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [NFC][PromoteMem2Reg] Move IncomingVals, IncomingLocs, Worklist into class (PR #142468)
https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/142468 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] TargetLibraryInfo: Add tests for function availability (PR #142536)
arsenm wrote: > [!WARNING] > This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack [on Graphite](https://app.graphite.dev/github/pr/llvm/llvm-project/142536?utm_source=stack-comment-downstack-mergeability-warning). > [Learn more](https://graphite.dev/docs/merge-pull-requests) * **#142538** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142538?utm_source=stack-comment-icon) * **#142537** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142537?utm_source=stack-comment-icon) * **#142536** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142536?utm_source=stack-comment-icon) 👈 [(View in Graphite)](https://app.graphite.dev/github/pr/llvm/llvm-project/142536?utm_source=stack-comment-view-in-graphite) * **#142535** [![Graphite](https://static.graphite.dev/graphite-32x32-black.png)](https://app.graphite.dev/github/pr/llvm/llvm-project/142535?utm_source=stack-comment-icon) * `main` This stack of pull requests is managed by [Graphite](https://graphite.dev?utm-source=stack-comment). Learn more about [stacking](https://stacking.dev/?utm_source=stack-comment). https://github.com/llvm/llvm-project/pull/142536 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] llvm-tli-checker: Print custom name instead of standard name (PR #142537)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/142537 Previously this always printed the standard name if the function was available, leaving any custom name override untested. Print the target's name instead. The message should possibly include the standard name for reference. >From 52efdd8cb4cda57363a5bf868cae7ca82bd04513 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 2 Jun 2025 14:40:54 +0200 Subject: [PATCH] llvm-tli-checker: Print custom name instead of standard name Previously this always printed the standard name if the function was available, leaving any custom name override untested. Print the target's name instead. The message should possibly include the standard name for reference. --- .../arm64_32-apple-watchos.test | 4 ++-- .../TargetLibraryInfo/armv7s-apple-ios7.0.test | 4 ++-- .../i386-apple-macosx10.7.0.test | 4 ++-- .../x86_64-pc-windows-msvc17.test| 10 +- llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp | 16 +--- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/llvm/test/Analysis/TargetLibraryInfo/arm64_32-apple-watchos.test b/llvm/test/Analysis/TargetLibraryInfo/arm64_32-apple-watchos.test index 1878911ac85be..018ec131eebc9 100644 --- a/llvm/test/Analysis/TargetLibraryInfo/arm64_32-apple-watchos.test +++ b/llvm/test/Analysis/TargetLibraryInfo/arm64_32-apple-watchos.test @@ -229,8 +229,8 @@ CHECK-NEXT: available: 'execve' CHECK-NEXT: available: 'execvp' CHECK-NEXT: available: 'execvpe' CHECK-NEXT: available: 'exp' -CHECK-NEXT: available: 'exp10' -CHECK-NEXT: available: 'exp10f' +CHECK-NEXT: available: '__exp10' +CHECK-NEXT: available: '__exp10f' CHECK-NEXT: not available: 'exp10l' CHECK-NEXT: available: 'exp2' CHECK-NEXT: available: 'exp2f' diff --git a/llvm/test/Analysis/TargetLibraryInfo/armv7s-apple-ios7.0.test b/llvm/test/Analysis/TargetLibraryInfo/armv7s-apple-ios7.0.test index 03cb8c6915331..71b731f7927cb 100644 --- a/llvm/test/Analysis/TargetLibraryInfo/armv7s-apple-ios7.0.test +++ 
b/llvm/test/Analysis/TargetLibraryInfo/armv7s-apple-ios7.0.test @@ -229,8 +229,8 @@ CHECK-NEXT: available: 'execve' CHECK-NEXT: available: 'execvp' CHECK-NEXT: available: 'execvpe' CHECK-NEXT: available: 'exp' -CHECK-NEXT: available: 'exp10' -CHECK-NEXT: available: 'exp10f' +CHECK-NEXT: available: '__exp10' +CHECK-NEXT: available: '__exp10f' CHECK-NEXT: not available: 'exp10l' CHECK-NEXT: available: 'exp2' CHECK-NEXT: available: 'exp2f' diff --git a/llvm/test/Analysis/TargetLibraryInfo/i386-apple-macosx10.7.0.test b/llvm/test/Analysis/TargetLibraryInfo/i386-apple-macosx10.7.0.test index cb7b0b176905e..cfeba5fe2bbd8 100644 --- a/llvm/test/Analysis/TargetLibraryInfo/i386-apple-macosx10.7.0.test +++ b/llvm/test/Analysis/TargetLibraryInfo/i386-apple-macosx10.7.0.test @@ -286,7 +286,7 @@ CHECK-NEXT: available: 'fork' CHECK-NEXT: available: 'fprintf' CHECK-NEXT: available: 'fputc' CHECK-NEXT: not available: 'fputc_unlocked' -CHECK-NEXT: available: 'fputs' +CHECK-NEXT: available: 'fputs$UNIX2003' CHECK-NEXT: not available: 'fputs_unlocked' CHECK-NEXT: available: 'fread' CHECK-NEXT: not available: 'fread_unlocked' @@ -308,7 +308,7 @@ CHECK-NEXT: available: 'ftello' CHECK-NEXT: not available: 'ftello64' CHECK-NEXT: available: 'ftrylockfile' CHECK-NEXT: available: 'funlockfile' -CHECK-NEXT: available: 'fwrite' +CHECK-NEXT: available: 'fwrite$UNIX2003' CHECK-NEXT: not available: 'fwrite_unlocked' CHECK-NEXT: available: 'getc' CHECK-NEXT: available: 'getc_unlocked' diff --git a/llvm/test/Analysis/TargetLibraryInfo/x86_64-pc-windows-msvc17.test b/llvm/test/Analysis/TargetLibraryInfo/x86_64-pc-windows-msvc17.test index 2653cba3a0271..5ebbbd7c26415 100644 --- a/llvm/test/Analysis/TargetLibraryInfo/x86_64-pc-windows-msvc17.test +++ b/llvm/test/Analysis/TargetLibraryInfo/x86_64-pc-windows-msvc17.test @@ -190,7 +190,7 @@ CHECK-NEXT: available: 'atoll' CHECK-NEXT: not available: 'bcmp' CHECK-NEXT: not available: 'bcopy' CHECK-NEXT: not available: 'bzero' -CHECK-NEXT: available: 
'cabs' +CHECK-NEXT: available: '_cabs' CHECK-NEXT: not available: 'cabsf' CHECK-NEXT: not available: 'cabsl' CHECK-NEXT: available: 'calloc' @@ -204,8 +204,8 @@ CHECK-NEXT: not available: 'chmod' CHECK-NEXT: not available: 'chown' CHECK-NEXT: available: 'clearerr' CHECK-NEXT: not available: 'closedir' -CHECK-NEXT: available: 'copysign' -CHECK-NEXT: available: 'copysignf' +CHECK-NEXT: available: '_copysign' +CHECK-NEXT: available: '_copysignf' CHECK-NEXT: not available: 'copysignl' CHECK-NEXT: available: 'cos' CHECK-NEXT: available: 'cosf' @@ -347,8 +347,8 @@ CHECK-NEXT: not available: 'log2l' CHECK-NEXT: not available: 'ilogb' CHECK-NEXT: not available: 'ilogbf' CHECK-NEXT: not available: 'ilogbl' -CHECK-NEXT: available: 'logb' -CHECK-NEXT: available: 'logbf' +CHECK-NEXT: available: '_logb' +CHECK-NEXT: available: '_logbf' CHECK-NEXT: not available: 'logbl' CHECK-NEX
[llvm-branch-commits] [llvm] llvm-tli-checker: Avoid a temporary string while printing (PR #142538)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/142538 Directly write to the output instead of building a string to print. >From d0644fa28031eaa54bff4819fb165830ea87c719 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 2 Jun 2025 16:03:38 +0200 Subject: [PATCH] llvm-tli-checker: Avoid a temporary string while printing Directly write to the output instead of building a string to print. --- .../llvm-tli-checker/llvm-tli-checker.cpp | 22 +-- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp index 725fe7138509d..5d58115aa94a5 100644 --- a/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp +++ b/llvm/tools/llvm-tli-checker/llvm-tli-checker.cpp @@ -98,16 +98,12 @@ static void reportArchiveChildIssue(const object::Archive::Child &C, int Index, } // Return Name, and if Name is mangled, append "aka" and the demangled name. -static std::string getPrintableName(StringRef Name) { - std::string OutputName = "'"; - OutputName += Name; - OutputName += "'"; +static void printPrintableName(raw_ostream &OS, StringRef Name) { + OS << '\'' << Name << '\''; + std::string DemangledName(demangle(Name)); - if (Name != DemangledName) { -OutputName += " aka "; -OutputName += DemangledName; - } - return OutputName; + if (Name != DemangledName) +OS << " aka " << DemangledName; } static void reportNumberOfEntries(const TargetLibraryInfo &TLI, @@ -138,10 +134,10 @@ static void dumpTLIEntries(const TargetLibraryInfo &TLI) { StringRef Name = TLI.getName(LF); // If there is a custom name, print it. // TODO: Should we include the standard name in the printed line? - outs() << getPrintableName(Name); + printPrintableName(outs(), Name); } else { // If it's not available, refer to it by the standard name. 
- outs() << getPrintableName(TargetLibraryInfo::getStandardName(LF)); + printPrintableName(outs(), TargetLibraryInfo::getStandardName(LF)); } outs() << '\n'; @@ -345,7 +341,9 @@ int main(int argc, char *argv[]) { constexpr char YesNo[2][4] = {"no ", "yes"}; constexpr char Indicator[4][3] = {"!!", ">>", "<<", "=="}; outs() << Indicator[Which] << " TLI " << YesNo[TLIHas] << " SDK " - << YesNo[SDKHas] << ": " << getPrintableName(TLIName) << '\n'; + << YesNo[SDKHas] << ": "; +printPrintableName(outs(), TLIName); +outs() << '\n'; } } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
@@ -1211,6 +1211,11 @@ def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src, def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> +def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)), + (VMOVAPDrm addr:$src)>; // load atomic <2 x i64> +def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)), + (VMOVAPDrm addr:$src)>; // load atomic <4 x i32> RKSimon wrote: These require SSE/AVX/AVX512 variants (see below) - but x86 doesn't guarantee atomics for anything above 8 bytes (and those must be aligned to avoid cacheline crossing). https://github.com/llvm/llvm-project/pull/120716 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
@@ -1904,6 +1904,20 @@ def atomic_load_64 : let MemoryVT = i64; } +def atomic_load_128_v2i64 : + PatFrag<(ops node:$ptr), + (atomic_load node:$ptr)> { + let IsAtomic = true; + let MemoryVT = v2i64; +} + +def atomic_load_128_v4i32 : + PatFrag<(ops node:$ptr), + (atomic_load node:$ptr)> { + let IsAtomic = true; + let MemoryVT = v4i32; arsenm wrote: This patch should not require adding this, or touching any of the backend patterns https://github.com/llvm/llvm-project/pull/120716 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
@@ -6003,6 +6006,74 @@ SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { N->getOperand(1), N->getOperand(2)); } +/// Either return the same load or provide appropriate casts +/// from the load and return that. +static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, + TypeSize LdWidth, TypeSize FirstVTWidth, + SDLoc dl, SelectionDAG &DAG) { + assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth)); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + if (!FirstVT.isVector()) { +unsigned NumElts = +WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue(); +EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts); +SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); +return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp); + } + assert(FirstVT == WidenVT); + return LdOp; +} + +static std::optional findMemType(SelectionDAG &DAG, + const TargetLowering &TLI, unsigned Width, + EVT WidenVT, unsigned Align, + unsigned WidenEx); + +SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) { + EVT WidenVT = + TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT LdVT = LD->getMemoryVT(); + SDLoc dl(LD); + assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors"); + assert(LdVT.isScalableVector() == WidenVT.isScalableVector() && + "Must be scalable"); + assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() && + "Expected equivalent element types"); + + // Load information + SDValue Chain = LD->getChain(); + SDValue BasePtr = LD->getBasePtr(); + MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); + AAMDNodes AAInfo = LD->getAAInfo(); + + TypeSize LdWidth = LdVT.getSizeInBits(); + TypeSize WidenWidth = WidenVT.getSizeInBits(); + TypeSize WidthDiff = WidenWidth - LdWidth; + + // Find the vector type that can load from. 
+ std::optional FirstVT = + findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0, arsenm wrote: Why is this using 0 for the alignment instead of passing in the alignment of the load? https://github.com/llvm/llvm-project/pull/120598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SelectionDAG] Widen <2 x T> vector types for atomic load (PR #120598)
https://github.com/arsenm edited https://github.com/llvm/llvm-project/pull/120598 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [X86] Cast atomic vectors in IR to support floats (PR #142320)
@@ -32070,6 +32070,13 @@ X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { } } +TargetLowering::AtomicExpansionKind +X86TargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { + if (LI->getType()->getScalarType()->isFloatingPointTy()) +return AtomicExpansionKind::CastToInteger; + return AtomicExpansionKind::None; +} + arsenm wrote: We want to get away from using the IR expansion https://github.com/llvm/llvm-project/pull/142320 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
@@ -1211,6 +1211,11 @@ def : Pat<(v4i32 (scalar_to_vector (i32 (atomic_load_32 addr:$src, def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, (MOV64toPQIrm addr:$src)>; // load atomic <2 x i32,float> +def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)), + (VMOVAPDrm addr:$src)>; // load atomic <2 x i64> +def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)), + (VMOVAPDrm addr:$src)>; // load atomic <4 x i32> RKSimon wrote: I think all known AVX capable x86 targets allow 16 byte aligned atomics, but it's not official. https://github.com/llvm/llvm-project/pull/120716 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/RKSimon edited https://github.com/llvm/llvm-project/pull/120716 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #120716)
https://github.com/RKSimon edited https://github.com/llvm/llvm-project/pull/120716 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/142422 Previously, slices were sometimes marked as non-contiguous when they were actually contiguous. This occurred when the vector type had leading unit dimensions, e.g., `vector<1x1x...x1xd0xd1x...xdn-1xT>`. In such cases, only the trailing `n` dimensions of the memref need to be contiguous, not the entire vector rank. This affects how `FlattenContiguousRowMajorTransfer{Read,Write}Pattern` flattens `transfer_read` and `transfer_write` ops. The pattern used to collapse a number of dimensions equal to the vector rank, which may be incorrect when leading dimensions are unit-sized. This patch fixes the issue by collapsing only as many trailing memref dimensions as are actually contiguous. >From d9a470e098553dbac74e81f98e0077718f6d9ed1 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 2 Jun 2025 15:13:13 + Subject: [PATCH] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` Previously, slices were sometimes marked as non-contiguous when they were actually contiguous. This occurred when the vector type had leading unit dimensions, e.g., `vector<1x1x...x1xd0xd1x...xdn-1xT>`. In such cases, only the trailing n dimensions of the memref need to be contiguous, not the entire vector rank. This affects how `FlattenContiguousRowMajorTransfer{Read,Write}Pattern` flattens `transfer_read` and `transfer_write` ops. The pattern used to collapse a number of dimensions equal to the vector rank, which may be incorrect when leading dimensions are unit-sized. This patch fixes the issue by collapsing only as many trailing memref dimensions as are actually contiguous. 
--- .../mlir/Dialect/Utils/IndexingUtils.h| 3 +- .../mlir/Dialect/Vector/Utils/VectorUtils.h | 54 - mlir/lib/Dialect/Utils/IndexingUtils.cpp | 6 +- .../Transforms/VectorTransferOpTransforms.cpp | 8 +- mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp | 25 ++-- .../Vector/vector-transfer-flatten.mlir | 108 +- 6 files changed, 126 insertions(+), 78 deletions(-) diff --git a/mlir/include/mlir/Dialect/Utils/IndexingUtils.h b/mlir/include/mlir/Dialect/Utils/IndexingUtils.h index 99218f491ddef..a1c8ec2db056a 100644 --- a/mlir/include/mlir/Dialect/Utils/IndexingUtils.h +++ b/mlir/include/mlir/Dialect/Utils/IndexingUtils.h @@ -40,7 +40,8 @@ class ArrayAttr; /// Assuming `sizes` is `[s0, .. sn]`, return the vector /// `[s1 * ... * sn, s2 * ... * sn, ..., sn, 1]`. /// -/// `sizes` elements are asserted to be non-negative. +/// `sizes` element `s0` is asserted to be kDynamic or non-negative. +/// `sizes` elements `s1` to `sn` are asserted to be non-negative. /// /// Return an empty vector if `sizes` is empty. SmallVector computeSuffixProduct(ArrayRef sizes); diff --git a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h index 6609b28d77b6c..ed06d7a029494 100644 --- a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h +++ b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h @@ -49,35 +49,37 @@ FailureOr> isTranspose2DSlice(vector::TransposeOp op); /// Return true if `vectorType` is a contiguous slice of `memrefType`. /// -/// Only the N = vectorType.getRank() trailing dims of `memrefType` are -/// checked (the other dims are not relevant). Note that for `vectorType` to be -/// a contiguous slice of `memrefType`, the trailing dims of the latter have -/// to be contiguous - this is checked by looking at the corresponding strides. +/// The leading unit dimensions of the vector type are ignored as they +/// are not relevant to the result. 
Let N be the number of the vector +/// dimensions after ignoring a leading sequence of unit ones. /// -/// There might be some restriction on the leading dim of `VectorType`: +/// For `vectorType` to be a contiguous slice of `memrefType` +/// a) the N trailing dimensions of the latter must be contiguous, and +/// b) the trailing N dimensions of `vectorType` and `memrefType`, +/// except the first of them, must match. /// -/// Case 1. If all the trailing dims of `vectorType` match the trailing dims -/// of `memrefType` then the leading dim of `vectorType` can be -/// arbitrary. -/// -///Ex. 1.1 contiguous slice, perfect match -/// vector<4x3x2xi32> from memref<5x4x3x2xi32> -///Ex. 1.2 contiguous slice, the leading dim does not match (2 != 4) -/// vector<2x3x2xi32> from memref<5x4x3x2xi32> -/// -/// Case 2. If an "internal" dim of `vectorType` does not match the -/// corresponding trailing dim in `memrefType` then the remaining -/// leading dims of `vectorType` have to be 1 (the first non-matching -/// dim can be arbitrary). +/// Examples: /// -///Ex. 2.1 non-contiguous slice, 2 != 3 and the leading dim != <1> -/// vector<2x2x2xi32> from memref<5x4x3x2xi32> -///Ex. 2
[llvm-branch-commits] [mlir] [MLIR] Fix incorrect slice contiguity inference in `vector::isContiguousSlice` (PR #142422)
llvmbot wrote: @llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-vector Author: Momchil Velikov (momchil-velikov) Changes Previously, slices were sometimes marked as non-contiguous when they were actually contiguous. This occurred when the vector type had leading unit dimensions, e.g., `vector<1x1x...x1xd0xd1x...xdn-1xT>`. In such cases, only the trailing `n` dimensions of the memref need to be contiguous, not the entire vector rank. This affects how `FlattenContiguousRowMajorTransfer{Read,Write}Pattern` flattens `transfer_read` and `transfer_write` ops. The pattern used to collapse a number of dimensions equal to the vector rank, which may be incorrect when leading dimensions are unit-sized. This patch fixes the issue by collapsing only as many trailing memref dimensions as are actually contiguous. --- Full diff: https://github.com/llvm/llvm-project/pull/142422.diff 6 Files Affected: - (modified) mlir/include/mlir/Dialect/Utils/IndexingUtils.h (+2-1) - (modified) mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h (+28-26) - (modified) mlir/lib/Dialect/Utils/IndexingUtils.cpp (+4-2) - (modified) mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp (+6-2) - (modified) mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp (+8-17) - (modified) mlir/test/Dialect/Vector/vector-transfer-flatten.mlir (+78-30) ```diff diff --git a/mlir/include/mlir/Dialect/Utils/IndexingUtils.h b/mlir/include/mlir/Dialect/Utils/IndexingUtils.h index 99218f491ddef..a1c8ec2db056a 100644 --- a/mlir/include/mlir/Dialect/Utils/IndexingUtils.h +++ b/mlir/include/mlir/Dialect/Utils/IndexingUtils.h @@ -40,7 +40,8 @@ class ArrayAttr; /// Assuming `sizes` is `[s0, .. sn]`, return the vector /// `[s1 * ... * sn, s2 * ... * sn, ..., sn, 1]`. /// -/// `sizes` elements are asserted to be non-negative. +/// `sizes` element `s0` is asserted to be kDynamic or non-negative. +/// `sizes` elements `s1` to `sn` are asserted to be non-negative. /// /// Return an empty vector if `sizes` is empty. 
SmallVector computeSuffixProduct(ArrayRef sizes); diff --git a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h index 6609b28d77b6c..ed06d7a029494 100644 --- a/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h +++ b/mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h @@ -49,35 +49,37 @@ FailureOr> isTranspose2DSlice(vector::TransposeOp op); /// Return true if `vectorType` is a contiguous slice of `memrefType`. /// -/// Only the N = vectorType.getRank() trailing dims of `memrefType` are -/// checked (the other dims are not relevant). Note that for `vectorType` to be -/// a contiguous slice of `memrefType`, the trailing dims of the latter have -/// to be contiguous - this is checked by looking at the corresponding strides. +/// The leading unit dimensions of the vector type are ignored as they +/// are not relevant to the result. Let N be the number of the vector +/// dimensions after ignoring a leading sequence of unit ones. /// -/// There might be some restriction on the leading dim of `VectorType`: +/// For `vectorType` to be a contiguous slice of `memrefType` +/// a) the N trailing dimensions of the latter must be contiguous, and +/// b) the trailing N dimensions of `vectorType` and `memrefType`, +/// except the first of them, must match. /// -/// Case 1. If all the trailing dims of `vectorType` match the trailing dims -/// of `memrefType` then the leading dim of `vectorType` can be -/// arbitrary. -/// -///Ex. 1.1 contiguous slice, perfect match -/// vector<4x3x2xi32> from memref<5x4x3x2xi32> -///Ex. 1.2 contiguous slice, the leading dim does not match (2 != 4) -/// vector<2x3x2xi32> from memref<5x4x3x2xi32> -/// -/// Case 2. If an "internal" dim of `vectorType` does not match the -/// corresponding trailing dim in `memrefType` then the remaining -/// leading dims of `vectorType` have to be 1 (the first non-matching -/// dim can be arbitrary). +/// Examples: /// -///Ex. 
2.1 non-contiguous slice, 2 != 3 and the leading dim != <1> -/// vector<2x2x2xi32> from memref<5x4x3x2xi32> -///Ex. 2.2 contiguous slice, 2 != 3 and the leading dim == <1> -/// vector<1x2x2xi32> from memref<5x4x3x2xi32> -///Ex. 2.3. contiguous slice, 2 != 3 and the leading dims == <1x1> -/// vector<1x1x2x2xi32> from memref<5x4x3x2xi32> -///Ex. 2.4. non-contiguous slice, 2 != 3 and the leading dims != <1x1> -/// vector<2x1x2x2xi32> from memref<5x4x3x2xi32>) +/// Ex.1 contiguous slice, perfect match +/// vector<4x3x2xi32> from memref<5x4x3x2xi32> +/// Ex.2 contiguous slice, the leading dim does not match (2 != 4) +/// vector<2x3x2xi32> from memref<5x4x3x2xi32> +/// Ex.3 non-contiguous slice, 2 != 3 +/// vector<2x2x2xi32> from memref<5x4x3x2xi32> +/// Ex.4 contiguous slice, leading unit dimension of t