[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)
https://github.com/dianqk updated https://github.com/llvm/llvm-project/pull/133082 >From a78eb0808e553ac933c39e41164bb530ce025a0e Mon Sep 17 00:00:00 2001 From: dianqk Date: Wed, 26 Mar 2025 21:27:43 +0800 Subject: [PATCH] [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) Using `blockaddress` should be more reliable than determining if an operand comes from a jump table index. Alternative: Add the `MachineInstr::MIFlag::ComputedGoto` flag when lowering `indirectbr`. But I don't think this approach is suitable to backport. (cherry picked from commit 66f158d91803875de63d8f2a437ce8ecb22c4141) --- llvm/include/llvm/CodeGen/MachineBasicBlock.h | 9 + llvm/include/llvm/CodeGen/MachineInstr.h | 16 +- llvm/lib/CodeGen/TailDuplicator.cpp | 2 +- .../CodeGen/X86/tail-dup-computed-goto.mir| 265 +- 4 files changed, 203 insertions(+), 89 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h index 0b803a9724742..11efb2f656a7a 100644 --- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h +++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h @@ -311,6 +311,15 @@ class MachineBasicBlock const MachineFunction *getParent() const { return xParent; } MachineFunction *getParent() { return xParent; } + /// Returns true if the original IR terminator is an `indirectbr`. This + /// typically corresponds to a `goto` in C, rather than jump tables. + bool terminatorIsComputedGoto() const { +return back().isIndirectBranch() && + llvm::all_of(successors(), [](const MachineBasicBlock *Succ) { + return Succ->isIRBlockAddressTaken(); + }); + } + using instr_iterator = Instructions::iterator; using const_instr_iterator = Instructions::const_iterator; using reverse_instr_iterator = Instructions::reverse_iterator; diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index b26cabe801ee8..997d6a5554e06 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -994,17 +994,8 @@ class MachineInstr /// Return true if this is an indirect branch, such as a /// branch through a register. - bool isIndirectBranch(QueryType Type = AnyInBundle, -bool IncludeJumpTable = true) const { -return hasProperty(MCID::IndirectBranch, Type) && - (IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) { - return Op.isJTI(); -})); - } - - bool isComputedGoto(QueryType Type = AnyInBundle) const { -// Jump tables are not considered computed gotos. -return isIndirectBranch(Type, /*IncludeJumpTable=*/false); + bool isIndirectBranch(QueryType Type = AnyInBundle) const { +return hasProperty(MCID::IndirectBranch, Type); } /// Return true if this is a branch which may fall @@ -2088,6 +2079,9 @@ class MachineInstr MCSymbol *PreInstrSymbol, MCSymbol *PostInstrSymbol, MDNode *HeapAllocMarker, MDNode *PCSections, uint32_t CFIType, MDNode *MMRAs); + + /// Returns true if all successors are IRBlockAddressTaken. 
+ bool jumpToIRBlockAddressTaken() const; }; /// Special DenseMapInfo traits to compare MachineInstr* by *value* of the diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp index 21f75458c90f3..b0de3c322ddd0 100644 --- a/llvm/lib/CodeGen/TailDuplicator.cpp +++ b/llvm/lib/CodeGen/TailDuplicator.cpp @@ -604,7 +604,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple, bool HasComputedGoto = false; if (!TailBB.empty()) { HasIndirectbr = TailBB.back().isIndirectBranch(); -HasComputedGoto = TailBB.back().isComputedGoto(); +HasComputedGoto = TailBB.terminatorIsComputedGoto(); } if (HasIndirectbr && PreRegAlloc) diff --git a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir index a472dc67d8d51..17de405928d37 100644 --- a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir +++ b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir @@ -2,15 +2,27 @@ # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication -tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s # Check that only the computed goto is not be restrict by tail-dup-pred-size and tail-dup-succ-size. --- | + @computed_goto.dispatch = constant [5 x ptr] [ptr null, ptr blockaddress(@computed_goto, %bb1), ptr blockaddress(@computed_goto, %bb2), ptr blockaddress(@computed_goto, %bb3), ptr blockaddress(@computed_goto, %bb4)] declare i64 @f0() declare i64 @f1() declare i64 @f2() declare i64 @f3() declare i64 @f4() declare i64 @f5() - @computed_goto.dispatch = external global [5 x ptr] - define void @computed_goto() { ret void } + define void @computed_goto() { +start: + ret void +bb1: + ret
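For readers unfamiliar with the pattern this patch detects: a "computed goto" comes from the GNU labels-as-values extension, where label addresses are taken with `&&label` and jumped to with `goto *ptr`. Clang lowers the taken label addresses to `blockaddress` constants and the jump to an `indirectbr`, so a terminator whose successor blocks are all IR-block-address-taken is a computed goto, while a jump-table dispatch lowered from a `switch` also ends in an indirect branch at the MIR level but does not mark its successors that way. A minimal sketch (the interpreter, its opcodes, and all names are illustrative, not taken from the patch or its tests):

```cpp
// Toy bytecode interpreter using the GNU labels-as-values extension
// (accepted by Clang and GCC). Each &&label forces a `blockaddress`
// constant and the `goto *` becomes an `indirectbr` terminator whose
// successors are all IR-block-address-taken.
#include <cstdio>

static int interp(const unsigned char *ops) {
  static void *dispatch[] = {&&op_add, &&op_sub, &&op_halt};
  int acc = 0, i = 0;
  goto *dispatch[ops[i]]; // computed goto
op_add:
  acc += 1;
  goto *dispatch[ops[++i]];
op_sub:
  acc -= 1;
  goto *dispatch[ops[++i]];
op_halt:
  return acc;
}

int main() {
  const unsigned char prog[] = {0, 0, 1, 2}; // add, add, sub, halt
  std::printf("%d\n", interp(prog));         // prints 1
  return 0;
}
```

Compiling a function like this with `clang -S -emit-llvm` should show the `blockaddress(@interp, %...)` constants in the dispatch table and `indirectbr` terminators, which is the shape `terminatorIsComputedGoto()` is meant to recognize.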
[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)
https://github.com/arsenm approved this pull request. I can't say I know much about the feature but this should strictly move in a more conservative direction https://github.com/llvm/llvm-project/pull/133082
[llvm-branch-commits] [llvm] llvm-reduce: Defer a shouldKeep call in operand reduction (PR #133387)
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/133387 >From fa597dd4161693813a3566fd1d4a3c7df1d00746 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 28 Mar 2025 12:58:20 +0700 Subject: [PATCH] llvm-reduce: Defer a shouldKeep call in operand reduction Ideally shouldKeep is only called in contexts that will successfully do something. --- llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp index b0bca015434fa..8b6446725b7d4 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp @@ -26,8 +26,8 @@ extractOperandsFromModule(Oracle &O, ReducerWorkItem &WorkItem, for (auto &I : instructions(&F)) { if (PHINode *Phi = dyn_cast(&I)) { for (auto &Op : Phi->incoming_values()) { - if (!O.shouldKeep()) { -if (Value *Reduced = ReduceValue(Op)) + if (Value *Reduced = ReduceValue(Op)) { +if (!O.shouldKeep()) Phi->setIncomingValueForBlock(Phi->getIncomingBlock(Op), Reduced); } } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
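The point of the reordering is easier to see with a toy model of the reduction oracle (a simplified standalone sketch, not the real llvm-reduce `Oracle` class or pass structure): every `shouldKeep()` query accounts for one candidate edit, so querying before knowing whether the edit is even possible pollutes the candidate space with edits that can never change anything.

```cpp
// Standalone toy model of the delta-debugging oracle (not the llvm-reduce
// API): asking the oracle about an edit we cannot actually perform wastes a
// decision slot, which is what the patch avoids by checking ReduceValue()
// before shouldKeep().
#include <optional>
#include <vector>

struct ToyOracle {
  std::vector<bool> Keep; // one precomputed decision per feasible edit
  size_t Next = 0;
  bool shouldKeep() { return Next < Keep.size() ? Keep[Next++] : true; }
};

// An edit that is only applicable to some values.
static std::optional<int> tryReduce(int V) {
  return V > 10 ? std::optional<int>(V / 2) : std::nullopt;
}

static void reduceAll(std::vector<int> &Vals, ToyOracle &O) {
  for (int &V : Vals)
    if (std::optional<int> R = tryReduce(V)) // first: is the edit possible?
      if (!O.shouldKeep())                   // only then consult the oracle
        V = *R;
}
```

With the calls in the original order, operands that `ReduceValue` cannot handle would still consume oracle decisions even though nothing can change.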
[llvm-branch-commits] [clang] [clang] Track final substitution for Subst* AST nodes (PR #132748)
https://github.com/mizvekov updated https://github.com/llvm/llvm-project/pull/132748 >From 2d4717492599f445975019339024e2d1bc02128f Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Sat, 22 Mar 2025 16:03:04 -0300 Subject: [PATCH 1/4] [clang] Track final substitution for SubstTemplateTemplateParm nodes --- clang/include/clang/AST/ASTContext.h | 9 + clang/include/clang/AST/PropertiesBase.td | 3 ++- clang/include/clang/AST/TemplateName.h | 17 - clang/include/clang/AST/Type.h | 3 ++- clang/lib/AST/ASTContext.cpp | 6 +++--- clang/lib/AST/ASTImporter.cpp | 2 +- clang/lib/AST/TemplateName.cpp | 6 -- clang/lib/AST/TextNodeDumper.cpp | 2 ++ clang/lib/Sema/SemaTemplateInstantiate.cpp | 8 ++-- 9 files changed, 33 insertions(+), 23 deletions(-) diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index 1f7c75559e1e9..14a097189ca86 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -2396,10 +2396,11 @@ class ASTContext : public RefCountedBase { const IdentifierInfo *Name) const; TemplateName getDependentTemplateName(NestedNameSpecifier *NNS, OverloadedOperatorKind Operator) const; - TemplateName - getSubstTemplateTemplateParm(TemplateName replacement, Decl *AssociatedDecl, - unsigned Index, - std::optional PackIndex) const; + TemplateName getSubstTemplateTemplateParm(TemplateName replacement, +Decl *AssociatedDecl, +unsigned Index, +std::optional PackIndex, +bool Final) const; TemplateName getSubstTemplateTemplateParmPack(const TemplateArgument &ArgPack, Decl *AssociatedDecl, unsigned Index, diff --git a/clang/include/clang/AST/PropertiesBase.td b/clang/include/clang/AST/PropertiesBase.td index 5f3a885832e2e..416914db2f7c8 100644 --- a/clang/include/clang/AST/PropertiesBase.td +++ b/clang/include/clang/AST/PropertiesBase.td @@ -729,8 +729,9 @@ let Class = PropertyTypeCase in { def : Property<"packIndex", Optional> { let Read = [{ parm->getPackIndex() }]; } + def : Property<"final", Bool> { let Read = [{ parm->getFinal() }]; } def : Creator<[{ -return ctx.getSubstTemplateTemplateParm(replacement, associatedDecl, index, packIndex); +return ctx.getSubstTemplateTemplateParm(replacement, associatedDecl, index, packIndex, final); }]>; } let Class = PropertyTypeCase in { diff --git a/clang/include/clang/AST/TemplateName.h b/clang/include/clang/AST/TemplateName.h index ce97f834bfc1d..313802502f818 100644 --- a/clang/include/clang/AST/TemplateName.h +++ b/clang/include/clang/AST/TemplateName.h @@ -413,9 +413,11 @@ class SubstTemplateTemplateParmStorage SubstTemplateTemplateParmStorage(TemplateName Replacement, Decl *AssociatedDecl, unsigned Index, - std::optional PackIndex) + std::optional PackIndex, + bool Final) : UncommonTemplateNameStorage(SubstTemplateTemplateParm, Index, -PackIndex ? *PackIndex + 1 : 0), +((PackIndex ? *PackIndex + 1 : 0) << 1) | +Final), Replacement(Replacement), AssociatedDecl(AssociatedDecl) { assert(AssociatedDecl != nullptr); } @@ -429,10 +431,15 @@ class SubstTemplateTemplateParmStorage /// This should match the result of `getParameter()->getIndex()`. unsigned getIndex() const { return Bits.Index; } + // This substitution is Final, which means the substitution is fully + // sugared: it doesn't need to be resugared later. 
+ bool getFinal() const { return Bits.Data & 1; } + std::optional getPackIndex() const { -if (Bits.Data == 0) +auto Data = Bits.Data >> 1; +if (Data == 0) return std::nullopt; -return Bits.Data - 1; +return Data - 1; } TemplateTemplateParmDecl *getParameter() const; @@ -442,7 +449,7 @@ class SubstTemplateTemplateParmStorage static void Profile(llvm::FoldingSetNodeID &ID, TemplateName Replacement, Decl *AssociatedDecl, unsigned Index, - std::optional PackIndex); + std::optional PackIndex, bool Final); }; class DeducedTemplateStorage : public UncommonTemplateNameStorage, diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index c927eb13711c1..e62b9938c9ba1 100644 --- a/clang/include/clang/AST/Type.h +++ b
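The core of the encoding change is how `Bits.Data` now carries both the optional pack index and the new `Final` bit. The same arithmetic in standalone form (plain C++, not the clang types) is handy for convincing yourself the round trip is lossless:

```cpp
// Standalone illustration of the packing used in the patch: the low bit of
// the stored word holds the Final flag, and the remaining bits keep the
// optional PackIndex biased by one so that zero still means "no pack index".
#include <cassert>
#include <cstdint>
#include <optional>

uint32_t pack(std::optional<uint32_t> PackIndex, bool Final) {
  return ((PackIndex ? *PackIndex + 1 : 0u) << 1) | (Final ? 1u : 0u);
}

bool getFinal(uint32_t Data) { return Data & 1; }

std::optional<uint32_t> getPackIndex(uint32_t Data) {
  uint32_t Biased = Data >> 1;
  if (Biased == 0)
    return std::nullopt;
  return Biased - 1;
}

int main() {
  uint32_t A = pack(std::nullopt, /*Final=*/true);
  assert(getFinal(A) && !getPackIndex(A));
  uint32_t B = pack(7u, /*Final=*/false);
  assert(!getFinal(B) && *getPackIndex(B) == 7u);
  return 0;
}
```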
[llvm-branch-commits] [llvm] [LoopInterchange] Add tests for the vectorization profitability (NFC) (PR #133665)
https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/133665 There is a problem with the current profitability check for vectorization in LoopInterchange. There are both false positives and false negatives. The former means that the heuristic may say that "an exchange is necessary to vectorize the innermost loop" even though it's already possible. The latter means that the heuristic may miss a case where an exchange is necessary to vectorize the innermost loop. Note that this is not a dependency analysis problem. These problems can occur even if the analysis is accurate (no overestimation). This patch adds tests to clarify the cases that should be fixed. The root cause of these cases is that the heuristic doesn't handle the direction of a dependency correctly. >From b53b7ce2b303ff9ea94d77b3ffe74d1697db9f3d Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Thu, 27 Mar 2025 07:04:27 + Subject: [PATCH] [LoopInterchange] Add tests for the vectorization profitability (NFC) There is a problem with the current profitability check for vectorization in LoopInterchange. There are both false positives and false negatives. The former means that the heuristic may say that "an exchange is necessary to vectorize the innermost loop" even though it's already possible. The latter means that the heuristic may miss a case where an exchange is necessary to vectorize the innermost loop. Note that this is not a dependency analysis problem. These problems can occur even if the analysis is accurate (no overestimation). This patch adds tests to clarify the cases that should be fixed. The root cause of these cases is that the heuristic doesn't handle the direction of a dependency correctly. --- .../profitability-vectorization-heuristic.ll | 108 ++ 1 file changed, 108 insertions(+) create mode 100644 llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll new file mode 100644 index 0..606117e70db86 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll @@ -0,0 +1,108 @@ +; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \ +; RUN: -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize +; RUN: FileCheck -input-file %t %s + +@A = dso_local global [256 x [256 x float]] zeroinitializer +@B = dso_local global [256 x [256 x float]] zeroinitializer +@C = dso_local global [256 x [256 x float]] zeroinitializer + +; Check that the below loops are exchanged for vectorization. +; +; for (int i = 0; i < 256; i++) { +; for (int j = 1; j < 256; j++) { +; A[i][j] = A[i][j-1] + B[i][j]; +; C[i][j] += 1; +; } +; } +; +; FIXME: These loops are not exchanged at this time due to the problem of +; profitablity heuristic for vectorization. + +; CHECK: --- !Missed +; CHECK-NEXT: Pass:loop-interchange +; CHECK-NEXT: Name:InterchangeNotProfitable +; CHECK-NEXT: Function:interchange_necesasry_for_vectorization +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization. +; CHECK-NEXT: ... 
+define void @interchange_necesasry_for_vectorization() { +entry: + br label %for.i.header + +for.i.header: + %i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ] + br label %for.j.body + +for.j.body: + %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ] + %j.dec = add nsw i64 %j, -1 + %a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j.dec + %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, i64 %j + %c.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 %i, i64 %j + %a = load float, ptr %a.load.index, align 4 + %b = load float, ptr %b.index, align 4 + %c = load float, ptr %c.index, align 4 + %add.0 = fadd float %a, %b + %a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j + store float %add.0, ptr %a.store.index, align 4 + %add.1 = fadd float %c, 1.0 + store float %add.1, ptr %c.index, align 4 + %j.next = add nuw nsw i64 %j, 1 + %cmp.j = icmp eq i64 %j.next, 256 + br i1 %cmp.j, label %for.i.inc, label %for.j.body + +for.i.inc: + %i.next = add nuw nsw i64 %i, 1 + %cmp.i = icmp eq i64 %i.next, 256 + br i1 %cmp.i, label %exit, label %for.i.header + +exit: + ret void +} + +; Check that the following innermost loop can be vectorized so that +; interchangig is unnecessary. +; +; for (int i = 0; i < 256; i++) +; for (int j = 1; j < 256; j++) +; A[i][j-1] = A[i][j] + B[i][j]; +; +; FIXME: These loops are exchanged at this time due to the problem of +; profitablity heuristic for vectorization. + +; CHECK: --- !Passed +; CHECK-NEXT: Pass
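The two kernels differ only in the direction of the inner-loop dependence, which is exactly what the current heuristic fails to take into account. Restated in plain C++ (illustrative only, mirroring the C snippets in the test comments):

```cpp
// Illustrative only: the distinction the heuristic needs is the *direction*
// of the innermost-loop dependence, not merely its existence.
#include <array>

constexpr int N = 256;
std::array<std::array<float, N>, N> A, B;

// Flow (true) dependence carried by j with distance +1: iteration j needs the
// value stored by iteration j-1, a sequential recurrence, so the inner loop
// cannot be vectorized as written and interchange would help.
void recurrence() {
  for (int i = 0; i < N; ++i)
    for (int j = 1; j < N; ++j)
      A[i][j] = A[i][j - 1] + B[i][j];
}

// Anti dependence: iteration j reads A[i][j] before iteration j+1 overwrites
// it, so a vectorizer that issues all loads before the corresponding stores
// preserves the ordering; the inner loop is already vectorizable and no
// interchange is needed.
void antiDependence() {
  for (int i = 0; i < N; ++i)
    for (int j = 1; j < N; ++j)
      A[i][j - 1] = A[i][j] + B[i][j];
}
```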
[llvm-branch-commits] [llvm] Store GUIDs in metadata (PR #133682)
https://github.com/orodley created https://github.com/llvm/llvm-project/pull/133682 This takes the existing AssignGUID pass from CtxProfAnalysis, and runs it by default, at the appropriate stages of the LTO pipeline. It also changes GlobalValue::getGUID() to retrieve the GUID from the metadata instead of computing it. We don't yet have the supporting downstream changes to make a dedicated GUID table in bitcode, nor do we use the metadata as part of ThinLTO -- it retains its existing mechanisms of recomputing GUIDs from separately saved data. That will be changed later. >From 1379952ca664e04c4aa6806a724bcda1b0fc1a48 Mon Sep 17 00:00:00 2001 From: Owen Rodley Date: Mon, 31 Mar 2025 16:16:35 +1100 Subject: [PATCH] Store GUIDs in metadata This takes the existing AssignGUID pass from CtxProfAnalysis, and runs it by default, at the appropriate stages of the LTO pipeline. It also changes GlobalValue::getGUID() to retrieve the GUID from the metadata instead of computing it. We don't yet have the supporting downstream changes to make a dedicated GUID table in bitcode, nor do we use the metadata as part of ThinLTO -- it retains its existing mechanisms of recomputing GUIDs from separately saved data. That will be changed later. --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 34 +++--- llvm/include/llvm/IR/FixedMetadataKinds.def | 1 + llvm/include/llvm/IR/GlobalValue.h| 4 +- .../llvm/Transforms/Utils/AssignGUID.h| 34 ++ llvm/lib/Analysis/CtxProfAnalysis.cpp | 44 ++- llvm/lib/IR/Globals.cpp | 33 ++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassBuilderPipelines.cpp | 9 +++- .../Instrumentation/PGOCtxProfFlattening.cpp | 2 +- .../Instrumentation/PGOCtxProfLowering.cpp| 3 +- llvm/lib/Transforms/Utils/AssignGUID.cpp | 34 ++ llvm/lib/Transforms/Utils/CMakeLists.txt | 1 + .../Transforms/Utils/CallPromotionUtils.cpp | 4 +- llvm/lib/Transforms/Utils/InlineFunction.cpp | 4 +- 14 files changed, 129 insertions(+), 79 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Utils/AssignGUID.h create mode 100644 llvm/lib/Transforms/Utils/AssignGUID.cpp diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index ede8bd2fe5001..484cc638a2d53 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -37,9 +37,6 @@ class PGOContextualProfile { // we'll need when we maintain the profiles during IPO transformations. std::map FuncInfo; - /// Get the GUID of this Function if it's defined in this module. - GlobalValue::GUID getDefinedFunctionGUID(const Function &F) const; - // This is meant to be constructed from CtxProfAnalysis, which will also set // its state piecemeal. 
PGOContextualProfile() = default; @@ -57,7 +54,7 @@ class PGOContextualProfile { const PGOCtxProfile &profiles() const { return Profiles; } bool isFunctionKnown(const Function &F) const { -return getDefinedFunctionGUID(F) != 0; +return F.getGUID() != 0; } StringRef getFunctionName(GlobalValue::GUID GUID) const { @@ -69,22 +66,22 @@ class PGOContextualProfile { uint32_t getNumCounters(const Function &F) const { assert(isFunctionKnown(F)); -return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex; +return FuncInfo.find(F.getGUID())->second.NextCounterIndex; } uint32_t getNumCallsites(const Function &F) const { assert(isFunctionKnown(F)); -return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex; +return FuncInfo.find(F.getGUID())->second.NextCallsiteIndex; } uint32_t allocateNextCounterIndex(const Function &F) { assert(isFunctionKnown(F)); -return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex++; +return FuncInfo.find(F.getGUID())->second.NextCounterIndex++; } uint32_t allocateNextCallsiteIndex(const Function &F) { assert(isFunctionKnown(F)); -return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex++; +return FuncInfo.find(F.getGUID())->second.NextCallsiteIndex++; } using ConstVisitor = function_ref; @@ -145,26 +142,5 @@ class CtxProfAnalysisPrinterPass const PrintMode Mode; }; -/// Assign a GUID to functions as metadata. GUID calculation takes linkage into -/// account, which may change especially through and after thinlto. By -/// pre-computing and assigning as metadata, this mechanism is resilient to such -/// changes (as well as name changes e.g. suffix ".llvm." additions). - -// FIXME(mtrofin): we can generalize this mechanism to calculate a GUID early in -// the pass pipeline, associate it with any Global Value, and then use it for -// PGO and ThinLTO. -// At that point, this should be moved elsewhere. -class AssignGUIDPass : public PassInfoMixin { -public:
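A rough sketch of the mechanism being promoted here, using stable LLVM APIs: the GUID is the MD5 hash of the global's identifier (the same computation `GlobalValue::getGUID` has historically done), computed once and stored as an i64 constant in a metadata attachment so later pipeline stages can read it back instead of recomputing it after linkage or name changes. The metadata kind string "guid" and the helper names below are assumptions for illustration, not necessarily what the pass uses.

```cpp
// Hedged sketch, not the actual AssignGUID implementation: attach a GUID to a
// function as metadata and read it back later.
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MD5.h"

using namespace llvm;

static void assignGUID(Function &F) {
  if (F.getMetadata("guid"))
    return; // already assigned on an earlier run
  // MD5 of the mangled/global identifier, as getGUID computes it.
  uint64_t GUID = MD5Hash(F.getGlobalIdentifier());
  LLVMContext &Ctx = F.getContext();
  Metadata *Ops[] = {ConstantAsMetadata::get(
      ConstantInt::get(Type::getInt64Ty(Ctx), GUID))};
  F.setMetadata("guid", MDNode::get(Ctx, Ops));
}

static uint64_t readGUID(const Function &F) {
  MDNode *MD = F.getMetadata("guid");
  if (!MD)
    return 0; // no GUID recorded
  return mdconst::extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
}
```

Because the value is pinned in metadata at assignment time, later renames (e.g. ".llvm." suffixes) or linkage changes no longer change the GUID that downstream consumers observe.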
[llvm-branch-commits] [llvm] Store GUIDs in metadata (PR #133682)
https://github.com/orodley updated https://github.com/llvm/llvm-project/pull/133682 >From dd0751618d4eac29a6af13b2e747ed505ec9b321 Mon Sep 17 00:00:00 2001 From: Owen Rodley Date: Mon, 31 Mar 2025 16:16:35 +1100 Subject: [PATCH] Store GUIDs in metadata This takes the existing AssignGUID pass from CtxProfAnalysis, and runs it by default, at the appropriate stages of the LTO pipeline. It also changes GlobalValue::getGUID() to retrieve the GUID from the metadata instead of computing it. We don't yet have the supporting downstream changes to make a dedicated GUID table in bitcode, nor do we use the metadata as part of ThinLTO -- it retains its existing mechanisms of recomputing GUIDs from separately saved data. That will be changed later. --- llvm/include/llvm/Analysis/CtxProfAnalysis.h | 36 +++ llvm/include/llvm/IR/FixedMetadataKinds.def | 1 + llvm/include/llvm/IR/GlobalValue.h| 4 +- .../llvm/Transforms/Utils/AssignGUID.h| 34 ++ llvm/lib/Analysis/CtxProfAnalysis.cpp | 44 ++- llvm/lib/IR/Globals.cpp | 33 ++ llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassBuilderPipelines.cpp | 9 +++- .../Instrumentation/PGOCtxProfFlattening.cpp | 2 +- .../Instrumentation/PGOCtxProfLowering.cpp| 3 +- llvm/lib/Transforms/Utils/AssignGUID.cpp | 34 ++ llvm/lib/Transforms/Utils/CMakeLists.txt | 1 + .../Transforms/Utils/CallPromotionUtils.cpp | 5 +-- llvm/lib/Transforms/Utils/InlineFunction.cpp | 4 +- 14 files changed, 129 insertions(+), 82 deletions(-) create mode 100644 llvm/include/llvm/Transforms/Utils/AssignGUID.h create mode 100644 llvm/lib/Transforms/Utils/AssignGUID.cpp diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index ede8bd2fe5001..2e0b97b3844d9 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -37,9 +37,6 @@ class PGOContextualProfile { // we'll need when we maintain the profiles during IPO transformations. std::map FuncInfo; - /// Get the GUID of this Function if it's defined in this module. - GlobalValue::GUID getDefinedFunctionGUID(const Function &F) const; - // This is meant to be constructed from CtxProfAnalysis, which will also set // its state piecemeal. 
PGOContextualProfile() = default; @@ -56,9 +53,7 @@ class PGOContextualProfile { const PGOCtxProfile &profiles() const { return Profiles; } - bool isFunctionKnown(const Function &F) const { -return getDefinedFunctionGUID(F) != 0; - } + bool isFunctionKnown(const Function &F) const { return F.getGUID() != 0; } StringRef getFunctionName(GlobalValue::GUID GUID) const { auto It = FuncInfo.find(GUID); @@ -69,22 +64,22 @@ class PGOContextualProfile { uint32_t getNumCounters(const Function &F) const { assert(isFunctionKnown(F)); -return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex; +return FuncInfo.find(F.getGUID())->second.NextCounterIndex; } uint32_t getNumCallsites(const Function &F) const { assert(isFunctionKnown(F)); -return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex; +return FuncInfo.find(F.getGUID())->second.NextCallsiteIndex; } uint32_t allocateNextCounterIndex(const Function &F) { assert(isFunctionKnown(F)); -return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex++; +return FuncInfo.find(F.getGUID())->second.NextCounterIndex++; } uint32_t allocateNextCallsiteIndex(const Function &F) { assert(isFunctionKnown(F)); -return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex++; +return FuncInfo.find(F.getGUID())->second.NextCallsiteIndex++; } using ConstVisitor = function_ref; @@ -145,26 +140,5 @@ class CtxProfAnalysisPrinterPass const PrintMode Mode; }; -/// Assign a GUID to functions as metadata. GUID calculation takes linkage into -/// account, which may change especially through and after thinlto. By -/// pre-computing and assigning as metadata, this mechanism is resilient to such -/// changes (as well as name changes e.g. suffix ".llvm." additions). - -// FIXME(mtrofin): we can generalize this mechanism to calculate a GUID early in -// the pass pipeline, associate it with any Global Value, and then use it for -// PGO and ThinLTO. -// At that point, this should be moved elsewhere. -class AssignGUIDPass : public PassInfoMixin { -public: - explicit AssignGUIDPass() = default; - - /// Assign a GUID *if* one is not already assign, as a function metadata named - /// `GUIDMetadataName`. - PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); - static const char *GUIDMetadataName; - // This should become GlobalValue::getGUID - static uint64_t getGUID(const Function &F); -}; - } // namespace llvm #endif // LLVM_ANALYSIS_CTX
[llvm-branch-commits] [llvm] Store GUIDs in metadata (PR #133682)
orodley wrote:
> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://graphite.dev/docs/merge-pull-requests).

Stack, as managed by Graphite:
* #133682 (this PR)
* #129644
* `main`

https://github.com/llvm/llvm-project/pull/133682
[llvm-branch-commits] [llvm] Store GUIDs in metadata (PR #133682)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff 8ef355aa433a44220eaf0062039b53770ebb9835 1379952ca664e04c4aa6806a724bcda1b0fc1a48 --extensions h,cpp -- llvm/include/llvm/Transforms/Utils/AssignGUID.h llvm/lib/Transforms/Utils/AssignGUID.cpp llvm/include/llvm/Analysis/CtxProfAnalysis.h llvm/include/llvm/IR/GlobalValue.h llvm/lib/Analysis/CtxProfAnalysis.cpp llvm/lib/IR/Globals.cpp llvm/lib/Passes/PassBuilder.cpp llvm/lib/Passes/PassBuilderPipelines.cpp llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp llvm/lib/Transforms/Utils/CallPromotionUtils.cpp llvm/lib/Transforms/Utils/InlineFunction.cpp `` View the diff from clang-format here. ``diff diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h b/llvm/include/llvm/Analysis/CtxProfAnalysis.h index 484cc638a2..2e0b97b384 100644 --- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h +++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h @@ -53,9 +53,7 @@ public: const PGOCtxProfile &profiles() const { return Profiles; } - bool isFunctionKnown(const Function &F) const { -return F.getGUID() != 0; - } + bool isFunctionKnown(const Function &F) const { return F.getGUID() != 0; } StringRef getFunctionName(GlobalValue::GUID GUID) const { auto It = FuncInfo.find(GUID); diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index d5840673ad..6912338ce5 100644 --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -655,7 +655,6 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, Function &Callee, // times, and the indirect BB, IndirectCount times Ctx.counters()[DirectID] = DirectCount; Ctx.counters()[IndirectID] = IndirectCount; - }; CtxProf.update(ProfileUpdater, Caller); return &DirectCall; `` https://github.com/llvm/llvm-project/pull/133682 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoopInterchange] Improve profitability check for vectorization (PR #133672)
https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/133672 The vectorization profitability has a process to check whether a given loop can be vectorized or not. Since the process is conservative, a loop that can be vectorized may be deemed not to be possible. This can trigger unnecessary exchanges. This patch improves the profitability decision by mitigating such misjudgments. Before this patch, we considered a loop to be vectorizable only when there are no loop carried dependencies with the IV of the loop. However, a loop carried dependency doesn't prevent vectorization if the distance is positive. This patch makes the vectorization check more accurate by allowing a loop with the positive dependency. Note that it is difficult to make a complete decision whether a loop can be vectorized or not. To achieve this, we must check the vector width and the distance of dependency. >From cdec72a2b2c365e29cbe05f2ad2d21b403104999 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Thu, 27 Mar 2025 10:45:26 + Subject: [PATCH] [LoopInterchange] Improve profitability check for vectorization The vectorization profitability has a process to check whether a given loop can be vectorized or not. Since the process is conservative, a loop that can be vectorized may be deemed not to be possible. This can trigger unnecessary exchanges. This patch improves the profitability decision by mitigating such misjudgments. Before this patch, we considered a loop to be vectorizable only when there are no loop carried dependencies with the IV of the loop. However, a loop carried dependency doesn't prevent vectorization if the distance is positive. This patch makes the vectorization check more accurate by allowing a loop with the positive dependency. Note that it is difficult to make a complete decision whether a loop can be vectorized or not. To achieve this, we must check the vector width and the distance of dependency. --- .../lib/Transforms/Scalar/LoopInterchange.cpp | 128 ++ .../profitability-vectorization-heuristic.ll | 8 +- 2 files changed, 106 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index b6b0b7d7a947a..0c3a9cbfeed5f 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -17,8 +17,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" @@ -80,6 +80,21 @@ enum class RuleTy { ForVectorization, }; +/// Store the information about if corresponding direction vector was negated +/// by normalization or not. This is necessary to restore the original one from +/// a row of a dependency matrix because we only manage normalized direction +/// vectors. Also, duplicate vectors are eliminated, so there may be both +/// original and negated vectors for a single entry (a row of dependency +/// matrix). E.g., if there are two direction vectors `[< =]` and `[> =]`, the +/// later one will be converted to the same as former one by normalization, so +/// only `[< =]` would be retained in the final result. 
+struct NegatedStatus { + bool Original = false; + bool Negated = false; + + bool isNonNegativeDir(char Dir) const; +}; + } // end anonymous namespace // Minimum loop depth supported. @@ -126,9 +141,10 @@ static void printDepMatrix(CharMatrix &DepMatrix) { } #endif -static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, - Loop *L, DependenceInfo *DI, - ScalarEvolution *SE, +static bool populateDependencyMatrix(CharMatrix &DepMatrix, + std::vector &NegStatusVec, + unsigned Level, Loop *L, + DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE) { using ValueVector = SmallVector; @@ -167,7 +183,9 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, return false; } ValueVector::iterator I, IE, J, JE; - StringSet<> Seen; + + // Manage all found direction vectors. and map it to the index of DepMatrix. + StringMap Seen; for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { for (J = I, JE = MemInstr.end(); J != JE; ++J) { @@ -182,7 +200,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, assert(D->isOrdered() && "Expected an output, flow or anti dep."); // If the direction vector is negative, normalize it to // make it non-negative. -if (D->normalize(SE)) +
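The role of `NegatedStatus` is easier to see in isolation: normalization rewrites a negative direction vector into its non-negative form before deduplication, so the table must remember which of the two orientations were actually observed for each stored row. A standalone toy version (not the LoopInterchange types, and the normalization rule is deliberately simplified):

```cpp
// Toy sketch: remember whether a stored, normalized direction vector was seen
// in its original form, only after being flipped by normalization, or both.
#include <map>
#include <string>

struct NegatedStatus {
  bool Original = false; // seen in its original (non-negated) form
  bool Negated = false;  // seen only after normalization flipped it
};

static char flip(char Dir) { return Dir == '<' ? '>' : Dir == '>' ? '<' : Dir; }

// Record one direction vector, flipping rows whose leading non-'=' entry is
// '>' (i.e. a negative dependence), as normalization would.
static void record(std::map<std::string, NegatedStatus> &Seen, std::string Dir) {
  bool Negative = false;
  for (char C : Dir)
    if (C != '=') { Negative = (C == '>'); break; }
  if (Negative)
    for (char &C : Dir)
      C = flip(C);
  NegatedStatus &S = Seen[Dir];
  (Negative ? S.Negated : S.Original) = true;
}
```

For example, recording "><" and then "<>" lands both in the same normalized row "<>", with `Negated` and `Original` set respectively, which is the duplicate-elimination situation described in the comment above.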
[llvm-branch-commits] [llvm] [LoopInterchange] Improve profitability check for vectorization (PR #133672)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Ryotaro Kasuga (kasuga-fj) Changes The vectorization profitability has a process to check whether a given loop can be vectorized or not. Since the process is conservative, a loop that can be vectorized may be deemed not to be possible. This can trigger unnecessary exchanges. This patch improves the profitability decision by mitigating such misjudgments. Before this patch, we considered a loop to be vectorizable only when there are no loop carried dependencies with the IV of the loop. However, a loop carried dependency doesn't prevent vectorization if the distance is positive. This patch makes the vectorization check more accurate by allowing a loop with the positive dependency. Note that it is difficult to make a complete decision whether a loop can be vectorized or not. To achieve this, we must check the vector width and the distance of dependency. --- Full diff: https://github.com/llvm/llvm-project/pull/133672.diff 2 Files Affected: - (modified) llvm/lib/Transforms/Scalar/LoopInterchange.cpp (+103-25) - (modified) llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll (+3-5) ``diff diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index b6b0b7d7a947a..0c3a9cbfeed5f 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -17,8 +17,8 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/StringSet.h" #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/Analysis/LoopCacheAnalysis.h" #include "llvm/Analysis/LoopInfo.h" @@ -80,6 +80,21 @@ enum class RuleTy { ForVectorization, }; +/// Store the information about if corresponding direction vector was negated +/// by normalization or not. This is necessary to restore the original one from +/// a row of a dependency matrix because we only manage normalized direction +/// vectors. Also, duplicate vectors are eliminated, so there may be both +/// original and negated vectors for a single entry (a row of dependency +/// matrix). E.g., if there are two direction vectors `[< =]` and `[> =]`, the +/// later one will be converted to the same as former one by normalization, so +/// only `[< =]` would be retained in the final result. +struct NegatedStatus { + bool Original = false; + bool Negated = false; + + bool isNonNegativeDir(char Dir) const; +}; + } // end anonymous namespace // Minimum loop depth supported. @@ -126,9 +141,10 @@ static void printDepMatrix(CharMatrix &DepMatrix) { } #endif -static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, - Loop *L, DependenceInfo *DI, - ScalarEvolution *SE, +static bool populateDependencyMatrix(CharMatrix &DepMatrix, + std::vector &NegStatusVec, + unsigned Level, Loop *L, + DependenceInfo *DI, ScalarEvolution *SE, OptimizationRemarkEmitter *ORE) { using ValueVector = SmallVector; @@ -167,7 +183,9 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, return false; } ValueVector::iterator I, IE, J, JE; - StringSet<> Seen; + + // Manage all found direction vectors. and map it to the index of DepMatrix. 
+ StringMap Seen; for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) { for (J = I, JE = MemInstr.end(); J != JE; ++J) { @@ -182,7 +200,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, assert(D->isOrdered() && "Expected an output, flow or anti dep."); // If the direction vector is negative, normalize it to // make it non-negative. -if (D->normalize(SE)) +bool Normalized = D->normalize(SE); +if (Normalized) LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n"); LLVM_DEBUG(StringRef DepType = D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output"; @@ -214,8 +233,17 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, } // Make sure we only add unique entries to the dependency matrix. -if (Seen.insert(StringRef(Dep.data(), Dep.size())).second) +unsigned Index = DepMatrix.size(); +auto [Ite, Inserted] = +Seen.try_emplace(StringRef(Dep.data(), Dep.size()), Index); +if (Inserted) { DepMatrix.push_back(Dep); + NegStatusVec.push_back(NegatedStatus{}); +} else + Index = Ite->second; + +NegatedStatus &Status = NegStatusVec[Index]; +(Normalized ? Status.Negated : Status.Or
[llvm-branch-commits] [llvm] [Metadata] Preserve MD_prof when merging instructions when one is missing. (PR #132433)
@@ -3436,6 +3433,16 @@ static void combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(LLVMContext::MD_callsite, MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite)); } + + // Merge prof metadata. + // Handle separately to support cases where only one instruction has the + // metadata. + auto JProf = J->getMetadata(LLVMContext::MD_prof); + auto KProf = K->getMetadata(LLVMContext::MD_prof); + if (!AAOnly && (JProf || KProf)) { snehasish wrote: Removing the condition was intentional. In the case that `DoesKMove` is true, the merging of md_prof can be skipped if `J` does not have prof metadata (a minor optimization). I felt that it was simpler to just perform the merge regardless. Let me know if you feel otherwise. https://github.com/llvm/llvm-project/pull/132433
[llvm-branch-commits] [llvm] [SDAG] Introduce inbounds flag for pointer arithmetic (PR #131862)
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/131862 >From 42481628ec10fe863bc9bca94efa84cd414d385b Mon Sep 17 00:00:00 2001 From: Fabian Ritter Date: Mon, 17 Mar 2025 06:51:16 -0400 Subject: [PATCH] [SDAG] Introduce inbounds flag for pointer arithmetic This patch introduces an inbounds SDNodeFlag, to show that a pointer addition SDNode implements an inbounds getelementptr operation (i.e., the pointer operand is in bounds wrt. the allocated object it is based on, and the arithmetic does not change that). The flag is set in the DAG construction when lowering inbounds GEPs. Inbounds information is useful in the ISel when selecting memory instructions that perform address computations whose intermediate steps must be in the same memory region as the final result. A follow-up patch will start using it for AMDGPU's flat memory instructions, where the immediate offset must not affect the memory aperture of the address. A similar patch for gMIR and GlobalISel will follow. For SWDEV-516125. --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h| 9 +++-- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp| 3 +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 3 +++ .../CodeGen/X86/merge-store-partially-alias-loads.ll | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 2283f99202e2f..13ac65f5d731c 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -415,12 +415,15 @@ struct SDNodeFlags { Unpredictable = 1 << 13, // Compare instructions which may carry the samesign flag. SameSign = 1 << 14, +// Pointer arithmetic instructions that remain in bounds, e.g., implementing +// an inbounds GEP. +InBounds = 1 << 15, // NOTE: Please update LargestValue in LLVM_DECLARE_ENUM_AS_BITMASK below // the class definition when adding new flags. PoisonGeneratingFlags = NoUnsignedWrap | NoSignedWrap | Exact | Disjoint | -NonNeg | NoNaNs | NoInfs | SameSign, +NonNeg | NoNaNs | NoInfs | SameSign | InBounds, FastMathFlags = NoNaNs | NoInfs | NoSignedZeros | AllowReciprocal | AllowContract | ApproximateFuncs | AllowReassociation, }; @@ -455,6 +458,7 @@ struct SDNodeFlags { void setAllowReassociation(bool b) { setFlag(b); } void setNoFPExcept(bool b) { setFlag(b); } void setUnpredictable(bool b) { setFlag(b); } + void setInBounds(bool b) { setFlag(b); } // These are accessors for each flag. 
bool hasNoUnsignedWrap() const { return Flags & NoUnsignedWrap; } @@ -472,6 +476,7 @@ struct SDNodeFlags { bool hasAllowReassociation() const { return Flags & AllowReassociation; } bool hasNoFPExcept() const { return Flags & NoFPExcept; } bool hasUnpredictable() const { return Flags & Unpredictable; } + bool hasInBounds() const { return Flags & InBounds; } bool operator==(const SDNodeFlags &Other) const { return Flags == Other.Flags; @@ -481,7 +486,7 @@ struct SDNodeFlags { }; LLVM_DECLARE_ENUM_AS_BITMASK(decltype(SDNodeFlags::None), - SDNodeFlags::SameSign); + SDNodeFlags::InBounds); inline SDNodeFlags operator|(SDNodeFlags LHS, SDNodeFlags RHS) { LHS |= RHS; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e3c34382d6354..e8336399d289f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4284,6 +4284,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (NW.hasNoUnsignedWrap() || (int64_t(Offset) >= 0 && NW.hasNoUnsignedSignedWrap())) Flags |= SDNodeFlags::NoUnsignedWrap; +Flags.setInBounds(NW.isInBounds()); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, DAG.getConstant(Offset, dl, N.getValueType()), Flags); @@ -4327,6 +4328,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (NW.hasNoUnsignedWrap() || (Offs.isNonNegative() && NW.hasNoUnsignedSignedWrap())) Flags.setNoUnsignedWrap(true); +Flags.setInBounds(NW.isInBounds()); OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType()); @@ -4389,6 +4391,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // pointer index type (add nuw). SDNodeFlags AddFlags; AddFlags.setNoUnsignedWrap(NW.hasNoUnsignedWrap()); + AddFlags.setInBounds(NW.isInBounds()); N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN, AddFlags); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp ind
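To connect the new DAG flag back to something visible at the IR level: `SDNodeFlags::InBounds` is populated from the `inbounds` marker on GEPs, which front ends and passes create as below. This is a sketch against the stable IRBuilder API rather than the SelectionDAG code the patch touches; the function name and emitted value names are illustrative.

```cpp
// Emit &base[idx] twice: once with no bounds promise and once with the
// `inbounds` guarantee that the new SDNodeFlags::InBounds bit is derived from.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"

using namespace llvm;

void emitAddressingDemo(Module &M) {
  LLVMContext &Ctx = M.getContext();
  IRBuilder<> B(Ctx);
  FunctionType *FTy = FunctionType::get(
      B.getVoidTy(), {PointerType::get(Ctx, 0), B.getInt64Ty()},
      /*isVarArg=*/false);
  Function *F = Function::Create(FTy, Function::ExternalLinkage,
                                 "addressing_demo", &M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  B.SetInsertPoint(BB);

  Value *Base = F->getArg(0);
  Value *Idx = F->getArg(1);
  // Plain GEP: no in-bounds promise, so no InBounds flag on the lowered adds.
  B.CreateGEP(B.getInt32Ty(), Base, {Idx}, "any.addr");
  // Inbounds GEP: per this patch, SelectionDAGBuilder tags the resulting
  // ISD::ADD nodes with SDNodeFlags::InBounds.
  B.CreateInBoundsGEP(B.getInt32Ty(), Base, {Idx}, "inb.addr");
  B.CreateRetVoid();
}
```

The follow-up use mentioned in the commit message is that a backend can then trust that intermediate address sums stay within the original object, e.g. when folding offsets into AMDGPU flat memory instructions.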
[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)
regehr wrote: yeah I don't feel like we'll get interesting variants out of this one very often, but who knows. at some point it would be interesting to get a global view of which passes are providing interesting variants in practice and which ones aren't. anyhow-- LGTM, this shouldn't slow reductions down noticeably. https://github.com/llvm/llvm-project/pull/133627
[llvm-branch-commits] [llvm] [Metadata] Preserve MD_prof when merging instructions when one is missing. (PR #132433)
@@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 +; RUN: opt < %s -passes='simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=HOIST + +; Test case based on C++ code with manualy annotated !prof metadata. +; This is to test that when calls to 'func1' from 'if.then' block +; and 'if.else' block are hoisted, the branch_weights are merged and +; attached to merged call rather than dropped. +; +; int func1(int a, int b) ; +; int func2(int a, int b) ; + +; int func(int a, int b, bool c) { +;int sum= 0; +;if(c) { +;sum += func1(a, b); +;} else { +;sum += func1(a, b); +;sum -= func2(a, b); +;} +;return sum; +; } +define i32 @_Z4funciib(i32 %a, i32 %b, i1 %c) { +; HOIST-LABEL: define i32 @_Z4funciib +; HOIST-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) { +; HOIST-NEXT: entry: +; HOIST-NEXT:[[CALL:%.*]] = tail call i32 @_Z5func1ii(i32 [[A]], i32 [[B]]), !prof [[PROF0:![0-9]+]] +; HOIST-NEXT:br i1 [[C]], label [[IF_END:%.*]], label [[IF_ELSE:%.*]] +; HOIST: if.else: +; HOIST-NEXT:[[CALL3:%.*]] = tail call i32 @_Z5func2ii(i32 [[A]], i32 [[B]]) +; HOIST-NEXT:[[SUB:%.*]] = sub i32 [[CALL]], [[CALL3]] +; HOIST-NEXT:br label [[IF_END]] +; HOIST: if.end: +; HOIST-NEXT:[[SUM_0:%.*]] = phi i32 [ [[SUB]], [[IF_ELSE]] ], [ [[CALL]], [[ENTRY:%.*]] ] +; HOIST-NEXT:ret i32 [[SUM_0]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = tail call i32 @_Z5func1ii(i32 %a, i32 %b), !prof !0 + br label %if.end + +if.else: ; preds = %entry + %call1 = tail call i32 @_Z5func1ii(i32 %a, i32 %b) + %call3 = tail call i32 @_Z5func2ii(i32 %a, i32 %b) + %sub = sub i32 %call1, %call3 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %sum.0 = phi i32 [ %call, %if.then ], [ %sub, %if.else ] + ret i32 %sum.0 +} + +declare i32 @_Z5func1ii(i32, i32) + +declare i32 @_Z5func2ii(i32, i32) + +!0 = !{!"branch_weights", i32 10} +!1 = !{!"branch_weights", i32 90} snehasish wrote: Removed. https://github.com/llvm/llvm-project/pull/132433 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Metadata] Preserve MD_prof when merging instructions when one is missing. (PR #132433)
https://github.com/snehasish updated https://github.com/llvm/llvm-project/pull/132433 >From 42a9972571f8c8872e7d71def2236be400428606 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Fri, 21 Mar 2025 17:00:38 + Subject: [PATCH] Update tests, apply clang-tidy suggestions --- llvm/lib/Transforms/Utils/Local.cpp | 19 -- ...rect-call-branch-weights-preserve-hoist.ll | 62 ++ ...irect-call-branch-weights-preserve-sink.ll | 63 +++ 3 files changed, 138 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-sink.ll diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index edec0e7a94422..c136825d47b9c 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3355,9 +3355,10 @@ static void combineMetadata(Instruction *K, const Instruction *J, case LLVMContext::MD_invariant_group: // Preserve !invariant.group in K. break; - // Keep empty cases for mmra, memprof, and callsite to prevent them from - // being removed as unknown metadata. The actual merging is handled + // Keep empty cases for prof, mmra, memprof, and callsite to prevent them + // from being removed as unknown metadata. The actual merging is handled // separately below. + case LLVMContext::MD_prof: case LLVMContext::MD_mmra: case LLVMContext::MD_memprof: case LLVMContext::MD_callsite: @@ -3386,10 +3387,6 @@ static void combineMetadata(Instruction *K, const Instruction *J, if (!AAOnly) K->setMetadata(Kind, JMD); break; - case LLVMContext::MD_prof: -if (!AAOnly && DoesKMove) - K->setMetadata(Kind, MDNode::getMergedProfMetadata(KMD, JMD, K, J)); -break; case LLVMContext::MD_noalias_addrspace: if (DoesKMove) K->setMetadata(Kind, @@ -3436,6 +3433,16 @@ static void combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(LLVMContext::MD_callsite, MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite)); } + + // Merge prof metadata. + // Handle separately to support cases where only one instruction has the + // metadata. + auto *JProf = J->getMetadata(LLVMContext::MD_prof); + auto *KProf = K->getMetadata(LLVMContext::MD_prof); + if (!AAOnly && (JProf || KProf)) { +K->setMetadata(LLVMContext::MD_prof, + MDNode::getMergedProfMetadata(KProf, JProf, K, J)); + } } void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, diff --git a/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll new file mode 100644 index 0..d6058134f5285 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 +; RUN: opt < %s -passes='simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=HOIST + +; Test case based on C++ code with manualy annotated !prof metadata. +; This is to test that when calls to 'func1' from 'if.then' block +; and 'if.else' block are hoisted, the branch_weights are merged and +; attached to merged call rather than dropped. 
+; +; int func1(int a, int b) ; +; int func2(int a, int b) ; + +; int func(int a, int b, bool c) { +;int sum= 0; +;if(c) { +;sum += func1(a, b); +;} else { +;sum += func1(a, b); +;sum -= func2(a, b); +;} +;return sum; +; } +define i32 @_Z4funciib(i32 %a, i32 %b, i1 %c) { +; HOIST-LABEL: define i32 @_Z4funciib +; HOIST-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) { +; HOIST-NEXT: entry: +; HOIST-NEXT:[[CALL:%.*]] = tail call i32 @_Z5func1ii(i32 [[A]], i32 [[B]]), !prof [[PROF0:![0-9]+]] +; HOIST-NEXT:br i1 [[C]], label [[IF_END:%.*]], label [[IF_ELSE:%.*]] +; HOIST: if.else: +; HOIST-NEXT:[[CALL3:%.*]] = tail call i32 @_Z5func2ii(i32 [[A]], i32 [[B]]) +; HOIST-NEXT:[[SUB:%.*]] = sub i32 [[CALL]], [[CALL3]] +; HOIST-NEXT:br label [[IF_END]] +; HOIST: if.end: +; HOIST-NEXT:[[SUM_0:%.*]] = phi i32 [ [[SUB]], [[IF_ELSE]] ], [ [[CALL]], [[ENTRY:%.*]] ] +; HOIST-NEXT:ret i32 [[SUM_0]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = tail call i32 @_Z5func1ii(i32 %a, i32 %b) + br label %if.end + +if.else: ; preds = %entry + %call1 = tail call i32 @_Z5func1ii(i32 %a, i32 %b), !prof !0 + %call3 = tail call i32 @_Z5func2ii(i32 %a, i32 %b) + %
[llvm-branch-commits] [llvm] [LoopInterchange] Add tests for the vectorization profitability (NFC) (PR #133665)
llvmbot wrote: @llvm/pr-subscribers-llvm-transforms Author: Ryotaro Kasuga (kasuga-fj) Changes There is a problem with the current profitability check for vectorization in LoopInterchange. There are both false positives and false negatives. The former means that the heuristic may say that "an exchange is necessary to vectorize the innermost loop" even though it's already possible. The latter means that the heuristic may miss a case where an exchange is necessary to vectorize the innermost loop. Note that this is not a dependency analysis problem. These problems can occur even if the analysis is accurate (no overestimation). This patch adds tests to clarify the cases that should be fixed. The root cause of these cases is that the heuristic doesn't handle the direction of a dependency correctly. --- Full diff: https://github.com/llvm/llvm-project/pull/133665.diff 1 Files Affected: - (added) llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll (+108) ``diff diff --git a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll new file mode 100644 index 0..606117e70db86 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll @@ -0,0 +1,108 @@ +; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \ +; RUN: -pass-remarks-output=%t -disable-output -loop-interchange-profitabilities=vectorize +; RUN: FileCheck -input-file %t %s + +@A = dso_local global [256 x [256 x float]] zeroinitializer +@B = dso_local global [256 x [256 x float]] zeroinitializer +@C = dso_local global [256 x [256 x float]] zeroinitializer + +; Check that the below loops are exchanged for vectorization. +; +; for (int i = 0; i < 256; i++) { +; for (int j = 1; j < 256; j++) { +; A[i][j] = A[i][j-1] + B[i][j]; +; C[i][j] += 1; +; } +; } +; +; FIXME: These loops are not exchanged at this time due to the problem of +; profitablity heuristic for vectorization. + +; CHECK: --- !Missed +; CHECK-NEXT: Pass:loop-interchange +; CHECK-NEXT: Name:InterchangeNotProfitable +; CHECK-NEXT: Function:interchange_necesasry_for_vectorization +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: Interchanging loops is not considered to improve cache locality nor vectorization. +; CHECK-NEXT: ... 
+define void @interchange_necesasry_for_vectorization() { +entry: + br label %for.i.header + +for.i.header: + %i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ] + br label %for.j.body + +for.j.body: + %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ] + %j.dec = add nsw i64 %j, -1 + %a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j.dec + %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, i64 %j + %c.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 %i, i64 %j + %a = load float, ptr %a.load.index, align 4 + %b = load float, ptr %b.index, align 4 + %c = load float, ptr %c.index, align 4 + %add.0 = fadd float %a, %b + %a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j + store float %add.0, ptr %a.store.index, align 4 + %add.1 = fadd float %c, 1.0 + store float %add.1, ptr %c.index, align 4 + %j.next = add nuw nsw i64 %j, 1 + %cmp.j = icmp eq i64 %j.next, 256 + br i1 %cmp.j, label %for.i.inc, label %for.j.body + +for.i.inc: + %i.next = add nuw nsw i64 %i, 1 + %cmp.i = icmp eq i64 %i.next, 256 + br i1 %cmp.i, label %exit, label %for.i.header + +exit: + ret void +} + +; Check that the following innermost loop can be vectorized so that +; interchangig is unnecessary. +; +; for (int i = 0; i < 256; i++) +; for (int j = 1; j < 256; j++) +; A[i][j-1] = A[i][j] + B[i][j]; +; +; FIXME: These loops are exchanged at this time due to the problem of +; profitablity heuristic for vectorization. + +; CHECK: --- !Passed +; CHECK-NEXT: Pass:loop-interchange +; CHECK-NEXT: Name:Interchanged +; CHECK-NEXT: Function:interchange_unnecesasry_for_vectorization +; CHECK-NEXT: Args: +; CHECK-NEXT: - String: Loop interchanged with enclosing loop. +define void @interchange_unnecesasry_for_vectorization() { +entry: + br label %for.i.header + +for.i.header: + %i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ] + br label %for.j.body + +for.j.body: + %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ] + %j.dec = add nsw i64 %j, -1 + %a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64 %i, i64 %j + %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, i64 %j + %a = load float, ptr %a.load.index, align 4 + %b = load float, ptr %b.index, align 4 + %add = fadd float %a, %b + %a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, i64
[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)
arsenm wrote:
> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite (https://graphite.dev/docs/merge-pull-requests).

Stack, as managed by Graphite:
* #133627 (this PR)
* #132686
* `main`

https://github.com/llvm/llvm-project/pull/133627
[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/133627 Extend the instruction -> return reduction with one that inserts return of function arguments. Not sure how useful this really is. This has more freedom since we could insert the return anywhere in the function, but this just inserts the return in the entry block. >From 349a15d2581c6701f947eeeb0dee6ad4728d8a58 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 24 Mar 2025 14:33:36 +0700 Subject: [PATCH] llvm-reduce: Reduce with early return of arguments Extend the instruction -> return reduction with one that inserts return of function arguments. Not sure how useful this really is. This has more freedom since we could insert the return anywhere in the function, but this just inserts the return in the entry block. --- .../reduce-values-to-return-args.ll | 77 +++ ...-values-to-return-nonvoid-noncallee-use.ll | 2 +- .../llvm-reduce/reduce-values-to-return.ll| 2 +- llvm/tools/llvm-reduce/DeltaPasses.def| 5 +- .../deltas/ReduceValuesToReturn.cpp | 42 +- .../llvm-reduce/deltas/ReduceValuesToReturn.h | 3 +- 6 files changed, 124 insertions(+), 7 deletions(-) create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll new file mode 100644 index 0..abbc643822033 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll @@ -0,0 +1,77 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=arguments-to-return --test FileCheck --test-arg --check-prefixes=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefixes=RESULT %s < %t + + +; INTERESTING-LABEL: @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return( +; RESULT-NEXT: ret i32 %arg +; RESULT-NEXT: } +define void @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) { + store i32 %arg, ptr %ptr + ret void +} + +; INTERESTING-LABEL: @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr %ptr) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return_existing_ret( +; RESULT-NEXT: ret i32 %arg +; RESULT-NEXT: } +define i32 @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr %ptr) { + store i32 %arg, ptr %ptr + ret i32 0 +} + +; INTERESTING-LABEL: @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_phi_block_use_argument_to_return( +; RESULT-NEXT: entry: +; RESULT-NEXT: ret i32 %arg +define void @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) { +entry: + br i1 %cond0, label %bb0, label %bb1 + +bb0: + %phi = phi i32 [ %arg, %entry ], [ 123, %bb1 ] + store i32 %arg, ptr %ptr0 + store i32 %phi, ptr %ptr1 + br label %bb1 + +bb1: + br i1 %cond1, label %bb0, label %bb2 + +bb2: + ret void +} + +; INTERESTING-LABEL: define {{.*}} @keep_second_arg(i32 %arg0, ptr %arg1) { +; INTERESTING: %arg1 + +; RESULT-LABEL: define ptr @keep_second_arg( +; RESULT-NEXT: ret ptr %arg1 +; RESULT-NEXT: } +define void @keep_second_arg(i32 %arg0, ptr %arg1) { + store i32 %arg0, ptr %arg1 + ret void +} + +; INTERESTING-LABEL: @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +; INTERESTING: i32 %arg2 + +; RESULT-LABEL: define i32 @multi_void_return_arg(i1 
%arg0, ptr %arg1, i32 %arg2) { +; RESULT-NEXT: entry: +; RESULT-NEXT: ret i32 %arg2 +define void @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +entry: + br i1 %arg0, label %bb0, label %bb1 + +bb0: + store i32 %arg2, ptr %arg1 + ret void + +bb1: + ret void +} diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll index 215ea97a8be91..11166479318c6 100644 --- a/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll @@ -1,7 +1,7 @@ ; Make sure we don't break on non-callee uses of funtions with a ; non-void return type. -; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=values-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t ; RUN: FileCheck --check-prefix=RESULT %s < %t ; INTERESTING-LABEL: @interesting( diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll b/llvm/test/tools/llvm-reduce/reduce-values-
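For reference, a minimal before/after sketch of what the new `arguments-to-return` reduction does in the simplest case exercised by the tests above; the function name and body here are illustrative, not taken from the patch. The reduction rewrites the return type to the argument's type and returns that argument from the entry block, dropping the rest of the body.

; Before: the interesting argument %arg is only stored through %ptr.
define void @example(i32 %arg, ptr %ptr) {
  store i32 %arg, ptr %ptr
  ret void
}

; After the reduction, assuming %arg is the value the interestingness
; test cares about: the function now returns %arg from the entry block.
define i32 @example(i32 %arg, ptr %ptr) {
  ret i32 %arg
}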
[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir Author: Matt Arsenault (arsenm) Changes Extend the instruction -> return reduction with one that inserts return of function arguments. Not sure how useful this really is. This has more freedom since we could insert the return anywhere in the function, but this just inserts the return in the entry block. --- Full diff: https://github.com/llvm/llvm-project/pull/133627.diff 6 Files Affected: - (added) llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll (+77) - (modified) llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll (+1-1) - (modified) llvm/test/tools/llvm-reduce/reduce-values-to-return.ll (+1-1) - (modified) llvm/tools/llvm-reduce/DeltaPasses.def (+4-1) - (modified) llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.cpp (+39-3) - (modified) llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.h (+2-1) ``diff diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll new file mode 100644 index 0..abbc643822033 --- /dev/null +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll @@ -0,0 +1,77 @@ +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=arguments-to-return --test FileCheck --test-arg --check-prefixes=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: FileCheck --check-prefixes=RESULT %s < %t + + +; INTERESTING-LABEL: @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return( +; RESULT-NEXT: ret i32 %arg +; RESULT-NEXT: } +define void @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) { + store i32 %arg, ptr %ptr + ret void +} + +; INTERESTING-LABEL: @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr %ptr) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return_existing_ret( +; RESULT-NEXT: ret i32 %arg +; RESULT-NEXT: } +define i32 @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr %ptr) { + store i32 %arg, ptr %ptr + ret i32 0 +} + +; INTERESTING-LABEL: @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) { +; INTERESTING: %arg + +; RESULT-LABEL: define i32 @move_phi_block_use_argument_to_return( +; RESULT-NEXT: entry: +; RESULT-NEXT: ret i32 %arg +define void @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) { +entry: + br i1 %cond0, label %bb0, label %bb1 + +bb0: + %phi = phi i32 [ %arg, %entry ], [ 123, %bb1 ] + store i32 %arg, ptr %ptr0 + store i32 %phi, ptr %ptr1 + br label %bb1 + +bb1: + br i1 %cond1, label %bb0, label %bb2 + +bb2: + ret void +} + +; INTERESTING-LABEL: define {{.*}} @keep_second_arg(i32 %arg0, ptr %arg1) { +; INTERESTING: %arg1 + +; RESULT-LABEL: define ptr @keep_second_arg( +; RESULT-NEXT: ret ptr %arg1 +; RESULT-NEXT: } +define void @keep_second_arg(i32 %arg0, ptr %arg1) { + store i32 %arg0, ptr %arg1 + ret void +} + +; INTERESTING-LABEL: @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +; INTERESTING: i32 %arg2 + +; RESULT-LABEL: define i32 @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +; RESULT-NEXT: entry: +; RESULT-NEXT: ret i32 %arg2 +define void @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) { +entry: + br i1 %arg0, label %bb0, label %bb1 + +bb0: + store i32 %arg2, ptr %arg1 + ret void + +bb1: + ret void +} diff --git 
a/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll index 215ea97a8be91..11166479318c6 100644 --- a/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll @@ -1,7 +1,7 @@ ; Make sure we don't break on non-callee uses of funtions with a ; non-void return type. -; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=values-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t +; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=instructions-to-return --test FileCheck --test-arg --check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t ; RUN: FileCheck --check-prefix=RESULT %s < %t ; INTERESTING-LABEL: @interesting( diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll b/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll index 0c36db8ebc278..2af87aad05169 100644 --- a/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll +++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll @@ -1,7 +1,7 @@ ; Test that llvm-reduce can move intermediate values by inserting ; early returns ; -; RUN: llvm-reduce --abort-on-invalid-reduction --delta-passes=values-to-return --test FileCheck -
[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)
https://github.com/arsenm ready_for_review https://github.com/llvm/llvm-project/pull/133627 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] [clang-tools-extra] [libcxx] [clang] improved preservation of template keyword (PR #133610)
https://github.com/mizvekov edited https://github.com/llvm/llvm-project/pull/133610 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [Metadata] Preserve MD_prof when merging instructions when one is missing. (PR #132433)
https://github.com/snehasish updated https://github.com/llvm/llvm-project/pull/132433 >From 42a9972571f8c8872e7d71def2236be400428606 Mon Sep 17 00:00:00 2001 From: Snehasish Kumar Date: Fri, 21 Mar 2025 17:00:38 + Subject: [PATCH] Update tests, apply clang-tidy suggestions --- llvm/lib/Transforms/Utils/Local.cpp | 19 -- ...rect-call-branch-weights-preserve-hoist.ll | 62 ++ ...irect-call-branch-weights-preserve-sink.ll | 63 +++ 3 files changed, 138 insertions(+), 6 deletions(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-sink.ll diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index edec0e7a94422..c136825d47b9c 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -3355,9 +3355,10 @@ static void combineMetadata(Instruction *K, const Instruction *J, case LLVMContext::MD_invariant_group: // Preserve !invariant.group in K. break; - // Keep empty cases for mmra, memprof, and callsite to prevent them from - // being removed as unknown metadata. The actual merging is handled + // Keep empty cases for prof, mmra, memprof, and callsite to prevent them + // from being removed as unknown metadata. The actual merging is handled // separately below. + case LLVMContext::MD_prof: case LLVMContext::MD_mmra: case LLVMContext::MD_memprof: case LLVMContext::MD_callsite: @@ -3386,10 +3387,6 @@ static void combineMetadata(Instruction *K, const Instruction *J, if (!AAOnly) K->setMetadata(Kind, JMD); break; - case LLVMContext::MD_prof: -if (!AAOnly && DoesKMove) - K->setMetadata(Kind, MDNode::getMergedProfMetadata(KMD, JMD, K, J)); -break; case LLVMContext::MD_noalias_addrspace: if (DoesKMove) K->setMetadata(Kind, @@ -3436,6 +3433,16 @@ static void combineMetadata(Instruction *K, const Instruction *J, K->setMetadata(LLVMContext::MD_callsite, MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite)); } + + // Merge prof metadata. + // Handle separately to support cases where only one instruction has the + // metadata. + auto *JProf = J->getMetadata(LLVMContext::MD_prof); + auto *KProf = K->getMetadata(LLVMContext::MD_prof); + if (!AAOnly && (JProf || KProf)) { +K->setMetadata(LLVMContext::MD_prof, + MDNode::getMergedProfMetadata(KProf, JProf, K, J)); + } } void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J, diff --git a/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll new file mode 100644 index 0..d6058134f5285 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll @@ -0,0 +1,62 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals --version 2 +; RUN: opt < %s -passes='simplifycfg' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefix=HOIST + +; Test case based on C++ code with manually annotated !prof metadata. +; This is to test that when calls to 'func1' from 'if.then' block +; and 'if.else' block are hoisted, the branch_weights are merged and +; attached to the merged call rather than dropped. 
+; +; int func1(int a, int b) ; +; int func2(int a, int b) ; + +; int func(int a, int b, bool c) { +;int sum= 0; +;if(c) { +;sum += func1(a, b); +;} else { +;sum += func1(a, b); +;sum -= func2(a, b); +;} +;return sum; +; } +define i32 @_Z4funciib(i32 %a, i32 %b, i1 %c) { +; HOIST-LABEL: define i32 @_Z4funciib +; HOIST-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) { +; HOIST-NEXT: entry: +; HOIST-NEXT:[[CALL:%.*]] = tail call i32 @_Z5func1ii(i32 [[A]], i32 [[B]]), !prof [[PROF0:![0-9]+]] +; HOIST-NEXT:br i1 [[C]], label [[IF_END:%.*]], label [[IF_ELSE:%.*]] +; HOIST: if.else: +; HOIST-NEXT:[[CALL3:%.*]] = tail call i32 @_Z5func2ii(i32 [[A]], i32 [[B]]) +; HOIST-NEXT:[[SUB:%.*]] = sub i32 [[CALL]], [[CALL3]] +; HOIST-NEXT:br label [[IF_END]] +; HOIST: if.end: +; HOIST-NEXT:[[SUM_0:%.*]] = phi i32 [ [[SUB]], [[IF_ELSE]] ], [ [[CALL]], [[ENTRY:%.*]] ] +; HOIST-NEXT:ret i32 [[SUM_0]] +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: ; preds = %entry + %call = tail call i32 @_Z5func1ii(i32 %a, i32 %b) + br label %if.end + +if.else: ; preds = %entry + %call1 = tail call i32 @_Z5func1ii(i32 %a, i32 %b), !prof !0 + %call3 = tail call i32 @_Z5func2ii(i32 %a, i32 %b) + %
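For context on the metadata shape this change preserves, here is a minimal sketch of a hoisted call that keeps its !prof node after merging; the function shape and weight value are illustrative and not taken from the truncated test above.

declare i32 @_Z5func1ii(i32, i32)

define i32 @sketch(i32 %a, i32 %b, i1 %c) {
entry:
  ; After SimplifyCFG hoists the two calls to func1, the surviving call
  ; keeps the profile metadata that only one of the original calls carried.
  %call = tail call i32 @_Z5func1ii(i32 %a, i32 %b), !prof !0
  ret i32 %call
}

; Illustrative call-count weight; the real test defines its own !0 node.
!0 = !{!"branch_weights", i32 10}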