[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)

2025-03-30 Thread via llvm-branch-commits

https://github.com/dianqk updated 
https://github.com/llvm/llvm-project/pull/133082

>From a78eb0808e553ac933c39e41164bb530ce025a0e Mon Sep 17 00:00:00 2001
From: dianqk 
Date: Wed, 26 Mar 2025 21:27:43 +0800
Subject: [PATCH] [TailDuplicator] Determine if computed gotos using
 `blockaddress` (#132536)

Using `blockaddress` should be more reliable than determining if an
operand comes from a jump table index.

Alternative: Add the `MachineInstr::MIFlag::ComputedGoto` flag when
lowering `indirectbr`. But I don't think this approach is suitable to
backport.
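
For context, here is a minimal sketch (not from the patch; the interpreter snippet and its names are made up) of the kind of computed goto this check targets. Clang lowers `goto *p` to an `indirectbr` whose successors are all blockaddress-taken blocks, which is what the new `terminatorIsComputedGoto()` tests for, whereas a `switch` lowered through a jump table does not mark its successors that way.

```cpp
// Illustrative only: a classic computed-goto dispatch loop using the GNU
// "labels as values" extension (&&label yields a blockaddress constant).
int run(const int *ops) {
  static const void *dispatch[] = {&&op_inc, &&op_dec, &&op_halt};
  int acc = 0;
  goto *dispatch[*ops];
op_inc:
  ++acc;
  goto *dispatch[*++ops];
op_dec:
  --acc;
  goto *dispatch[*++ops];
op_halt:
  return acc;
}
```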

(cherry picked from commit 66f158d91803875de63d8f2a437ce8ecb22c4141)
---
 llvm/include/llvm/CodeGen/MachineBasicBlock.h |   9 +
 llvm/include/llvm/CodeGen/MachineInstr.h  |  16 +-
 llvm/lib/CodeGen/TailDuplicator.cpp   |   2 +-
 .../CodeGen/X86/tail-dup-computed-goto.mir| 265 +-
 4 files changed, 203 insertions(+), 89 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h 
b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 0b803a9724742..11efb2f656a7a 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -311,6 +311,15 @@ class MachineBasicBlock
   const MachineFunction *getParent() const { return xParent; }
   MachineFunction *getParent() { return xParent; }
 
+  /// Returns true if the original IR terminator is an `indirectbr`. This
+  /// typically corresponds to a `goto` in C, rather than jump tables.
+  bool terminatorIsComputedGoto() const {
+return back().isIndirectBranch() &&
+   llvm::all_of(successors(), [](const MachineBasicBlock *Succ) {
+ return Succ->isIRBlockAddressTaken();
+   });
+  }
+
   using instr_iterator = Instructions::iterator;
   using const_instr_iterator = Instructions::const_iterator;
   using reverse_instr_iterator = Instructions::reverse_iterator;
diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h 
b/llvm/include/llvm/CodeGen/MachineInstr.h
index b26cabe801ee8..997d6a5554e06 100644
--- a/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -994,17 +994,8 @@ class MachineInstr
 
   /// Return true if this is an indirect branch, such as a
   /// branch through a register.
-  bool isIndirectBranch(QueryType Type = AnyInBundle,
-bool IncludeJumpTable = true) const {
-return hasProperty(MCID::IndirectBranch, Type) &&
-   (IncludeJumpTable || !llvm::any_of(operands(), [](const auto &Op) {
-  return Op.isJTI();
-}));
-  }
-
-  bool isComputedGoto(QueryType Type = AnyInBundle) const {
-// Jump tables are not considered computed gotos.
-return isIndirectBranch(Type, /*IncludeJumpTable=*/false);
+  bool isIndirectBranch(QueryType Type = AnyInBundle) const {
+return hasProperty(MCID::IndirectBranch, Type);
   }
 
   /// Return true if this is a branch which may fall
@@ -2088,6 +2079,9 @@ class MachineInstr
 MCSymbol *PreInstrSymbol, MCSymbol *PostInstrSymbol,
 MDNode *HeapAllocMarker, MDNode *PCSections,
 uint32_t CFIType, MDNode *MMRAs);
+
+  /// Returns true if all successors are IRBlockAddressTaken.
+  bool jumpToIRBlockAddressTaken() const;
 };
 
 /// Special DenseMapInfo traits to compare MachineInstr* by *value* of the
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp 
b/llvm/lib/CodeGen/TailDuplicator.cpp
index 21f75458c90f3..b0de3c322ddd0 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -604,7 +604,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
   bool HasComputedGoto = false;
   if (!TailBB.empty()) {
 HasIndirectbr = TailBB.back().isIndirectBranch();
-HasComputedGoto = TailBB.back().isComputedGoto();
+HasComputedGoto = TailBB.terminatorIsComputedGoto();
   }
 
   if (HasIndirectbr && PreRegAlloc)
diff --git a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir 
b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
index a472dc67d8d51..17de405928d37 100644
--- a/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
+++ b/llvm/test/CodeGen/X86/tail-dup-computed-goto.mir
@@ -2,15 +2,27 @@
 # RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=early-tailduplication 
-tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s
 # Check that only the computed goto is not restricted by tail-dup-pred-size
and tail-dup-succ-size.
 --- |
+  @computed_goto.dispatch = constant [5 x ptr] [ptr null, ptr 
blockaddress(@computed_goto, %bb1), ptr blockaddress(@computed_goto, %bb2), ptr 
blockaddress(@computed_goto, %bb3), ptr blockaddress(@computed_goto, %bb4)]
   declare i64 @f0()
   declare i64 @f1()
   declare i64 @f2()
   declare i64 @f3()
   declare i64 @f4()
   declare i64 @f5()
-  @computed_goto.dispatch = external global [5 x ptr]
-  define void @computed_goto() { ret void }
+  define void @computed_goto() {
+start:
+  ret void
+bb1:
+  ret

[llvm-branch-commits] [llvm] release/20.x: [TailDuplicator] Determine if computed gotos using `blockaddress` (#132536) (PR #133082)

2025-03-30 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm approved this pull request.

I can't say I know much about the feature, but this should strictly move in a
more conservative direction.

https://github.com/llvm/llvm-project/pull/133082


[llvm-branch-commits] [llvm] llvm-reduce: Defer a shouldKeep call in operand reduction (PR #133387)

2025-03-30 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm updated 
https://github.com/llvm/llvm-project/pull/133387

>From fa597dd4161693813a3566fd1d4a3c7df1d00746 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Fri, 28 Mar 2025 12:58:20 +0700
Subject: [PATCH] llvm-reduce: Defer a shouldKeep call in operand reduction

Ideally shouldKeep is only called in contexts that will successfully
do something.
---
 llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp 
b/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp
index b0bca015434fa..8b6446725b7d4 100644
--- a/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp
+++ b/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp
@@ -26,8 +26,8 @@ extractOperandsFromModule(Oracle &O, ReducerWorkItem 
&WorkItem,
 for (auto &I : instructions(&F)) {
   if (PHINode *Phi = dyn_cast(&I)) {
 for (auto &Op : Phi->incoming_values()) {
-  if (!O.shouldKeep()) {
-if (Value *Reduced = ReduceValue(Op))
+  if (Value *Reduced = ReduceValue(Op)) {
+if (!O.shouldKeep())
   Phi->setIncomingValueForBlock(Phi->getIncomingBlock(Op), 
Reduced);
   }
 }



[llvm-branch-commits] [clang] [clang] Track final substitution for Subst* AST nodes (PR #132748)

2025-03-30 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov updated 
https://github.com/llvm/llvm-project/pull/132748

>From 2d4717492599f445975019339024e2d1bc02128f Mon Sep 17 00:00:00 2001
From: Matheus Izvekov 
Date: Sat, 22 Mar 2025 16:03:04 -0300
Subject: [PATCH 1/4] [clang] Track final substitution for
 SubstTemplateTemplateParm nodes

---
 clang/include/clang/AST/ASTContext.h   |  9 +
 clang/include/clang/AST/PropertiesBase.td  |  3 ++-
 clang/include/clang/AST/TemplateName.h | 17 -
 clang/include/clang/AST/Type.h |  3 ++-
 clang/lib/AST/ASTContext.cpp   |  6 +++---
 clang/lib/AST/ASTImporter.cpp  |  2 +-
 clang/lib/AST/TemplateName.cpp |  6 --
 clang/lib/AST/TextNodeDumper.cpp   |  2 ++
 clang/lib/Sema/SemaTemplateInstantiate.cpp |  8 ++--
 9 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index 1f7c75559e1e9..14a097189ca86 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2396,10 +2396,11 @@ class ASTContext : public RefCountedBase {
 const IdentifierInfo *Name) const;
   TemplateName getDependentTemplateName(NestedNameSpecifier *NNS,
 OverloadedOperatorKind Operator) const;
-  TemplateName
-  getSubstTemplateTemplateParm(TemplateName replacement, Decl *AssociatedDecl,
-   unsigned Index,
-   std::optional PackIndex) const;
+  TemplateName getSubstTemplateTemplateParm(TemplateName replacement,
+Decl *AssociatedDecl,
+unsigned Index,
+std::optional PackIndex,
+bool Final) const;
   TemplateName getSubstTemplateTemplateParmPack(const TemplateArgument 
&ArgPack,
 Decl *AssociatedDecl,
 unsigned Index,
diff --git a/clang/include/clang/AST/PropertiesBase.td 
b/clang/include/clang/AST/PropertiesBase.td
index 5f3a885832e2e..416914db2f7c8 100644
--- a/clang/include/clang/AST/PropertiesBase.td
+++ b/clang/include/clang/AST/PropertiesBase.td
@@ -729,8 +729,9 @@ let Class = PropertyTypeCase in {
   def : Property<"packIndex", Optional> {
 let Read = [{ parm->getPackIndex() }];
   }
+  def : Property<"final", Bool> { let Read = [{ parm->getFinal() }]; }
   def : Creator<[{
-return ctx.getSubstTemplateTemplateParm(replacement, associatedDecl, 
index, packIndex);
+return ctx.getSubstTemplateTemplateParm(replacement, associatedDecl, 
index, packIndex, final);
   }]>;
 }
 let Class = PropertyTypeCase in 
{
diff --git a/clang/include/clang/AST/TemplateName.h 
b/clang/include/clang/AST/TemplateName.h
index ce97f834bfc1d..313802502f818 100644
--- a/clang/include/clang/AST/TemplateName.h
+++ b/clang/include/clang/AST/TemplateName.h
@@ -413,9 +413,11 @@ class SubstTemplateTemplateParmStorage
 
   SubstTemplateTemplateParmStorage(TemplateName Replacement,
Decl *AssociatedDecl, unsigned Index,
-   std::optional PackIndex)
+   std::optional PackIndex,
+   bool Final)
   : UncommonTemplateNameStorage(SubstTemplateTemplateParm, Index,
-PackIndex ? *PackIndex + 1 : 0),
+((PackIndex ? *PackIndex + 1 : 0) << 1) |
+Final),
 Replacement(Replacement), AssociatedDecl(AssociatedDecl) {
 assert(AssociatedDecl != nullptr);
   }
@@ -429,10 +431,15 @@ class SubstTemplateTemplateParmStorage
   /// This should match the result of `getParameter()->getIndex()`.
   unsigned getIndex() const { return Bits.Index; }
 
+  // This substitution is Final, which means the substitution is fully
+  // sugared: it doesn't need to be resugared later.
+  bool getFinal() const { return Bits.Data & 1; }
+
   std::optional getPackIndex() const {
-if (Bits.Data == 0)
+auto Data = Bits.Data >> 1;
+if (Data == 0)
   return std::nullopt;
-return Bits.Data - 1;
+return Data - 1;
   }
 
   TemplateTemplateParmDecl *getParameter() const;
@@ -442,7 +449,7 @@ class SubstTemplateTemplateParmStorage
 
   static void Profile(llvm::FoldingSetNodeID &ID, TemplateName Replacement,
   Decl *AssociatedDecl, unsigned Index,
-  std::optional PackIndex);
+  std::optional PackIndex, bool Final);
 };
 
 class DeducedTemplateStorage : public UncommonTemplateNameStorage,
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index c927eb13711c1..e62b9938c9ba1 100644
--- a/clang/include/clang/AST/Type.h
+++ b

[llvm-branch-commits] [llvm] [LoopInterchange] Add tests for the vectorization profitability (NFC) (PR #133665)

2025-03-30 Thread Ryotaro Kasuga via llvm-branch-commits

https://github.com/kasuga-fj created 
https://github.com/llvm/llvm-project/pull/133665

There is a problem with the current profitability check for vectorization in 
LoopInterchange. There are both false positives and false negatives. The former 
means that the heuristic may say that "an exchange is necessary to vectorize 
the innermost loop" even though it's already possible. The latter means that 
the heuristic may miss a case where an exchange is necessary to vectorize the 
innermost loop. Note that this is not a dependency analysis problem.  These 
problems can occur even if the analysis is accurate (no overestimation).

This patch adds tests to clarify the cases that should be fixed. The root cause 
of these cases is that the heuristic doesn't handle the direction of a 
dependency correctly.

>From b53b7ce2b303ff9ea94d77b3ffe74d1697db9f3d Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga 
Date: Thu, 27 Mar 2025 07:04:27 +
Subject: [PATCH] [LoopInterchange] Add tests for the vectorization
 profitability (NFC)

There is a problem with the current profitability check for
vectorization in LoopInterchange. There are both false positives and
false negatives. The former means that the heuristic may say that "an
exchange is necessary to vectorize the innermost loop" even though it's
already possible. The latter means that the heuristic may miss a case
where an exchange is necessary to vectorize the innermost loop. Note
that this is not a dependency analysis problem.  These problems can
occur even if the analysis is accurate (no overestimation).

This patch adds tests to clarify the cases that should be fixed. The
root cause of these cases is that the heuristic doesn't handle the
direction of a dependency correctly.
---
 .../profitability-vectorization-heuristic.ll  | 108 ++
 1 file changed, 108 insertions(+)
 create mode 100644 
llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll

diff --git 
a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll 
b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
new file mode 100644
index 0..606117e70db86
--- /dev/null
+++ 
b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
+; RUN: -pass-remarks-output=%t -disable-output 
-loop-interchange-profitabilities=vectorize
+; RUN: FileCheck -input-file %t %s
+
+@A = dso_local global [256 x [256 x float]] zeroinitializer
+@B = dso_local global [256 x [256 x float]] zeroinitializer
+@C = dso_local global [256 x [256 x float]] zeroinitializer
+
+; Check that the below loops are exchanged for vectorization.
+;
+; for (int i = 0; i < 256; i++) {
+;   for (int j = 1; j < 256; j++) {
+; A[i][j] = A[i][j-1] + B[i][j];
+; C[i][j] += 1;
+;   }
+; }
+;
+; FIXME: These loops are not exchanged at this time due to the problem of
+; profitability heuristic for vectorization.
+
+; CHECK:  --- !Missed
+; CHECK-NEXT: Pass:loop-interchange
+; CHECK-NEXT: Name:InterchangeNotProfitable
+; CHECK-NEXT: Function:interchange_necesasry_for_vectorization
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:  Interchanging loops is not considered to 
improve cache locality nor vectorization.
+; CHECK-NEXT: ...
+define void @interchange_necesasry_for_vectorization() {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ]
+  br label %for.j.body
+
+for.j.body:
+  %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
+  %j.dec = add nsw i64 %j, -1
+  %a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, 
i64 %i, i64 %j.dec
+  %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, 
i64 %j
+  %c.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 %i, 
i64 %j
+  %a = load float, ptr %a.load.index, align 4
+  %b = load float, ptr %b.index, align 4
+  %c = load float, ptr %c.index, align 4
+  %add.0 = fadd float %a, %b
+  %a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, 
i64 %i, i64 %j
+  store float %add.0, ptr %a.store.index, align 4
+  %add.1 = fadd float %c, 1.0
+  store float %add.1, ptr %c.index, align 4
+  %j.next = add nuw nsw i64 %j, 1
+  %cmp.j = icmp eq i64 %j.next, 256
+  br i1 %cmp.j, label %for.i.inc, label %for.j.body
+
+for.i.inc:
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp.i = icmp eq i64 %i.next, 256
+  br i1 %cmp.i, label %exit, label %for.i.header
+
+exit:
+  ret void
+}
+
+; Check that the following innermost loop can be vectorized so that
+; interchanging is unnecessary.
+;
+; for (int i = 0; i < 256; i++)
+;   for (int j = 1; j < 256; j++)
+; A[i][j-1] = A[i][j] + B[i][j];
+;
+; FIXME: These loops are exchanged at this time due to the problem of
+; profitability heuristic for vectorization.
+
+; CHECK:  --- !Passed
+; CHECK-NEXT: Pass

[llvm-branch-commits] [llvm] Store GUIDs in metadata (PR #133682)

2025-03-30 Thread Owen Rodley via llvm-branch-commits

https://github.com/orodley created 
https://github.com/llvm/llvm-project/pull/133682

This takes the existing AssignGUID pass from CtxProfAnalysis, and runs
it by default, at the appropriate stages of the LTO pipeline. It also
changes GlobalValue::getGUID() to retrieve the GUID from the metadata
instead of computing it.

We don't yet have the supporting downstream changes to make a dedicated
GUID table in bitcode, nor do we use the metadata as part of ThinLTO --
it retains its existing mechanisms of recomputing GUIDs from separately
saved data. That will be changed later.
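
As a rough sketch of the direction (this is not the patch's code; the metadata kind name "guid" and the helper are placeholders), retrieving a pre-assigned GUID from function metadata with a fallback to the classic name hash could look like this:

```cpp
// Hypothetical sketch only: read a GUID stored as function metadata,
// falling back to hashing the global identifier when it is absent.
#include <cstdint>
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Metadata.h"

static uint64_t readAssignedGUID(const llvm::Function &F) {
  // "guid" is a placeholder kind name, not the name the patch actually uses.
  if (llvm::MDNode *MD = F.getMetadata("guid"))
    if (auto *CI = llvm::mdconst::dyn_extract<llvm::ConstantInt>(MD->getOperand(0)))
      return CI->getZExtValue();
  // Classic computation: MD5 hash of the global identifier.
  return llvm::GlobalValue::getGUID(F.getGlobalIdentifier());
}
```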

>From 1379952ca664e04c4aa6806a724bcda1b0fc1a48 Mon Sep 17 00:00:00 2001
From: Owen Rodley 
Date: Mon, 31 Mar 2025 16:16:35 +1100
Subject: [PATCH] Store GUIDs in metadata

This takes the existing AssignGUID pass from CtxProfAnalysis, and runs
it by default, at the appropriate stages of the LTO pipeline. It also
changes GlobalValue::getGUID() to retrieve the GUID from the metadata
instead of computing it.

We don't yet have the supporting downstream changes to make a dedicated
GUID table in bitcode, nor do we use the metadata as part of ThinLTO --
it retains its existing mechanisms of recomputing GUIDs from separately
saved data. That will be changed later.
---
 llvm/include/llvm/Analysis/CtxProfAnalysis.h  | 34 +++---
 llvm/include/llvm/IR/FixedMetadataKinds.def   |  1 +
 llvm/include/llvm/IR/GlobalValue.h|  4 +-
 .../llvm/Transforms/Utils/AssignGUID.h| 34 ++
 llvm/lib/Analysis/CtxProfAnalysis.cpp | 44 ++-
 llvm/lib/IR/Globals.cpp   | 33 ++
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/lib/Passes/PassBuilderPipelines.cpp  |  9 +++-
 .../Instrumentation/PGOCtxProfFlattening.cpp  |  2 +-
 .../Instrumentation/PGOCtxProfLowering.cpp|  3 +-
 llvm/lib/Transforms/Utils/AssignGUID.cpp  | 34 ++
 llvm/lib/Transforms/Utils/CMakeLists.txt  |  1 +
 .../Transforms/Utils/CallPromotionUtils.cpp   |  4 +-
 llvm/lib/Transforms/Utils/InlineFunction.cpp  |  4 +-
 14 files changed, 129 insertions(+), 79 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Utils/AssignGUID.h
 create mode 100644 llvm/lib/Transforms/Utils/AssignGUID.cpp

diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h 
b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index ede8bd2fe5001..484cc638a2d53 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -37,9 +37,6 @@ class PGOContextualProfile {
   // we'll need when we maintain the profiles during IPO transformations.
   std::map FuncInfo;
 
-  /// Get the GUID of this Function if it's defined in this module.
-  GlobalValue::GUID getDefinedFunctionGUID(const Function &F) const;
-
   // This is meant to be constructed from CtxProfAnalysis, which will also set
   // its state piecemeal.
   PGOContextualProfile() = default;
@@ -57,7 +54,7 @@ class PGOContextualProfile {
   const PGOCtxProfile &profiles() const { return Profiles; }
 
   bool isFunctionKnown(const Function &F) const {
-return getDefinedFunctionGUID(F) != 0;
+return F.getGUID() != 0;
   }
 
   StringRef getFunctionName(GlobalValue::GUID GUID) const {
@@ -69,22 +66,22 @@ class PGOContextualProfile {
 
   uint32_t getNumCounters(const Function &F) const {
 assert(isFunctionKnown(F));
-return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex;
+return FuncInfo.find(F.getGUID())->second.NextCounterIndex;
   }
 
   uint32_t getNumCallsites(const Function &F) const {
 assert(isFunctionKnown(F));
-return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex;
+return FuncInfo.find(F.getGUID())->second.NextCallsiteIndex;
   }
 
   uint32_t allocateNextCounterIndex(const Function &F) {
 assert(isFunctionKnown(F));
-return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex++;
+return FuncInfo.find(F.getGUID())->second.NextCounterIndex++;
   }
 
   uint32_t allocateNextCallsiteIndex(const Function &F) {
 assert(isFunctionKnown(F));
-return 
FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex++;
+return FuncInfo.find(F.getGUID())->second.NextCallsiteIndex++;
   }
 
   using ConstVisitor = function_ref;
@@ -145,26 +142,5 @@ class CtxProfAnalysisPrinterPass
   const PrintMode Mode;
 };
 
-/// Assign a GUID to functions as metadata. GUID calculation takes linkage into
-/// account, which may change especially through and after thinlto. By
-/// pre-computing and assigning as metadata, this mechanism is resilient to 
such
-/// changes (as well as name changes e.g. suffix ".llvm." additions).
-
-// FIXME(mtrofin): we can generalize this mechanism to calculate a GUID early 
in
-// the pass pipeline, associate it with any Global Value, and then use it for
-// PGO and ThinLTO.
-// At that point, this should be moved elsewhere.
-class AssignGUIDPass : public PassInfoMixin {
-public:

[llvm-branch-commits] [llvm] Store GUIDs in metadata (PR #133682)

2025-03-30 Thread Owen Rodley via llvm-branch-commits

https://github.com/orodley updated 
https://github.com/llvm/llvm-project/pull/133682

>From dd0751618d4eac29a6af13b2e747ed505ec9b321 Mon Sep 17 00:00:00 2001
From: Owen Rodley 
Date: Mon, 31 Mar 2025 16:16:35 +1100
Subject: [PATCH] Store GUIDs in metadata

This takes the existing AssignGUID pass from CtxProfAnalysis, and runs
it by default, at the appropriate stages of the LTO pipeline. It also
changes GlobalValue::getGUID() to retrieve the GUID from the metadata
instead of computing it.

We don't yet have the supporting downstream changes to make a dedicated
GUID table in bitcode, nor do we use the metadata as part of ThinLTO --
it retains its existing mechanisms of recomputing GUIDs from separately
saved data. That will be changed later.
---
 llvm/include/llvm/Analysis/CtxProfAnalysis.h  | 36 +++
 llvm/include/llvm/IR/FixedMetadataKinds.def   |  1 +
 llvm/include/llvm/IR/GlobalValue.h|  4 +-
 .../llvm/Transforms/Utils/AssignGUID.h| 34 ++
 llvm/lib/Analysis/CtxProfAnalysis.cpp | 44 ++-
 llvm/lib/IR/Globals.cpp   | 33 ++
 llvm/lib/Passes/PassBuilder.cpp   |  1 +
 llvm/lib/Passes/PassBuilderPipelines.cpp  |  9 +++-
 .../Instrumentation/PGOCtxProfFlattening.cpp  |  2 +-
 .../Instrumentation/PGOCtxProfLowering.cpp|  3 +-
 llvm/lib/Transforms/Utils/AssignGUID.cpp  | 34 ++
 llvm/lib/Transforms/Utils/CMakeLists.txt  |  1 +
 .../Transforms/Utils/CallPromotionUtils.cpp   |  5 +--
 llvm/lib/Transforms/Utils/InlineFunction.cpp  |  4 +-
 14 files changed, 129 insertions(+), 82 deletions(-)
 create mode 100644 llvm/include/llvm/Transforms/Utils/AssignGUID.h
 create mode 100644 llvm/lib/Transforms/Utils/AssignGUID.cpp

diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h 
b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index ede8bd2fe5001..2e0b97b3844d9 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -37,9 +37,6 @@ class PGOContextualProfile {
   // we'll need when we maintain the profiles during IPO transformations.
   std::map FuncInfo;
 
-  /// Get the GUID of this Function if it's defined in this module.
-  GlobalValue::GUID getDefinedFunctionGUID(const Function &F) const;
-
   // This is meant to be constructed from CtxProfAnalysis, which will also set
   // its state piecemeal.
   PGOContextualProfile() = default;
@@ -56,9 +53,7 @@ class PGOContextualProfile {
 
   const PGOCtxProfile &profiles() const { return Profiles; }
 
-  bool isFunctionKnown(const Function &F) const {
-return getDefinedFunctionGUID(F) != 0;
-  }
+  bool isFunctionKnown(const Function &F) const { return F.getGUID() != 0; }
 
   StringRef getFunctionName(GlobalValue::GUID GUID) const {
 auto It = FuncInfo.find(GUID);
@@ -69,22 +64,22 @@ class PGOContextualProfile {
 
   uint32_t getNumCounters(const Function &F) const {
 assert(isFunctionKnown(F));
-return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex;
+return FuncInfo.find(F.getGUID())->second.NextCounterIndex;
   }
 
   uint32_t getNumCallsites(const Function &F) const {
 assert(isFunctionKnown(F));
-return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex;
+return FuncInfo.find(F.getGUID())->second.NextCallsiteIndex;
   }
 
   uint32_t allocateNextCounterIndex(const Function &F) {
 assert(isFunctionKnown(F));
-return FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCounterIndex++;
+return FuncInfo.find(F.getGUID())->second.NextCounterIndex++;
   }
 
   uint32_t allocateNextCallsiteIndex(const Function &F) {
 assert(isFunctionKnown(F));
-return 
FuncInfo.find(getDefinedFunctionGUID(F))->second.NextCallsiteIndex++;
+return FuncInfo.find(F.getGUID())->second.NextCallsiteIndex++;
   }
 
   using ConstVisitor = function_ref;
@@ -145,26 +140,5 @@ class CtxProfAnalysisPrinterPass
   const PrintMode Mode;
 };
 
-/// Assign a GUID to functions as metadata. GUID calculation takes linkage into
-/// account, which may change especially through and after thinlto. By
-/// pre-computing and assigning as metadata, this mechanism is resilient to 
such
-/// changes (as well as name changes e.g. suffix ".llvm." additions).
-
-// FIXME(mtrofin): we can generalize this mechanism to calculate a GUID early 
in
-// the pass pipeline, associate it with any Global Value, and then use it for
-// PGO and ThinLTO.
-// At that point, this should be moved elsewhere.
-class AssignGUIDPass : public PassInfoMixin {
-public:
-  explicit AssignGUIDPass() = default;
-
-  /// Assign a GUID *if* one is not already assign, as a function metadata 
named
-  /// `GUIDMetadataName`.
-  PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
-  static const char *GUIDMetadataName;
-  // This should become GlobalValue::getGUID
-  static uint64_t getGUID(const Function &F);
-};
-
 } // namespace llvm
 #endif // LLVM_ANALYSIS_CTX

[llvm-branch-commits] [llvm] Store GUIDs in metadata (PR #133682)

2025-03-30 Thread Owen Rodley via llvm-branch-commits

orodley wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/133682
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#133682** (this PR)
* **#129644**
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev).
Learn more about stacking: https://stacking.dev/


https://github.com/llvm/llvm-project/pull/133682


[llvm-branch-commits] [llvm] Store GUIDs in metadata (PR #133682)

2025-03-30 Thread via llvm-branch-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff 8ef355aa433a44220eaf0062039b53770ebb9835 
1379952ca664e04c4aa6806a724bcda1b0fc1a48 --extensions h,cpp -- 
llvm/include/llvm/Transforms/Utils/AssignGUID.h 
llvm/lib/Transforms/Utils/AssignGUID.cpp 
llvm/include/llvm/Analysis/CtxProfAnalysis.h llvm/include/llvm/IR/GlobalValue.h 
llvm/lib/Analysis/CtxProfAnalysis.cpp llvm/lib/IR/Globals.cpp 
llvm/lib/Passes/PassBuilder.cpp llvm/lib/Passes/PassBuilderPipelines.cpp 
llvm/lib/Transforms/Instrumentation/PGOCtxProfFlattening.cpp 
llvm/lib/Transforms/Instrumentation/PGOCtxProfLowering.cpp 
llvm/lib/Transforms/Utils/CallPromotionUtils.cpp 
llvm/lib/Transforms/Utils/InlineFunction.cpp
```





View the diff from clang-format here.


```diff
diff --git a/llvm/include/llvm/Analysis/CtxProfAnalysis.h 
b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
index 484cc638a2..2e0b97b384 100644
--- a/llvm/include/llvm/Analysis/CtxProfAnalysis.h
+++ b/llvm/include/llvm/Analysis/CtxProfAnalysis.h
@@ -53,9 +53,7 @@ public:
 
   const PGOCtxProfile &profiles() const { return Profiles; }
 
-  bool isFunctionKnown(const Function &F) const {
-return F.getGUID() != 0;
-  }
+  bool isFunctionKnown(const Function &F) const { return F.getGUID() != 0; }
 
   StringRef getFunctionName(GlobalValue::GUID GUID) const {
 auto It = FuncInfo.find(GUID);
diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp 
b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
index d5840673ad..6912338ce5 100644
--- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp
@@ -655,7 +655,6 @@ CallBase *llvm::promoteCallWithIfThenElse(CallBase &CB, 
Function &Callee,
 // times, and the indirect BB, IndirectCount times
 Ctx.counters()[DirectID] = DirectCount;
 Ctx.counters()[IndirectID] = IndirectCount;
-
   };
   CtxProf.update(ProfileUpdater, Caller);
   return &DirectCall;

```




https://github.com/llvm/llvm-project/pull/133682


[llvm-branch-commits] [llvm] [LoopInterchange] Improve profitability check for vectorization (PR #133672)

2025-03-30 Thread Ryotaro Kasuga via llvm-branch-commits

https://github.com/kasuga-fj created 
https://github.com/llvm/llvm-project/pull/133672

The vectorization profitability check determines whether a given loop can be
vectorized. Since the check is conservative, a loop that can in fact be
vectorized may be judged non-vectorizable, which can trigger unnecessary
exchanges.
This patch improves the profitability decision by mitigating such misjudgments.
Before this patch, a loop was considered vectorizable only when it had no
loop-carried dependencies involving the loop's induction variable. However, a
loop-carried dependency does not prevent vectorization if its distance is
positive. This patch makes the vectorization check more accurate by also
accepting loops whose dependencies have a positive distance. Note that a
complete decision on whether a loop can be vectorized is difficult; it would
require checking the vector width against the dependency distance.
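
For a concrete picture (this is the same loop nest the updated test exercises; the wrapper function is illustrative), the innermost loop below carries only an anti-dependence of distance +1 on `A`, so it can be vectorized as-is and no interchange is needed:

```cpp
// Iteration j reads A[i][j] and iteration j+1 writes it: a loop-carried
// anti-dependence with positive distance (+1), which does not block
// vectorization of the j loop.
void kernel(float A[256][256], const float B[256][256]) {
  for (int i = 0; i < 256; ++i)
    for (int j = 1; j < 256; ++j)
      A[i][j - 1] = A[i][j] + B[i][j];
}
```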

>From cdec72a2b2c365e29cbe05f2ad2d21b403104999 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga 
Date: Thu, 27 Mar 2025 10:45:26 +
Subject: [PATCH] [LoopInterchange] Improve profitability check for
 vectorization

The vectorization profitability check determines whether a given loop can be
vectorized. Since the check is conservative, a loop that can in fact be
vectorized may be judged non-vectorizable, which can trigger unnecessary
exchanges.
This patch improves the profitability decision by mitigating such
misjudgments. Before this patch, a loop was considered vectorizable only when
it had no loop-carried dependencies involving the loop's induction variable.
However, a loop-carried dependency does not prevent vectorization if its
distance is positive. This patch makes the vectorization check more accurate
by also accepting loops whose dependencies have a positive distance. Note that
a complete decision on whether a loop can be vectorized is difficult; it would
require checking the vector width against the dependency distance.
---
 .../lib/Transforms/Scalar/LoopInterchange.cpp | 128 ++
 .../profitability-vectorization-heuristic.ll  |   8 +-
 2 files changed, 106 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp 
b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index b6b0b7d7a947a..0c3a9cbfeed5f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -17,8 +17,8 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/LoopCacheAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -80,6 +80,21 @@ enum class RuleTy {
   ForVectorization,
 };
 
+/// Store information about whether the corresponding direction vector was
+/// negated by normalization. This is necessary to restore the original one
+/// from a row of a dependency matrix, because we only manage normalized
+/// direction vectors. Also, duplicate vectors are eliminated, so there may be
+/// both original and negated vectors for a single entry (a row of the
+/// dependency matrix). E.g., if there are two direction vectors `[< =]` and
+/// `[> =]`, the latter will be converted to the former by normalization, so
+/// only `[< =]` is retained in the final result.
+struct NegatedStatus {
+  bool Original = false;
+  bool Negated = false;
+
+  bool isNonNegativeDir(char Dir) const;
+};
+
 } // end anonymous namespace
 
 // Minimum loop depth supported.
@@ -126,9 +141,10 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
 }
 #endif
 
-static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
- Loop *L, DependenceInfo *DI,
- ScalarEvolution *SE,
+static bool populateDependencyMatrix(CharMatrix &DepMatrix,
+ std::vector &NegStatusVec,
+ unsigned Level, Loop *L,
+ DependenceInfo *DI, ScalarEvolution *SE,
  OptimizationRemarkEmitter *ORE) {
   using ValueVector = SmallVector;
 
@@ -167,7 +183,9 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, 
unsigned Level,
 return false;
   }
   ValueVector::iterator I, IE, J, JE;
-  StringSet<> Seen;
+
+  // Manage all found direction vectors and map each to its index in DepMatrix.
+  StringMap Seen;
 
   for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
@@ -182,7 +200,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, 
unsigned Level,
 assert(D->isOrdered() && "Expected an output, flow or anti dep.");
 // If the direction vector is negative, normalize it to
 // make it non-negative.
-if (D->normalize(SE))
+  

[llvm-branch-commits] [llvm] [LoopInterchange] Improve profitability check for vectorization (PR #133672)

2025-03-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Ryotaro Kasuga (kasuga-fj)


Changes

The vectorization profitability check determines whether a given loop can be
vectorized. Since the check is conservative, a loop that can in fact be
vectorized may be judged non-vectorizable, which can trigger unnecessary
exchanges.
This patch improves the profitability decision by mitigating such misjudgments.
Before this patch, a loop was considered vectorizable only when it had no
loop-carried dependencies involving the loop's induction variable. However, a
loop-carried dependency does not prevent vectorization if its distance is
positive. This patch makes the vectorization check more accurate by also
accepting loops whose dependencies have a positive distance. Note that a
complete decision on whether a loop can be vectorized is difficult; it would
require checking the vector width against the dependency distance.

---
Full diff: https://github.com/llvm/llvm-project/pull/133672.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Scalar/LoopInterchange.cpp (+103-25) 
- (modified) 
llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll 
(+3-5) 


```diff
diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp 
b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index b6b0b7d7a947a..0c3a9cbfeed5f 100644
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -17,8 +17,8 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSet.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/LoopCacheAnalysis.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -80,6 +80,21 @@ enum class RuleTy {
   ForVectorization,
 };
 
+/// Store information about whether the corresponding direction vector was
+/// negated by normalization. This is necessary to restore the original one
+/// from a row of a dependency matrix, because we only manage normalized
+/// direction vectors. Also, duplicate vectors are eliminated, so there may be
+/// both original and negated vectors for a single entry (a row of the
+/// dependency matrix). E.g., if there are two direction vectors `[< =]` and
+/// `[> =]`, the latter will be converted to the former by normalization, so
+/// only `[< =]` is retained in the final result.
+struct NegatedStatus {
+  bool Original = false;
+  bool Negated = false;
+
+  bool isNonNegativeDir(char Dir) const;
+};
+
 } // end anonymous namespace
 
 // Minimum loop depth supported.
@@ -126,9 +141,10 @@ static void printDepMatrix(CharMatrix &DepMatrix) {
 }
 #endif
 
-static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
- Loop *L, DependenceInfo *DI,
- ScalarEvolution *SE,
+static bool populateDependencyMatrix(CharMatrix &DepMatrix,
+ std::vector &NegStatusVec,
+ unsigned Level, Loop *L,
+ DependenceInfo *DI, ScalarEvolution *SE,
  OptimizationRemarkEmitter *ORE) {
   using ValueVector = SmallVector;
 
@@ -167,7 +183,9 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, 
unsigned Level,
 return false;
   }
   ValueVector::iterator I, IE, J, JE;
-  StringSet<> Seen;
+
+  // Manage all found direction vectors and map each to its index in DepMatrix.
+  StringMap Seen;
 
   for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
 for (J = I, JE = MemInstr.end(); J != JE; ++J) {
@@ -182,7 +200,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, 
unsigned Level,
 assert(D->isOrdered() && "Expected an output, flow or anti dep.");
 // If the direction vector is negative, normalize it to
 // make it non-negative.
-if (D->normalize(SE))
+bool Normalized = D->normalize(SE);
+if (Normalized)
   LLVM_DEBUG(dbgs() << "Negative dependence vector normalized.\n");
 LLVM_DEBUG(StringRef DepType =
D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
@@ -214,8 +233,17 @@ static bool populateDependencyMatrix(CharMatrix 
&DepMatrix, unsigned Level,
 }
 
 // Make sure we only add unique entries to the dependency matrix.
-if (Seen.insert(StringRef(Dep.data(), Dep.size())).second)
+unsigned Index = DepMatrix.size();
+auto [Ite, Inserted] =
+Seen.try_emplace(StringRef(Dep.data(), Dep.size()), Index);
+if (Inserted) {
   DepMatrix.push_back(Dep);
+  NegStatusVec.push_back(NegatedStatus{});
+} else
+  Index = Ite->second;
+
+NegatedStatus &Status = NegStatusVec[Index];
+(Normalized ? Status.Negated : Status.Or

[llvm-branch-commits] [llvm] [Metadata] Preserve MD_prof when merging instructions when one is missing. (PR #132433)

2025-03-30 Thread Snehasish Kumar via llvm-branch-commits


@@ -3436,6 +3433,16 @@ static void combineMetadata(Instruction *K, const 
Instruction *J,
 K->setMetadata(LLVMContext::MD_callsite,
MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite));
   }
+
+  // Merge prof metadata.
+  // Handle separately to support cases where only one instruction has the
+  // metadata.
+  auto JProf = J->getMetadata(LLVMContext::MD_prof);
+  auto KProf = K->getMetadata(LLVMContext::MD_prof);
+  if (!AAOnly && (JProf || KProf)) {

snehasish wrote:

Removing the condition was intentional. In the case that `DoesKMove` is true, 
the merging of md_prof can be skipped if `J` does not have prof metadata (a 
minor optimization). I felt that it was simpler to just perform the merge 
regardless. Let me know if you feel otherwise.

https://github.com/llvm/llvm-project/pull/132433


[llvm-branch-commits] [llvm] [SDAG] Introduce inbounds flag for pointer arithmetic (PR #131862)

2025-03-30 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/131862

>From 42481628ec10fe863bc9bca94efa84cd414d385b Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Mon, 17 Mar 2025 06:51:16 -0400
Subject: [PATCH] [SDAG] Introduce inbounds flag for pointer arithmetic

This patch introduces an inbounds SDNodeFlag, to show that a pointer
addition SDNode implements an inbounds getelementptr operation (i.e.,
the pointer operand is in bounds wrt. the allocated object it is based
on, and the arithmetic does not change that). The flag is set in the DAG
construction when lowering inbounds GEPs.

Inbounds information is useful in the ISel when selecting memory
instructions that perform address computations whose intermediate steps
must be in the same memory region as the final result. A follow-up patch
will start using it for AMDGPU's flat memory instructions, where the
immediate offset must not affect the memory aperture of the address.

A similar patch for gMIR and GlobalISel will follow.

For SWDEV-516125.
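
As a hedged sketch of how a target might consume the new flag (this is not code from the patch; the helper name is hypothetical and the AMDGPU follow-up is not shown here):

```cpp
// Hypothetical sketch only: during instruction selection, a target could
// require the InBounds flag on a pointer add before folding an immediate
// offset into an addressing mode, since in-bounds arithmetic cannot move
// the access into a different memory aperture.
#include "llvm/CodeGen/SelectionDAGNodes.h"

static bool mayFoldImmediateOffset(const llvm::SDNode *PtrAdd) {
  return PtrAdd->getOpcode() == llvm::ISD::ADD &&
         PtrAdd->getFlags().hasInBounds();
}
```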
---
 llvm/include/llvm/CodeGen/SelectionDAGNodes.h| 9 +++--
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp| 3 +++
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 3 +++
 .../CodeGen/X86/merge-store-partially-alias-loads.ll | 2 +-
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h 
b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 2283f99202e2f..13ac65f5d731c 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -415,12 +415,15 @@ struct SDNodeFlags {
 Unpredictable = 1 << 13,
 // Compare instructions which may carry the samesign flag.
 SameSign = 1 << 14,
+// Pointer arithmetic instructions that remain in bounds, e.g., 
implementing
+// an inbounds GEP.
+InBounds = 1 << 15,
 
 // NOTE: Please update LargestValue in LLVM_DECLARE_ENUM_AS_BITMASK below
 // the class definition when adding new flags.
 
 PoisonGeneratingFlags = NoUnsignedWrap | NoSignedWrap | Exact | Disjoint |
-NonNeg | NoNaNs | NoInfs | SameSign,
+NonNeg | NoNaNs | NoInfs | SameSign | InBounds,
 FastMathFlags = NoNaNs | NoInfs | NoSignedZeros | AllowReciprocal |
 AllowContract | ApproximateFuncs | AllowReassociation,
   };
@@ -455,6 +458,7 @@ struct SDNodeFlags {
   void setAllowReassociation(bool b) { setFlag(b); }
   void setNoFPExcept(bool b) { setFlag(b); }
   void setUnpredictable(bool b) { setFlag(b); }
+  void setInBounds(bool b) { setFlag(b); }
 
   // These are accessors for each flag.
   bool hasNoUnsignedWrap() const { return Flags & NoUnsignedWrap; }
@@ -472,6 +476,7 @@ struct SDNodeFlags {
   bool hasAllowReassociation() const { return Flags & AllowReassociation; }
   bool hasNoFPExcept() const { return Flags & NoFPExcept; }
   bool hasUnpredictable() const { return Flags & Unpredictable; }
+  bool hasInBounds() const { return Flags & InBounds; }
 
   bool operator==(const SDNodeFlags &Other) const {
 return Flags == Other.Flags;
@@ -481,7 +486,7 @@ struct SDNodeFlags {
 };
 
 LLVM_DECLARE_ENUM_AS_BITMASK(decltype(SDNodeFlags::None),
- SDNodeFlags::SameSign);
+ SDNodeFlags::InBounds);
 
 inline SDNodeFlags operator|(SDNodeFlags LHS, SDNodeFlags RHS) {
   LHS |= RHS;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e3c34382d6354..e8336399d289f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4284,6 +4284,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User 
&I) {
 if (NW.hasNoUnsignedWrap() ||
 (int64_t(Offset) >= 0 && NW.hasNoUnsignedSignedWrap()))
   Flags |= SDNodeFlags::NoUnsignedWrap;
+Flags.setInBounds(NW.isInBounds());
 
 N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
 DAG.getConstant(Offset, dl, N.getValueType()), Flags);
@@ -4327,6 +4328,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User 
&I) {
 if (NW.hasNoUnsignedWrap() ||
 (Offs.isNonNegative() && NW.hasNoUnsignedSignedWrap()))
   Flags.setNoUnsignedWrap(true);
+Flags.setInBounds(NW.isInBounds());
 
 OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
 
@@ -4389,6 +4391,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User 
&I) {
   // pointer index type (add nuw).
   SDNodeFlags AddFlags;
   AddFlags.setNoUnsignedWrap(NW.hasNoUnsignedWrap());
+  AddFlags.setInBounds(NW.isInBounds());
 
   N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, IdxN, AddFlags);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp 
b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
ind

[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)

2025-03-30 Thread John Regehr via llvm-branch-commits

regehr wrote:

yeah I don't feel like we'll get interesting variants out of this one very 
often, but who knows. at some point it would be interesting to get a global 
view of which passes are providing interesting variants in practice and which 
ones aren't.

anyhow-- LGTM, this shouldn't slow reductions down noticeably. 

https://github.com/llvm/llvm-project/pull/133627


[llvm-branch-commits] [llvm] [Metadata] Preserve MD_prof when merging instructions when one is missing. (PR #132433)

2025-03-30 Thread Snehasish Kumar via llvm-branch-commits


@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals --version 2
+; RUN: opt < %s -passes='simplifycfg' 
-simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s 
--check-prefix=HOIST
+
+; Test case based on C++ code with manually annotated !prof metadata.
+; This is to test that when calls to 'func1' from 'if.then' block
+; and 'if.else' block are hoisted, the branch_weights are merged and
+; attached to merged call rather than dropped.
+;
+; int func1(int a, int b) ;
+; int func2(int a, int b) ;
+
+; int func(int a, int b, bool c) {
+;int sum= 0;
+;if(c) {
+;sum += func1(a, b);
+;} else {
+;sum += func1(a, b);
+;sum -= func2(a, b);
+;}
+;return sum;
+; }
+define i32 @_Z4funciib(i32 %a, i32 %b, i1 %c) {
+; HOIST-LABEL: define i32 @_Z4funciib
+; HOIST-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) {
+; HOIST-NEXT:  entry:
+; HOIST-NEXT:[[CALL:%.*]] = tail call i32 @_Z5func1ii(i32 [[A]], i32 
[[B]]), !prof [[PROF0:![0-9]+]]
+; HOIST-NEXT:br i1 [[C]], label [[IF_END:%.*]], label [[IF_ELSE:%.*]]
+; HOIST:   if.else:
+; HOIST-NEXT:[[CALL3:%.*]] = tail call i32 @_Z5func2ii(i32 [[A]], i32 
[[B]])
+; HOIST-NEXT:[[SUB:%.*]] = sub i32 [[CALL]], [[CALL3]]
+; HOIST-NEXT:br label [[IF_END]]
+; HOIST:   if.end:
+; HOIST-NEXT:[[SUM_0:%.*]] = phi i32 [ [[SUB]], [[IF_ELSE]] ], [ [[CALL]], 
[[ENTRY:%.*]] ]
+; HOIST-NEXT:ret i32 [[SUM_0]]
+;
+entry:
+  br i1 %c, label %if.then, label %if.else
+
+if.then:  ; preds = %entry
+  %call = tail call i32 @_Z5func1ii(i32 %a, i32 %b), !prof !0
+  br label %if.end
+
+if.else:  ; preds = %entry
+  %call1 = tail call i32 @_Z5func1ii(i32 %a, i32 %b)
+  %call3 = tail call i32 @_Z5func2ii(i32 %a, i32 %b)
+  %sub = sub i32 %call1, %call3
+  br label %if.end
+
+if.end:   ; preds = %if.else, %if.then
+  %sum.0 = phi i32 [ %call, %if.then ], [ %sub, %if.else ]
+  ret i32 %sum.0
+}
+
+declare i32 @_Z5func1ii(i32, i32)
+
+declare i32 @_Z5func2ii(i32, i32)
+
+!0 = !{!"branch_weights", i32 10}
+!1 = !{!"branch_weights", i32 90}

snehasish wrote:

Removed.

https://github.com/llvm/llvm-project/pull/132433


[llvm-branch-commits] [llvm] [Metadata] Preserve MD_prof when merging instructions when one is missing. (PR #132433)

2025-03-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/132433

>From 42a9972571f8c8872e7d71def2236be400428606 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Fri, 21 Mar 2025 17:00:38 +
Subject: [PATCH] Update tests, apply clang-tidy suggestions

---
 llvm/lib/Transforms/Utils/Local.cpp   | 19 --
 ...rect-call-branch-weights-preserve-hoist.ll | 62 ++
 ...irect-call-branch-weights-preserve-sink.ll | 63 +++
 3 files changed, 138 insertions(+), 6 deletions(-)
 create mode 100644 
llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll
 create mode 100644 
llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-sink.ll

diff --git a/llvm/lib/Transforms/Utils/Local.cpp 
b/llvm/lib/Transforms/Utils/Local.cpp
index edec0e7a94422..c136825d47b9c 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3355,9 +3355,10 @@ static void combineMetadata(Instruction *K, const 
Instruction *J,
   case LLVMContext::MD_invariant_group:
 // Preserve !invariant.group in K.
 break;
-  // Keep empty cases for mmra, memprof, and callsite to prevent them from
-  // being removed as unknown metadata. The actual merging is handled
+  // Keep empty cases for prof, mmra, memprof, and callsite to prevent them
+  // from being removed as unknown metadata. The actual merging is handled
   // separately below.
+  case LLVMContext::MD_prof:
   case LLVMContext::MD_mmra:
   case LLVMContext::MD_memprof:
   case LLVMContext::MD_callsite:
@@ -3386,10 +3387,6 @@ static void combineMetadata(Instruction *K, const 
Instruction *J,
 if (!AAOnly)
   K->setMetadata(Kind, JMD);
 break;
-  case LLVMContext::MD_prof:
-if (!AAOnly && DoesKMove)
-  K->setMetadata(Kind, MDNode::getMergedProfMetadata(KMD, JMD, K, J));
-break;
   case LLVMContext::MD_noalias_addrspace:
 if (DoesKMove)
   K->setMetadata(Kind,
@@ -3436,6 +3433,16 @@ static void combineMetadata(Instruction *K, const 
Instruction *J,
 K->setMetadata(LLVMContext::MD_callsite,
MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite));
   }
+
+  // Merge prof metadata.
+  // Handle separately to support cases where only one instruction has the
+  // metadata.
+  auto *JProf = J->getMetadata(LLVMContext::MD_prof);
+  auto *KProf = K->getMetadata(LLVMContext::MD_prof);
+  if (!AAOnly && (JProf || KProf)) {
+K->setMetadata(LLVMContext::MD_prof,
+   MDNode::getMergedProfMetadata(KProf, JProf, K, J));
+  }
 }
 
 void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
diff --git 
a/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll
 
b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll
new file mode 100644
index 0..d6058134f5285
--- /dev/null
+++ 
b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals --version 2
+; RUN: opt < %s -passes='simplifycfg' 
-simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s 
--check-prefix=HOIST
+
+; Test case based on C++ code with manually annotated !prof metadata.
+; This is to test that when calls to 'func1' from 'if.then' block
+; and 'if.else' block are hoisted, the branch_weights are merged and
+; attached to merged call rather than dropped.
+;
+; int func1(int a, int b) ;
+; int func2(int a, int b) ;
+
+; int func(int a, int b, bool c) {
+;int sum= 0;
+;if(c) {
+;sum += func1(a, b);
+;} else {
+;sum += func1(a, b);
+;sum -= func2(a, b);
+;}
+;return sum;
+; }
+define i32 @_Z4funciib(i32 %a, i32 %b, i1 %c) {
+; HOIST-LABEL: define i32 @_Z4funciib
+; HOIST-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) {
+; HOIST-NEXT:  entry:
+; HOIST-NEXT:[[CALL:%.*]] = tail call i32 @_Z5func1ii(i32 [[A]], i32 
[[B]]), !prof [[PROF0:![0-9]+]]
+; HOIST-NEXT:br i1 [[C]], label [[IF_END:%.*]], label [[IF_ELSE:%.*]]
+; HOIST:   if.else:
+; HOIST-NEXT:[[CALL3:%.*]] = tail call i32 @_Z5func2ii(i32 [[A]], i32 
[[B]])
+; HOIST-NEXT:[[SUB:%.*]] = sub i32 [[CALL]], [[CALL3]]
+; HOIST-NEXT:br label [[IF_END]]
+; HOIST:   if.end:
+; HOIST-NEXT:[[SUM_0:%.*]] = phi i32 [ [[SUB]], [[IF_ELSE]] ], [ [[CALL]], 
[[ENTRY:%.*]] ]
+; HOIST-NEXT:ret i32 [[SUM_0]]
+;
+entry:
+  br i1 %c, label %if.then, label %if.else
+
+if.then:  ; preds = %entry
+  %call = tail call i32 @_Z5func1ii(i32 %a, i32 %b)
+  br label %if.end
+
+if.else:  ; preds = %entry
+  %call1 = tail call i32 @_Z5func1ii(i32 %a, i32 %b), !prof !0
+  %call3 = tail call i32 @_Z5func2ii(i32 %a, i32 %b)
+  %

[llvm-branch-commits] [llvm] [LoopInterchange] Add tests for the vectorization profitability (NFC) (PR #133665)

2025-03-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Ryotaro Kasuga (kasuga-fj)


Changes

There is a problem with the current profitability check for vectorization in 
LoopInterchange. There are both false positives and false negatives. The former 
means that the heuristic may say that "an exchange is necessary to vectorize 
the innermost loop" even though it's already possible. The latter means that 
the heuristic may miss a case where an exchange is necessary to vectorize the 
innermost loop. Note that this is not a dependency analysis problem.  These 
problems can occur even if the analysis is accurate (no overestimation).

This patch adds tests to clarify the cases that should be fixed. The root cause 
of these cases is that the heuristic doesn't handle the direction of a 
dependency correctly.

---
Full diff: https://github.com/llvm/llvm-project/pull/133665.diff


1 Files Affected:

- (added) 
llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll 
(+108) 


```diff
diff --git 
a/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll 
b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
new file mode 100644
index 0..606117e70db86
--- /dev/null
+++ 
b/llvm/test/Transforms/LoopInterchange/profitability-vectorization-heuristic.ll
@@ -0,0 +1,108 @@
+; RUN: opt < %s -passes=loop-interchange -cache-line-size=64 \
+; RUN: -pass-remarks-output=%t -disable-output 
-loop-interchange-profitabilities=vectorize
+; RUN: FileCheck -input-file %t %s
+
+@A = dso_local global [256 x [256 x float]] zeroinitializer
+@B = dso_local global [256 x [256 x float]] zeroinitializer
+@C = dso_local global [256 x [256 x float]] zeroinitializer
+
+; Check that the below loops are exchanged for vectorization.
+;
+; for (int i = 0; i < 256; i++) {
+;   for (int j = 1; j < 256; j++) {
+; A[i][j] = A[i][j-1] + B[i][j];
+; C[i][j] += 1;
+;   }
+; }
+;
+; FIXME: These loops are not exchanged at this time due to the problem of
+; profitability heuristic for vectorization.
+
+; CHECK:  --- !Missed
+; CHECK-NEXT: Pass:loop-interchange
+; CHECK-NEXT: Name:InterchangeNotProfitable
+; CHECK-NEXT: Function:interchange_necesasry_for_vectorization
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:  Interchanging loops is not considered to 
improve cache locality nor vectorization.
+; CHECK-NEXT: ...
+define void @interchange_necesasry_for_vectorization() {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ]
+  br label %for.j.body
+
+for.j.body:
+  %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
+  %j.dec = add nsw i64 %j, -1
+  %a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, 
i64 %i, i64 %j.dec
+  %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, 
i64 %j
+  %c.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @C, i64 %i, 
i64 %j
+  %a = load float, ptr %a.load.index, align 4
+  %b = load float, ptr %b.index, align 4
+  %c = load float, ptr %c.index, align 4
+  %add.0 = fadd float %a, %b
+  %a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, 
i64 %i, i64 %j
+  store float %add.0, ptr %a.store.index, align 4
+  %add.1 = fadd float %c, 1.0
+  store float %add.1, ptr %c.index, align 4
+  %j.next = add nuw nsw i64 %j, 1
+  %cmp.j = icmp eq i64 %j.next, 256
+  br i1 %cmp.j, label %for.i.inc, label %for.j.body
+
+for.i.inc:
+  %i.next = add nuw nsw i64 %i, 1
+  %cmp.i = icmp eq i64 %i.next, 256
+  br i1 %cmp.i, label %exit, label %for.i.header
+
+exit:
+  ret void
+}
+
+; Check that the following innermost loop can be vectorized so that
+; interchanging is unnecessary.
+;
+; for (int i = 0; i < 256; i++)
+;   for (int j = 1; j < 256; j++)
+; A[i][j-1] = A[i][j] + B[i][j];
+;
+; FIXME: These loops are exchanged at this time due to a problem in the
+; profitability heuristic for vectorization.
+
+; CHECK:  --- !Passed
+; CHECK-NEXT: Pass:loop-interchange
+; CHECK-NEXT: Name:Interchanged
+; CHECK-NEXT: Function:interchange_unnecesasry_for_vectorization
+; CHECK-NEXT: Args:
+; CHECK-NEXT:   - String:  Loop interchanged with enclosing loop.
+define void @interchange_unnecesasry_for_vectorization() {
+entry:
+  br label %for.i.header
+
+for.i.header:
+  %i = phi i64 [ 1, %entry ], [ %i.next, %for.i.inc ]
+  br label %for.j.body
+
+for.j.body:
+  %j = phi i64 [ 1, %for.i.header ], [ %j.next, %for.j.body ]
+  %j.dec = add nsw i64 %j, -1
+  %a.load.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, 
i64 %i, i64 %j
+  %b.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @B, i64 %i, 
i64 %j
+  %a = load float, ptr %a.load.index, align 4
+  %b = load float, ptr %b.index, align 4
+  %add = fadd float %a, %b
+  %a.store.index = getelementptr nuw inbounds [256 x [256 x float]], ptr @A, 
i64

[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)

2025-03-30 Thread Matt Arsenault via llvm-branch-commits

arsenm wrote:

> [!WARNING]
> This pull request is not mergeable via GitHub because a downstack PR is
> open. Once all requirements are satisfied, merge this PR as a stack on
> Graphite: https://app.graphite.dev/github/pr/llvm/llvm-project/133627
> Learn more: https://graphite.dev/docs/merge-pull-requests

* **#133627** 👈 (this PR, view in Graphite)
* **#132686**
* `main`

This stack of pull requests is managed by Graphite (https://graphite.dev).
Learn more about stacking: https://stacking.dev/


https://github.com/llvm/llvm-project/pull/133627
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)

2025-03-30 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm created 
https://github.com/llvm/llvm-project/pull/133627

Extend the instruction -> return reduction with one that inserts
return of function arguments. Not sure how useful this really is. This
has more freedom since we could insert the return anywhere in the function,
but this just inserts the return in the entry block.
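As a rough C++ analogue of what the new reduction does to a function (the real
transform operates on LLVM IR, exactly as in the tests added below; the names
here are purely illustrative):

```cpp
// Function handed to llvm-reduce: the argument is live, but only through a store.
void store_arg(int arg, int *ptr) { *ptr = arg; }

// What the arguments-to-return reduction conceptually produces: the entry
// block is replaced by an early return of the argument, and the return type
// is rewritten from void to the argument's type.
int store_arg_reduced(int arg, int * /*ptr*/) { return arg; }
```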

>From 349a15d2581c6701f947eeeb0dee6ad4728d8a58 Mon Sep 17 00:00:00 2001
From: Matt Arsenault 
Date: Mon, 24 Mar 2025 14:33:36 +0700
Subject: [PATCH] llvm-reduce: Reduce with early return of arguments

Extend the instruction -> return reduction with one that inserts
return of function arguments. Not sure how useful this really is. This
has more freedom since we could insert the return anywhere in the function,
but this just inserts the return in the entry block.
---
 .../reduce-values-to-return-args.ll   | 77 +++
 ...-values-to-return-nonvoid-noncallee-use.ll |  2 +-
 .../llvm-reduce/reduce-values-to-return.ll|  2 +-
 llvm/tools/llvm-reduce/DeltaPasses.def|  5 +-
 .../deltas/ReduceValuesToReturn.cpp   | 42 +-
 .../llvm-reduce/deltas/ReduceValuesToReturn.h |  3 +-
 6 files changed, 124 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll

diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll
new file mode 100644
index 0..abbc643822033
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll
@@ -0,0 +1,77 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=arguments-to-return --test FileCheck --test-arg 
--check-prefixes=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck --check-prefixes=RESULT %s < %t
+
+
+; INTERESTING-LABEL: @move_entry_block_use_argument_to_return(i32 %arg, ptr 
%ptr) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return(
+; RESULT-NEXT: ret i32 %arg
+; RESULT-NEXT: }
+define void @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) {
+  store i32 %arg, ptr %ptr
+  ret void
+}
+
+; INTERESTING-LABEL: @move_entry_block_use_argument_to_return_existing_ret(i32 
%arg, ptr %ptr) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 
@move_entry_block_use_argument_to_return_existing_ret(
+; RESULT-NEXT: ret i32 %arg
+; RESULT-NEXT: }
+define i32 @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr 
%ptr) {
+  store i32 %arg, ptr %ptr
+  ret i32 0
+}
+
+; INTERESTING-LABEL: @move_phi_block_use_argument_to_return(i32 %arg, ptr 
%ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 @move_phi_block_use_argument_to_return(
+; RESULT-NEXT: entry:
+; RESULT-NEXT: ret i32 %arg
+define void @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr 
%ptr1, i1 %cond0, i1 %cond1) {
+entry:
+  br i1 %cond0, label %bb0, label %bb1
+
+bb0:
+  %phi = phi i32 [ %arg, %entry ], [ 123, %bb1 ]
+  store i32 %arg, ptr %ptr0
+  store i32 %phi, ptr %ptr1
+  br label %bb1
+
+bb1:
+  br i1 %cond1, label %bb0, label %bb2
+
+bb2:
+  ret void
+}
+
+; INTERESTING-LABEL: define {{.*}} @keep_second_arg(i32 %arg0, ptr %arg1) {
+; INTERESTING: %arg1
+
+; RESULT-LABEL: define ptr @keep_second_arg(
+; RESULT-NEXT: ret ptr %arg1
+; RESULT-NEXT: }
+define void @keep_second_arg(i32 %arg0, ptr %arg1) {
+  store i32 %arg0, ptr %arg1
+  ret void
+}
+
+; INTERESTING-LABEL: @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) {
+; INTERESTING: i32 %arg2
+
+; RESULT-LABEL: define i32 @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 
%arg2) {
+; RESULT-NEXT: entry:
+; RESULT-NEXT: ret i32 %arg2
+define void @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) {
+entry:
+  br i1 %arg0, label %bb0, label %bb1
+
+bb0:
+  store i32 %arg2, ptr %arg1
+  ret void
+
+bb1:
+  ret void
+}
diff --git 
a/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll
index 215ea97a8be91..11166479318c6 100644
--- 
a/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll
+++ 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll
@@ -1,7 +1,7 @@
 ; Make sure we don't break on non-callee uses of funtions with a
 ; non-void return type.
 
-; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=values-to-return --test FileCheck --test-arg 
--check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=instructions-to-return --test FileCheck --test-arg 
--check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
 ; RUN: FileCheck --check-prefix=RESULT %s < %t
 
 ; INTERESTING-LABEL: @interesting(
diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-

[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)

2025-03-30 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: Matt Arsenault (arsenm)


Changes

Extend the instruction -> return reduction with one that inserts
return of function arguments. Not sure how useful this really is. This
has more freedom since we could insert the return anywhere in the function,
but this just inserts the return in the entry block.

---
Full diff: https://github.com/llvm/llvm-project/pull/133627.diff


6 Files Affected:

- (added) llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll (+77) 
- (modified) 
llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll 
(+1-1) 
- (modified) llvm/test/tools/llvm-reduce/reduce-values-to-return.ll (+1-1) 
- (modified) llvm/tools/llvm-reduce/DeltaPasses.def (+4-1) 
- (modified) llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.cpp (+39-3) 
- (modified) llvm/tools/llvm-reduce/deltas/ReduceValuesToReturn.h (+2-1) 


``diff
diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll
new file mode 100644
index 0..abbc643822033
--- /dev/null
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return-args.ll
@@ -0,0 +1,77 @@
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=arguments-to-return --test FileCheck --test-arg 
--check-prefixes=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: FileCheck --check-prefixes=RESULT %s < %t
+
+
+; INTERESTING-LABEL: @move_entry_block_use_argument_to_return(i32 %arg, ptr 
%ptr) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 @move_entry_block_use_argument_to_return(
+; RESULT-NEXT: ret i32 %arg
+; RESULT-NEXT: }
+define void @move_entry_block_use_argument_to_return(i32 %arg, ptr %ptr) {
+  store i32 %arg, ptr %ptr
+  ret void
+}
+
+; INTERESTING-LABEL: @move_entry_block_use_argument_to_return_existing_ret(i32 
%arg, ptr %ptr) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 
@move_entry_block_use_argument_to_return_existing_ret(
+; RESULT-NEXT: ret i32 %arg
+; RESULT-NEXT: }
+define i32 @move_entry_block_use_argument_to_return_existing_ret(i32 %arg, ptr 
%ptr) {
+  store i32 %arg, ptr %ptr
+  ret i32 0
+}
+
+; INTERESTING-LABEL: @move_phi_block_use_argument_to_return(i32 %arg, ptr 
%ptr0, ptr %ptr1, i1 %cond0, i1 %cond1) {
+; INTERESTING: %arg
+
+; RESULT-LABEL: define i32 @move_phi_block_use_argument_to_return(
+; RESULT-NEXT: entry:
+; RESULT-NEXT: ret i32 %arg
+define void @move_phi_block_use_argument_to_return(i32 %arg, ptr %ptr0, ptr 
%ptr1, i1 %cond0, i1 %cond1) {
+entry:
+  br i1 %cond0, label %bb0, label %bb1
+
+bb0:
+  %phi = phi i32 [ %arg, %entry ], [ 123, %bb1 ]
+  store i32 %arg, ptr %ptr0
+  store i32 %phi, ptr %ptr1
+  br label %bb1
+
+bb1:
+  br i1 %cond1, label %bb0, label %bb2
+
+bb2:
+  ret void
+}
+
+; INTERESTING-LABEL: define {{.*}} @keep_second_arg(i32 %arg0, ptr %arg1) {
+; INTERESTING: %arg1
+
+; RESULT-LABEL: define ptr @keep_second_arg(
+; RESULT-NEXT: ret ptr %arg1
+; RESULT-NEXT: }
+define void @keep_second_arg(i32 %arg0, ptr %arg1) {
+  store i32 %arg0, ptr %arg1
+  ret void
+}
+
+; INTERESTING-LABEL: @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) {
+; INTERESTING: i32 %arg2
+
+; RESULT-LABEL: define i32 @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 
%arg2) {
+; RESULT-NEXT: entry:
+; RESULT-NEXT: ret i32 %arg2
+define void @multi_void_return_arg(i1 %arg0, ptr %arg1, i32 %arg2) {
+entry:
+  br i1 %arg0, label %bb0, label %bb1
+
+bb0:
+  store i32 %arg2, ptr %arg1
+  ret void
+
+bb1:
+  ret void
+}
diff --git 
a/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll
index 215ea97a8be91..11166479318c6 100644
--- 
a/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll
+++ 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return-nonvoid-noncallee-use.ll
@@ -1,7 +1,7 @@
 ; Make sure we don't break on non-callee uses of funtions with a
 ; non-void return type.
 
-; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=values-to-return --test FileCheck --test-arg 
--check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
+; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=instructions-to-return --test FileCheck --test-arg 
--check-prefix=INTERESTING --test-arg %s --test-arg --input-file %s -o %t
 ; RUN: FileCheck --check-prefix=RESULT %s < %t
 
 ; INTERESTING-LABEL: @interesting(
diff --git a/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll 
b/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll
index 0c36db8ebc278..2af87aad05169 100644
--- a/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll
+++ b/llvm/test/tools/llvm-reduce/reduce-values-to-return.ll
@@ -1,7 +1,7 @@
 ; Test that llvm-reduce can move intermediate values by inserting
 ; early returns
 ;
-; RUN: llvm-reduce --abort-on-invalid-reduction 
--delta-passes=values-to-return --test FileCheck -

[llvm-branch-commits] [llvm] llvm-reduce: Reduce with early return of arguments (PR #133627)

2025-03-30 Thread Matt Arsenault via llvm-branch-commits

https://github.com/arsenm ready_for_review 
https://github.com/llvm/llvm-project/pull/133627
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] [clang-tools-extra] [libcxx] [clang] improved preservation of template keyword (PR #133610)

2025-03-30 Thread Matheus Izvekov via llvm-branch-commits

https://github.com/mizvekov edited 
https://github.com/llvm/llvm-project/pull/133610
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [Metadata] Preserve MD_prof when merging instructions when one is missing. (PR #132433)

2025-03-30 Thread Snehasish Kumar via llvm-branch-commits

https://github.com/snehasish updated 
https://github.com/llvm/llvm-project/pull/132433

>From 42a9972571f8c8872e7d71def2236be400428606 Mon Sep 17 00:00:00 2001
From: Snehasish Kumar 
Date: Fri, 21 Mar 2025 17:00:38 +
Subject: [PATCH] Update tests, apply clang-tidy suggestions

---
 llvm/lib/Transforms/Utils/Local.cpp   | 19 --
 ...rect-call-branch-weights-preserve-hoist.ll | 62 ++
 ...irect-call-branch-weights-preserve-sink.ll | 63 +++
 3 files changed, 138 insertions(+), 6 deletions(-)
 create mode 100644 
llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll
 create mode 100644 
llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-sink.ll

diff --git a/llvm/lib/Transforms/Utils/Local.cpp 
b/llvm/lib/Transforms/Utils/Local.cpp
index edec0e7a94422..c136825d47b9c 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -3355,9 +3355,10 @@ static void combineMetadata(Instruction *K, const 
Instruction *J,
   case LLVMContext::MD_invariant_group:
 // Preserve !invariant.group in K.
 break;
-  // Keep empty cases for mmra, memprof, and callsite to prevent them from
-  // being removed as unknown metadata. The actual merging is handled
+  // Keep empty cases for prof, mmra, memprof, and callsite to prevent them
+  // from being removed as unknown metadata. The actual merging is handled
   // separately below.
+  case LLVMContext::MD_prof:
   case LLVMContext::MD_mmra:
   case LLVMContext::MD_memprof:
   case LLVMContext::MD_callsite:
@@ -3386,10 +3387,6 @@ static void combineMetadata(Instruction *K, const 
Instruction *J,
 if (!AAOnly)
   K->setMetadata(Kind, JMD);
 break;
-  case LLVMContext::MD_prof:
-if (!AAOnly && DoesKMove)
-  K->setMetadata(Kind, MDNode::getMergedProfMetadata(KMD, JMD, K, J));
-break;
   case LLVMContext::MD_noalias_addrspace:
 if (DoesKMove)
   K->setMetadata(Kind,
@@ -3436,6 +3433,16 @@ static void combineMetadata(Instruction *K, const 
Instruction *J,
 K->setMetadata(LLVMContext::MD_callsite,
MDNode::getMergedCallsiteMetadata(KCallSite, JCallSite));
   }
+
+  // Merge prof metadata.
+  // Handle separately to support cases where only one instruction has the
+  // metadata.
+  auto *JProf = J->getMetadata(LLVMContext::MD_prof);
+  auto *KProf = K->getMetadata(LLVMContext::MD_prof);
+  if (!AAOnly && (JProf || KProf)) {
+K->setMetadata(LLVMContext::MD_prof,
+   MDNode::getMergedProfMetadata(KProf, JProf, K, J));
+  }
 }
 
 void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J,
diff --git 
a/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll
 
b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll
new file mode 100644
index 0..d6058134f5285
--- /dev/null
+++ 
b/llvm/test/Transforms/SimplifyCFG/merge-direct-call-branch-weights-preserve-hoist.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals --version 2
+; RUN: opt < %s -passes='simplifycfg' 
-simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s 
--check-prefix=HOIST
+
+; Test case based on C++ code with manually annotated !prof metadata.
+; This is to test that when calls to 'func1' from 'if.then' block
+; and 'if.else' block are hoisted, the branch_weights are merged and
+; attached to merged call rather than dropped.
+;
+; int func1(int a, int b) ;
+; int func2(int a, int b) ;
+
+; int func(int a, int b, bool c) {
+;int sum= 0;
+;if(c) {
+;sum += func1(a, b);
+;} else {
+;sum += func1(a, b);
+;sum -= func2(a, b);
+;}
+;return sum;
+; }
+define i32 @_Z4funciib(i32 %a, i32 %b, i1 %c) {
+; HOIST-LABEL: define i32 @_Z4funciib
+; HOIST-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) {
+; HOIST-NEXT:  entry:
+; HOIST-NEXT:[[CALL:%.*]] = tail call i32 @_Z5func1ii(i32 [[A]], i32 
[[B]]), !prof [[PROF0:![0-9]+]]
+; HOIST-NEXT:br i1 [[C]], label [[IF_END:%.*]], label [[IF_ELSE:%.*]]
+; HOIST:   if.else:
+; HOIST-NEXT:[[CALL3:%.*]] = tail call i32 @_Z5func2ii(i32 [[A]], i32 
[[B]])
+; HOIST-NEXT:[[SUB:%.*]] = sub i32 [[CALL]], [[CALL3]]
+; HOIST-NEXT:br label [[IF_END]]
+; HOIST:   if.end:
+; HOIST-NEXT:[[SUM_0:%.*]] = phi i32 [ [[SUB]], [[IF_ELSE]] ], [ [[CALL]], 
[[ENTRY:%.*]] ]
+; HOIST-NEXT:ret i32 [[SUM_0]]
+;
+entry:
+  br i1 %c, label %if.then, label %if.else
+
+if.then:  ; preds = %entry
+  %call = tail call i32 @_Z5func1ii(i32 %a, i32 %b)
+  br label %if.end
+
+if.else:  ; preds = %entry
+  %call1 = tail call i32 @_Z5func1ii(i32 %a, i32 %b), !prof !0
+  %call3 = tail call i32 @_Z5func2ii(i32 %a, i32 %b)
+  %