[llvm-branch-commits] [llvm] [SLU][profcheck] create likely branch weights for guard->branch (PR #164271)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164271

>From 466d5f3f12337d3cccdaf184b83c6fb80c0a9186 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Mon, 20 Oct 2025 08:21:26 -0700
Subject: [PATCH] [SLU][profcheck] create likely branch weights for
 guard->branch

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |  12 +-
 .../Transforms/SimpleLoopUnswitch/guards.ll   | 181 +-
 2 files changed, 139 insertions(+), 54 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index dd36a63c1564b..88a19d3683991 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -40,6 +40,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ProfDataUtils.h"
@@ -2829,9 +2830,14 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst 
*GI, Loop &L,
  MSSAU->getMemorySSA()->verifyMemorySSA();
 
   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
-  Instruction *DeoptBlockTerm =
-  SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
-GI->getMetadata(LLVMContext::MD_prof), &DTU, 
&LI);
+  // llvm.experimental.guard doesn't have branch weights. We can assume,
+  // however, that the deopt path is unlikely.
+  Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
+  GI->getArgOperand(0), GI, true,
+  !ProfcheckDisableMetadataFixes && EstimateProfile
+  ? MDBuilder(GI->getContext()).createUnlikelyBranchWeights()
+  : nullptr,
+  &DTU, &LI);
   BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
   // SplitBlockAndInsertIfThen inserts control flow that branches to
   // DeoptBlockTerm if the condition is true.  We want the opposite.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
index 533b1f691f5ad..e83047e397d3d 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
@@ -1,26 +1,34 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: -p --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: -p --check-globals all --version 5
 ; RUN: opt -passes='loop(simple-loop-unswitch),verify' 
-simple-loop-unswitch-guards -S < %s | FileCheck %s
 ; RUN: opt -passes='simple-loop-unswitch' 
-simple-loop-unswitch-guards -S < %s | FileCheck %s
 ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' 
-simple-loop-unswitch-guards  -verify-memoryssa -verify-loop-info -S < %s | 
FileCheck %s
 
 declare void @llvm.experimental.guard(i1, ...)
 
-define void @test_simple_case(i1 %cond, i32 %N) {
-; CHECK-LABEL: @test_simple_case(
+define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
+; CHECK-LABEL: define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond, label %entry.split.us, label %entry.split, !prof 
!1
 ; CHECK:   entry.split.us:
-; CHECK-NEXT:br label [[LOOP_US:%.*]]
+; CHECK-NEXT:br label %loop.us
 ; CHECK:   loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 
[[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
-; CHECK-NEXT:br label [[GUARDED_US]]
+; CHECK-NEXT:%iv.us = phi i32 [ 0, %entry.split.us ], [ %iv.next.us, 
%guarded.us ]
+; CHECK-NEXT:br label %guarded.us
 ; CHECK:   guarded.us:
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
-; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label 
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:%iv.next.us = add i32 %iv.us, 1
+; CHECK-NEXT:%loop.cond.us = icmp slt i32 %iv.next.us, %N
+; CHECK-NEXT:br i1 %loop.cond.us, label %loop.us, label %exit.split.us
+; CHECK:   exit.split.us:
+; CHECK-NEXT:br label %exit
+; CHECK:   entry.split:
+; CHECK-NEXT:br label %loop
+; CHECK:   loop:
+; CHECK-NEXT:br label %deopt
 ; CHECK:   deopt:
 ; CHECK-NEXT:call void (i1, ...) @llvm.experimental.guard(i1 false) [ 
"deopt"() ]
 ; CHECK-NEXT:unreachable
+; CHECK:   exit:
+; CHECK-NEXT:ret void
 ;
 
 entry:
@@ -38,25 +46,39 @@ exit:
 }
 
 define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
-; CHECK-LABEL: @test_two_guards(
+; CHECK-LABEL: define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond1, label %entry.split.us, label %entry.split, 
!prof !1
 ; CHECK:

[llvm-branch-commits] [llvm] [LIR][profcheck] Reuse the loop's exit condition profile (PR #164523)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164523

>From aafb258bc4561fb35d86518a2d96769ab9b2ac46 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 17:24:49 -0700
Subject: [PATCH] [LIR][profcheck] Reuse the loop's exit condition profile

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 40 +--
 .../LoopIdiom/X86/preserve-profile.ll | 70 +++
 2 files changed, 106 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp 
b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
 STATISTIC(NumShiftUntilZero,
   "Number of uncountable loops recognized as 'shift until zero' 
idiom");
 
+namespace llvm {
 bool DisableLIRP::All;
 static cl::opt
 DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt ForceMemsetPatternIntrinsic(
 cl::desc("Use memset.pattern intrinsic whenever possible"), 
cl::init(false),
 cl::Hidden);
 
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
 namespace {
 
 class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
   // The loop trip count check.
   auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
-  Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+  SmallVector<uint32_t> BranchWeights;
+  const bool HasBranchWeights =
+  !ProfcheckDisableMetadataFixes &&
+  extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+  auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+  if (HasBranchWeights) {
+if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+  std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+  }
+
   LoopHeaderBB->getTerminator()->eraseFromParent();
 
   // Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, 
ScalarEvolution *SE,
 /// %start = <...>
 /// %extraoffset = <...>
 /// <...>
-/// br label %for.cond
+/// br label %loop
 ///
 ///   loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
 /// %nbits = add nsw i8 %iv, %extraoffset
 /// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
 /// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
 
   // The loop terminator.
   Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
-  Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+  SmallVector<uint32_t> BranchWeights;
+  const bool HasBranchWeights =
+  !ProfcheckDisableMetadataFixes &&
+  extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+  auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+  if (HasBranchWeights) {
+if (InvertedCond)
+  std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+  }
   LoopHeaderBB->getTerminator()->eraseFromParent();
 
   // Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll 
b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt 
-passes="module(print),function(loop(loop-idiom)),module(print)"
 -mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck 
--check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nsw i8 %iv, %extraoffset
+  %val.shifted = ashr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escap

[llvm-branch-commits] [llvm] [SLU][profcheck] Propagate profile for branches on injected conditions. (PR #164476)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164476

>From f696a5d6aa0895cfad1a61eab1f3a0ee79bd894c Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 11:22:01 -0700
Subject: [PATCH] [SLU][profcheck] Propagate profile for branches on injected
 conditions.

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |   9 +-
 .../inject-invariant-conditions.ll| 142 +-
 2 files changed, 79 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 88a19d3683991..988825737f04f 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -3201,10 +3201,15 @@ 
injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
   Builder.SetInsertPoint(TI);
   auto *InvariantBr =
   Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
+  // We don't know anything about the relation between the limits.
+  setExplicitlyUnknownBranchWeightsIfProfiled(
+  *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE);
 
   Builder.SetInsertPoint(CheckBlock);
-  Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
-   TI->getSuccessor(1));
+  Builder.CreateCondBr(
+  TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
+  !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
+ : nullptr);
   TI->eraseFromParent();
 
   // Fixup phis.
diff --git 
a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
index 536e0c6a0e74a..3c84dea2a0672 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
@@ -2,40 +2,40 @@
 ; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true 
-passes="loop(simple-loop-unswitch),simplifycfg" | FileCheck %s
 ; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true 
-passes="loop-mssa(simple-loop-unswitch),simplifycfg" 
-verify-memoryssa | FileCheck %s
 
-define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr 
noundef %arr, ptr noundef %x_p) {
+define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr 
noundef %arr, ptr noundef %x_p) !prof !{!"function_entry_count", i32 10} {
 ; CHECK-LABEL: @test_01(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef 
[[META1:![0-9]+]]
 ; CHECK-NEXT:[[INJECTED_COND:%.*]] = icmp ule i32 [[LIMIT:%.*]], [[X]]
-; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label 
[[LOOP:%.*]]
+; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label 
[[LOOP:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:   loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], 
[[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:[[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 
[[IV_US]]
-; CHECK-NEXT:[[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
-; CHECK-NEXT:[[BOUND_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LIMIT]]
-; CHECK-NEXT:br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label 
[[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] 
], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV]]
+; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT:[[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT]]
+; CHECK-NEXT:br i1 [[BOUND_CHECK]], label [[GUARDED_US]], label 
[[COMMON_RET:%.*]], !prof [[PROF3:![0-9]+]]
 ; CHECK:   guarded.us:
-; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[X]]
-; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 
[[EL_US]]
-; CHECK-NEXT:store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL]], [[X]]
+; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 
[[EL]]
+; CHECK-NEXT:store i32 [[IV]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV]], 1
 ; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
 ; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label 
[[COMMON_RET]]
 ; CHECK:   loop:
-; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 
0, [[ENTRY]] ]
-; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
-; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
-; CHECK-NEXT:[[BOUND_CHEC

[llvm-branch-commits] [llvm] [LIR][profcheck] Reuse the loop's exit condition profile (PR #164523)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164523

>From aafb258bc4561fb35d86518a2d96769ab9b2ac46 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 17:24:49 -0700
Subject: [PATCH] [LIR][profcheck] Reuse the loop's exit condition profile

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 40 +--
 .../LoopIdiom/X86/preserve-profile.ll | 70 +++
 2 files changed, 106 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp 
b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
 STATISTIC(NumShiftUntilZero,
   "Number of uncountable loops recognized as 'shift until zero' 
idiom");
 
+namespace llvm {
 bool DisableLIRP::All;
 static cl::opt
 DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt ForceMemsetPatternIntrinsic(
 cl::desc("Use memset.pattern intrinsic whenever possible"), 
cl::init(false),
 cl::Hidden);
 
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
 namespace {
 
 class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
   // The loop trip count check.
   auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
-  Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+  SmallVector<uint32_t> BranchWeights;
+  const bool HasBranchWeights =
+  !ProfcheckDisableMetadataFixes &&
+  extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+  auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+  if (HasBranchWeights) {
+if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+  std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+  }
+
   LoopHeaderBB->getTerminator()->eraseFromParent();
 
   // Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, 
ScalarEvolution *SE,
 /// %start = <...>
 /// %extraoffset = <...>
 /// <...>
-/// br label %for.cond
+/// br label %loop
 ///
 ///   loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
 /// %nbits = add nsw i8 %iv, %extraoffset
 /// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
 /// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
 
   // The loop terminator.
   Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
-  Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+  SmallVector<uint32_t> BranchWeights;
+  const bool HasBranchWeights =
+  !ProfcheckDisableMetadataFixes &&
+  extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+  auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+  if (HasBranchWeights) {
+if (InvertedCond)
+  std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+  }
   LoopHeaderBB->getTerminator()->eraseFromParent();
 
   // Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll 
b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt 
-passes="module(print),function(loop(loop-idiom)),module(print)"
 -mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck 
--check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nsw i8 %iv, %extraoffset
+  %val.shifted = ashr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escap

[llvm-branch-commits] [llvm] [LVer][profcheck] explicitly set unknown branch weights for the versioned/unversioned selector (PR #164507)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164507

>From e745aaf0e5454fd7f79269517a1ca2eb4c582edd Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 15:20:14 -0700
Subject: [PATCH] [LVer][profcheck] explicitly set unknown branch weights for
 the versioned/unversioned selector

---
 llvm/lib/Transforms/Utils/LoopVersioning.cpp   | 10 --
 .../Transforms/LoopDistribute/basic-with-memchecks.ll  |  5 +++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp 
b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index ec2e6c1ab796b..4786819d18fa4 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -109,8 +110,13 @@ void LoopVersioning::versionLoop(
   // Insert the conditional branch based on the result of the memchecks.
   Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
   Builder.SetInsertPoint(OrigTerm);
-  Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
-   VersionedLoop->getLoopPreheader());
+  auto *BI =
+  Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+   VersionedLoop->getLoopPreheader());
+  // We don't know what the probability of executing the versioned vs the
+  // unversioned variants is.
+  setExplicitlyUnknownBranchWeightsIfProfiled(
+  *BI, *BI->getParent()->getParent(), DEBUG_TYPE);
   OrigTerm->eraseFromParent();
 
   // The loops merge in the original exit block.  This is now dominated by the
diff --git a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll 
b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 97ea2c6708dad..2828882afe779 100644
--- a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.10.0"
 @E = common global ptr null, align 8
 
 ; CHECK-LABEL: @f(
-define void @f() {
+define void @f() !prof !{!"function_entry_count", i32 10} {
 entry:
   %a = load ptr, ptr @A, align 8
   %b = load ptr, ptr @B, align 8
@@ -55,7 +55,7 @@ entry:
 ; CHECK: = icmp
 
 ; CHECK-NOT: = icmp
-; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label 
%for.body.ph.ldist1
+; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label 
%for.body.ph.ldist1, !prof ![[PROF1:[0-9]]]
 
 ; The non-distributed loop that the memchecks fall back on.
 
@@ -289,3 +289,4 @@ attributes #1 = { nounwind convergent }
 
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.distribute.enable", i1 true}
+; CHECK: ![[PROF1]] = !{!"unknown", !"loop-versioning"}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SLU][profcheck] create likely branch weights for guard->branch (PR #164271)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164271

>From 466d5f3f12337d3cccdaf184b83c6fb80c0a9186 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Mon, 20 Oct 2025 08:21:26 -0700
Subject: [PATCH] [SLU][profcheck] create likely branch weights for
 guard->branch

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |  12 +-
 .../Transforms/SimpleLoopUnswitch/guards.ll   | 181 +-
 2 files changed, 139 insertions(+), 54 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index dd36a63c1564b..88a19d3683991 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -40,6 +40,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ProfDataUtils.h"
@@ -2829,9 +2830,14 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst 
*GI, Loop &L,
  MSSAU->getMemorySSA()->verifyMemorySSA();
 
   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
-  Instruction *DeoptBlockTerm =
-  SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
-GI->getMetadata(LLVMContext::MD_prof), &DTU, 
&LI);
+  // llvm.experimental.guard doesn't have branch weights. We can assume,
+  // however, that the deopt path is unlikely.
+  Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
+  GI->getArgOperand(0), GI, true,
+  !ProfcheckDisableMetadataFixes && EstimateProfile
+  ? MDBuilder(GI->getContext()).createUnlikelyBranchWeights()
+  : nullptr,
+  &DTU, &LI);
   BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
   // SplitBlockAndInsertIfThen inserts control flow that branches to
   // DeoptBlockTerm if the condition is true.  We want the opposite.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
index 533b1f691f5ad..e83047e397d3d 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
@@ -1,26 +1,34 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: -p --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: -p --check-globals all --version 5
 ; RUN: opt -passes='loop(simple-loop-unswitch),verify' 
-simple-loop-unswitch-guards -S < %s | FileCheck %s
 ; RUN: opt -passes='simple-loop-unswitch' 
-simple-loop-unswitch-guards -S < %s | FileCheck %s
 ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' 
-simple-loop-unswitch-guards  -verify-memoryssa -verify-loop-info -S < %s | 
FileCheck %s
 
 declare void @llvm.experimental.guard(i1, ...)
 
-define void @test_simple_case(i1 %cond, i32 %N) {
-; CHECK-LABEL: @test_simple_case(
+define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
+; CHECK-LABEL: define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond, label %entry.split.us, label %entry.split, !prof 
!1
 ; CHECK:   entry.split.us:
-; CHECK-NEXT:br label [[LOOP_US:%.*]]
+; CHECK-NEXT:br label %loop.us
 ; CHECK:   loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 
[[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
-; CHECK-NEXT:br label [[GUARDED_US]]
+; CHECK-NEXT:%iv.us = phi i32 [ 0, %entry.split.us ], [ %iv.next.us, 
%guarded.us ]
+; CHECK-NEXT:br label %guarded.us
 ; CHECK:   guarded.us:
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
-; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label 
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:%iv.next.us = add i32 %iv.us, 1
+; CHECK-NEXT:%loop.cond.us = icmp slt i32 %iv.next.us, %N
+; CHECK-NEXT:br i1 %loop.cond.us, label %loop.us, label %exit.split.us
+; CHECK:   exit.split.us:
+; CHECK-NEXT:br label %exit
+; CHECK:   entry.split:
+; CHECK-NEXT:br label %loop
+; CHECK:   loop:
+; CHECK-NEXT:br label %deopt
 ; CHECK:   deopt:
 ; CHECK-NEXT:call void (i1, ...) @llvm.experimental.guard(i1 false) [ 
"deopt"() ]
 ; CHECK-NEXT:unreachable
+; CHECK:   exit:
+; CHECK-NEXT:ret void
 ;
 
 entry:
@@ -38,25 +46,39 @@ exit:
 }
 
 define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
-; CHECK-LABEL: @test_two_guards(
+; CHECK-LABEL: define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond1, label %entry.split.us, label %entry.split, 
!prof !1
 ; CHECK:

[llvm-branch-commits] [compiler-rt] [compiler-rt] Default to Lit's Internal Shell (PR #165148)

2025-10-29 Thread Florian Mayer via llvm-branch-commits

https://github.com/fmayer approved this pull request.


https://github.com/llvm/llvm-project/pull/165148
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LVer][profcheck] explicitly set unknown branch weights for the versioned/unversioned selector (PR #164507)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164507

>From e745aaf0e5454fd7f79269517a1ca2eb4c582edd Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 15:20:14 -0700
Subject: [PATCH] [LVer][profcheck] explicitly set unknown branch weights for
 the versioned/unversioned selector

---
 llvm/lib/Transforms/Utils/LoopVersioning.cpp   | 10 --
 .../Transforms/LoopDistribute/basic-with-memchecks.ll  |  5 +++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp 
b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index ec2e6c1ab796b..4786819d18fa4 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -109,8 +110,13 @@ void LoopVersioning::versionLoop(
   // Insert the conditional branch based on the result of the memchecks.
   Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
   Builder.SetInsertPoint(OrigTerm);
-  Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
-   VersionedLoop->getLoopPreheader());
+  auto *BI =
+  Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+   VersionedLoop->getLoopPreheader());
+  // We don't know what the probability of executing the versioned vs the
+  // unversioned variants is.
+  setExplicitlyUnknownBranchWeightsIfProfiled(
+  *BI, *BI->getParent()->getParent(), DEBUG_TYPE);
   OrigTerm->eraseFromParent();
 
   // The loops merge in the original exit block.  This is now dominated by the
diff --git a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll 
b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 97ea2c6708dad..2828882afe779 100644
--- a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.10.0"
 @E = common global ptr null, align 8
 
 ; CHECK-LABEL: @f(
-define void @f() {
+define void @f() !prof !{!"function_entry_count", i32 10} {
 entry:
   %a = load ptr, ptr @A, align 8
   %b = load ptr, ptr @B, align 8
@@ -55,7 +55,7 @@ entry:
 ; CHECK: = icmp
 
 ; CHECK-NOT: = icmp
-; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label 
%for.body.ph.ldist1
+; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label 
%for.body.ph.ldist1, !prof ![[PROF1:[0-9]]]
 
 ; The non-distributed loop that the memchecks fall back on.
 
@@ -289,3 +289,4 @@ attributes #1 = { nounwind convergent }
 
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.distribute.enable", i1 true}
+; CHECK: ![[PROF1]] = !{!"unknown", !"loop-versioning"}

___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LSCFG][profcheck] Add dummy branch weights for the dummy switch to dead exits (PR #164714)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164714

>From 5b7729327168a7bfbd0c6f561cd0e6a55d54fb10 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 22 Oct 2025 14:34:31 -0700
Subject: [PATCH] [LSCFG][profcheck] Add dummy branch weights for the dummy
 switch to dead exits

---
 .../lib/Transforms/Scalar/LoopSimplifyCFG.cpp |  12 ++
 .../LoopSimplifyCFG/constant-fold-branch.ll   | 104 +-
 2 files changed, 66 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp 
b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index b9546c5fa236b..e902b71776973 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -393,6 +394,17 @@ class ConstantTerminatorFoldingImpl {
   DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
   ++NumLoopExitsDeleted;
 }
+// We don't really need to add branch weights to DummySwitch, because all
+// but one branches are just a temporary artifact - see the comment on top
+// of this function. But, it's easy to estimate the weights, and it helps
+// maintain a property of the overall compiler - that the branch weights
+// don't "just get dropped" accidentally (i.e. profcheck)
+if (DummySwitch->getParent()->getParent()->hasProfileData()) {
+  SmallVector DummyBranchWeights(1 + DummySwitch->getNumCases());
+  // default. 100% probability, the rest are dead.
+  DummyBranchWeights[0] = 1;
+  setBranchWeights(*DummySwitch, DummyBranchWeights, /*IsExpected=*/false);
+}
 
 assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
 if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
diff --git a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll 
b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
index 1ec212f0bb5ea..46b6209986fed 100644
--- a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
+++ b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals
 ; REQUIRES: asserts
 ; RUN: opt -S -enable-loop-simplifycfg-term-folding=true 
-passes=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 
< %s | FileCheck %s
 ; RUN: opt -S -enable-loop-simplifycfg-term-folding=true 
-passes='require,loop(loop-simplifycfg)' -verify-loop-info 
-verify-dom-info -verify-loop-lcssa < %s | FileCheck %s
@@ -59,7 +59,7 @@ define i32 @dead_backedge_test_switch_loop(i32 %end) {
 ; CHECK:   dead_backedge:
 ; CHECK-NEXT:[[I_2]] = add i32 [[I_1]], 10
 ; CHECK-NEXT:switch i32 1, label [[EXIT:%.*]] [
-; CHECK-NEXT:i32 0, label [[HEADER_BACKEDGE]]
+; CHECK-NEXT:  i32 0, label [[HEADER_BACKEDGE]]
 ; CHECK-NEXT:]
 ; CHECK:   exit:
 ; CHECK-NEXT:[[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ]
@@ -233,12 +233,12 @@ exit:
 
 ; Check that we preserve static reachibility of a dead exit block while 
deleting
 ; a branch.
-define i32 @dead_exit_test_branch_loop(i32 %end) {
+define i32 @dead_exit_test_branch_loop(i32 %end) !prof 
!{!"function_entry_count", i32 10} {
 ; CHECK-LABEL: @dead_exit_test_branch_loop(
 ; CHECK-NEXT:  preheader:
 ; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
-; CHECK-NEXT:]
+; CHECK-NEXT:  i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT:], !prof [[PROF1:![0-9]+]]
 ; CHECK:   preheader.split:
 ; CHECK-NEXT:br label [[HEADER:%.*]]
 ; CHECK:   header:
@@ -262,7 +262,7 @@ preheader:
 
 header:
   %i = phi i32 [0, %preheader], [%i.inc, %backedge]
-  br i1 true, label %backedge, label %dead
+  br i1 true, label %backedge, label %dead, !prof !{!"branch_weights", i32 10, 
i32 1}
 
 dead:
   br label %dummy
@@ -286,7 +286,7 @@ define i32 @dead_exit_test_switch_loop(i32 %end) {
 ; CHECK-LABEL: @dead_exit_test_switch_loop(
 ; CHECK-NEXT:  preheader:
 ; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT:  i32 1, label [[DEAD:%.*]]
 ; CHECK-NEXT:]
 ; CHECK:   preheader.split:
 ; CHECK-NEXT:br label [[HEADER:%.*]]
@@ -383,9 +383,9 @@ define i32 @dead_loop_test_switch_loop(i32 %end) {
 ; CHECK:   header:
 ; CHECK-NEXT:[[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ 
[[I_INC:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:switch i32 1, label [[DEAD:%.*]] [
-; CHECK-NEXT:i32 0, label [[DEAD]]
-; CHECK-NEXT:i32 1, label [[BACKEDGE]]
-; CHECK-NEXT:i32 2, lab

[llvm-branch-commits] [llvm] [SLU][profcheck] Use the original branch weigths in `buildPartialInvariantUnswitchConditionalBranch` (PR #164270)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164270

>From f3362bc0bcb593c8e27c87a8e71ab0dc38e8dc72 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Sat, 18 Oct 2025 17:14:08 -0700
Subject: [PATCH] [SLU][profcheck] Use the original branch weigths in
 `buildPartialInvariantUnswitchConditionalBranch`

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  | 17 -
 .../SimpleLoopUnswitch/partial-unswitch.ll| 75 +++
 2 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index bb6c879f4d47e..dd36a63c1564b 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -337,7 +337,7 @@ static void buildPartialUnswitchConditionalBranch(
 static void buildPartialInvariantUnswitchConditionalBranch(
 BasicBlock &BB, ArrayRef ToDuplicate, bool Direction,
 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
-MemorySSAUpdater *MSSAU) {
+MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
   ValueToValueMapTy VMap;
   for (auto *Val : reverse(ToDuplicate)) {
 Instruction *Inst = cast(Val);
@@ -377,8 +377,17 @@ static void buildPartialInvariantUnswitchConditionalBranch(
   IRBuilder<> IRB(&BB);
   IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
   Value *Cond = VMap[ToDuplicate[0]];
-  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
-   Direction ? &NormalSucc : &UnswitchedSucc);
+  auto *ProfData =
+  !ProfcheckDisableMetadataFixes &&
+  ToDuplicate[0] == 
skipTrivialSelect(OriginalBranch.getCondition())
+  ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
+  : nullptr;
+  auto *BR =
+  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+   Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
+  if (!ProfData)
+setExplicitlyUnknownBranchWeightsIfProfiled(
+*BR, *BR->getParent()->getParent(), DEBUG_TYPE);
 }
 
 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -2515,7 +2524,7 @@ static void unswitchNontrivialInvariants(
 // the branch in the split block.
 if (PartiallyInvariant)
   buildPartialInvariantUnswitchConditionalBranch(
-  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
 else {
   buildPartialUnswitchConditionalBranch(
   *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
index 1d8942079ffd8..87161707d9f69 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -1,14 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals
 ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' 
-S < %s | FileCheck %s
 
 declare void @clobber()
 
-define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {
+define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) !prof !0 {
 ; CHECK-LABEL: @partial_unswitch_true_successor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
-; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:   entry.split.us:
 ; CHECK-NEXT:br label [[LOOP_HEADER_US:%.*]]
 ; CHECK:   loop.header.us:
@@ -19,7 +19,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) 
{
 ; CHECK:   loop.latch.us:
 ; CHECK-NEXT:[[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
 ; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label 
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label 
[[EXIT_SPLIT_US:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:   exit.split.us:
 ; CHECK-NEXT:br label [[EXIT:%.*]]
 ; CHECK:   entry.split:
@@ -28,7 +28,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) 
{
 ; CHECK-NEXT:[[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ 
[[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; CHECK-NEXT:[[LV:%.*]] = load i32, ptr [[PTR]], align 4
 ; CHECK-NEXT:[[SC:%.*]] = icmp eq i32 [[LV]], 100
-; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]], 
!prof [[PROF1]]
 ; CHECK:   noclobber:
 ; CHECK-NEXT:   

[llvm-branch-commits] [llvm] [SLU][profcheck] Propagate profile for branches on injected conditions. (PR #164476)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164476

>From f696a5d6aa0895cfad1a61eab1f3a0ee79bd894c Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 11:22:01 -0700
Subject: [PATCH] [SLU][profcheck] Propagate profile for branches on injected
 conditions.

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |   9 +-
 .../inject-invariant-conditions.ll| 142 +-
 2 files changed, 79 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 88a19d3683991..988825737f04f 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -3201,10 +3201,15 @@ 
injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
   Builder.SetInsertPoint(TI);
   auto *InvariantBr =
   Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
+  // We don't know anything about the relation between the limits.
+  setExplicitlyUnknownBranchWeightsIfProfiled(
+  *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE);
 
   Builder.SetInsertPoint(CheckBlock);
-  Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
-   TI->getSuccessor(1));
+  Builder.CreateCondBr(
+  TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
+  !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
+ : nullptr);
   TI->eraseFromParent();
 
   // Fixup phis.
diff --git 
a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
index 536e0c6a0e74a..3c84dea2a0672 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
@@ -2,40 +2,40 @@
 ; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true 
-passes="loop(simple-loop-unswitch),simplifycfg" | FileCheck %s
 ; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true 
-passes="loop-mssa(simple-loop-unswitch),simplifycfg" 
-verify-memoryssa | FileCheck %s
 
-define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr 
noundef %arr, ptr noundef %x_p) {
+define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr 
noundef %arr, ptr noundef %x_p) !prof !{!"function_entry_count", i32 10} {
 ; CHECK-LABEL: @test_01(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef 
[[META1:![0-9]+]]
 ; CHECK-NEXT:[[INJECTED_COND:%.*]] = icmp ule i32 [[LIMIT:%.*]], [[X]]
-; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label 
[[LOOP:%.*]]
+; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label 
[[LOOP:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:   loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], 
[[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:[[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 
[[IV_US]]
-; CHECK-NEXT:[[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
-; CHECK-NEXT:[[BOUND_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LIMIT]]
-; CHECK-NEXT:br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label 
[[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] 
], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV]]
+; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT:[[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT]]
+; CHECK-NEXT:br i1 [[BOUND_CHECK]], label [[GUARDED_US]], label 
[[COMMON_RET:%.*]], !prof [[PROF3:![0-9]+]]
 ; CHECK:   guarded.us:
-; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[X]]
-; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 
[[EL_US]]
-; CHECK-NEXT:store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL]], [[X]]
+; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 
[[EL]]
+; CHECK-NEXT:store i32 [[IV]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV]], 1
 ; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
 ; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label 
[[COMMON_RET]]
 ; CHECK:   loop:
-; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 
0, [[ENTRY]] ]
-; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
-; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
-; CHECK-NEXT:[[BOUND_CHEC

[llvm-branch-commits] [llvm] [SLU][profcheck] Use the original branch weigths in `buildPartialInvariantUnswitchConditionalBranch` (PR #164270)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164270

>From f3362bc0bcb593c8e27c87a8e71ab0dc38e8dc72 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Sat, 18 Oct 2025 17:14:08 -0700
Subject: [PATCH] [SLU][profcheck] Use the original branch weigths in
 `buildPartialInvariantUnswitchConditionalBranch`

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  | 17 -
 .../SimpleLoopUnswitch/partial-unswitch.ll| 75 +++
 2 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index bb6c879f4d47e..dd36a63c1564b 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -337,7 +337,7 @@ static void buildPartialUnswitchConditionalBranch(
 static void buildPartialInvariantUnswitchConditionalBranch(
 BasicBlock &BB, ArrayRef ToDuplicate, bool Direction,
 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
-MemorySSAUpdater *MSSAU) {
+MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
   ValueToValueMapTy VMap;
   for (auto *Val : reverse(ToDuplicate)) {
 Instruction *Inst = cast(Val);
@@ -377,8 +377,17 @@ static void buildPartialInvariantUnswitchConditionalBranch(
   IRBuilder<> IRB(&BB);
   IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
   Value *Cond = VMap[ToDuplicate[0]];
-  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
-   Direction ? &NormalSucc : &UnswitchedSucc);
+  auto *ProfData =
+  !ProfcheckDisableMetadataFixes &&
+  ToDuplicate[0] == 
skipTrivialSelect(OriginalBranch.getCondition())
+  ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
+  : nullptr;
+  auto *BR =
+  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+   Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
+  if (!ProfData)
+setExplicitlyUnknownBranchWeightsIfProfiled(
+*BR, *BR->getParent()->getParent(), DEBUG_TYPE);
 }
 
 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -2515,7 +2524,7 @@ static void unswitchNontrivialInvariants(
 // the branch in the split block.
 if (PartiallyInvariant)
   buildPartialInvariantUnswitchConditionalBranch(
-  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
 else {
   buildPartialUnswitchConditionalBranch(
   *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
index 1d8942079ffd8..87161707d9f69 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -1,14 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals
 ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' 
-S < %s | FileCheck %s
 
 declare void @clobber()
 
-define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {
+define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) !prof !0 {
 ; CHECK-LABEL: @partial_unswitch_true_successor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
-; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:   entry.split.us:
 ; CHECK-NEXT:br label [[LOOP_HEADER_US:%.*]]
 ; CHECK:   loop.header.us:
@@ -19,7 +19,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) 
{
 ; CHECK:   loop.latch.us:
 ; CHECK-NEXT:[[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
 ; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label 
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label 
[[EXIT_SPLIT_US:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:   exit.split.us:
 ; CHECK-NEXT:br label [[EXIT:%.*]]
 ; CHECK:   entry.split:
@@ -28,7 +28,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) 
{
 ; CHECK-NEXT:[[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ 
[[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; CHECK-NEXT:[[LV:%.*]] = load i32, ptr [[PTR]], align 4
 ; CHECK-NEXT:[[SC:%.*]] = icmp eq i32 [[LV]], 100
-; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]], 
!prof [[PROF1]]
 ; CHECK:   noclobber:
 ; CHECK-NEXT:   

[llvm-branch-commits] [llvm] [LSCFG][profcheck] Add dummy branch weights for the dummy switch to dead exits (PR #164714)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164714

>From 5b7729327168a7bfbd0c6f561cd0e6a55d54fb10 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 22 Oct 2025 14:34:31 -0700
Subject: [PATCH] [LSCFG][profcheck] Add dummy branch weights for the dummy
 switch to dead exits

---
 .../lib/Transforms/Scalar/LoopSimplifyCFG.cpp |  12 ++
 .../LoopSimplifyCFG/constant-fold-branch.ll   | 104 +-
 2 files changed, 66 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp 
b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index b9546c5fa236b..e902b71776973 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -393,6 +394,17 @@ class ConstantTerminatorFoldingImpl {
   DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
   ++NumLoopExitsDeleted;
 }
+// We don't really need to add branch weights to DummySwitch, because all
+// but one branches are just a temporary artifact - see the comment on top
+// of this function. But, it's easy to estimate the weights, and it helps
+// maintain a property of the overall compiler - that the branch weights
+// don't "just get dropped" accidentally (i.e. profcheck)
+if (DummySwitch->getParent()->getParent()->hasProfileData()) {
+  SmallVector DummyBranchWeights(1 + DummySwitch->getNumCases());
+  // default. 100% probability, the rest are dead.
+  DummyBranchWeights[0] = 1;
+  setBranchWeights(*DummySwitch, DummyBranchWeights, /*IsExpected=*/false);
+}
 
 assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
 if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
diff --git a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll 
b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
index 1ec212f0bb5ea..46b6209986fed 100644
--- a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
+++ b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals
 ; REQUIRES: asserts
 ; RUN: opt -S -enable-loop-simplifycfg-term-folding=true 
-passes=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 
< %s | FileCheck %s
 ; RUN: opt -S -enable-loop-simplifycfg-term-folding=true 
-passes='require,loop(loop-simplifycfg)' -verify-loop-info 
-verify-dom-info -verify-loop-lcssa < %s | FileCheck %s
@@ -59,7 +59,7 @@ define i32 @dead_backedge_test_switch_loop(i32 %end) {
 ; CHECK:   dead_backedge:
 ; CHECK-NEXT:[[I_2]] = add i32 [[I_1]], 10
 ; CHECK-NEXT:switch i32 1, label [[EXIT:%.*]] [
-; CHECK-NEXT:i32 0, label [[HEADER_BACKEDGE]]
+; CHECK-NEXT:  i32 0, label [[HEADER_BACKEDGE]]
 ; CHECK-NEXT:]
 ; CHECK:   exit:
 ; CHECK-NEXT:[[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ]
@@ -233,12 +233,12 @@ exit:
 
 ; Check that we preserve static reachibility of a dead exit block while 
deleting
 ; a branch.
-define i32 @dead_exit_test_branch_loop(i32 %end) {
+define i32 @dead_exit_test_branch_loop(i32 %end) !prof 
!{!"function_entry_count", i32 10} {
 ; CHECK-LABEL: @dead_exit_test_branch_loop(
 ; CHECK-NEXT:  preheader:
 ; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
-; CHECK-NEXT:]
+; CHECK-NEXT:  i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT:], !prof [[PROF1:![0-9]+]]
 ; CHECK:   preheader.split:
 ; CHECK-NEXT:br label [[HEADER:%.*]]
 ; CHECK:   header:
@@ -262,7 +262,7 @@ preheader:
 
 header:
   %i = phi i32 [0, %preheader], [%i.inc, %backedge]
-  br i1 true, label %backedge, label %dead
+  br i1 true, label %backedge, label %dead, !prof !{!"branch_weights", i32 10, 
i32 1}
 
 dead:
   br label %dummy
@@ -286,7 +286,7 @@ define i32 @dead_exit_test_switch_loop(i32 %end) {
 ; CHECK-LABEL: @dead_exit_test_switch_loop(
 ; CHECK-NEXT:  preheader:
 ; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT:  i32 1, label [[DEAD:%.*]]
 ; CHECK-NEXT:]
 ; CHECK:   preheader.split:
 ; CHECK-NEXT:br label [[HEADER:%.*]]
@@ -383,9 +383,9 @@ define i32 @dead_loop_test_switch_loop(i32 %end) {
 ; CHECK:   header:
 ; CHECK-NEXT:[[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ 
[[I_INC:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:switch i32 1, label [[DEAD:%.*]] [
-; CHECK-NEXT:i32 0, label [[DEAD]]
-; CHECK-NEXT:i32 1, label [[BACKEDGE]]
-; CHECK-NEXT:i32 2, lab

[llvm-branch-commits] [llvm] [DA] Check nsw when extracting a constant operand of SCEVMul (PR #164408)

2025-10-29 Thread Ryotaro Kasuga via llvm-branch-commits

https://github.com/kasuga-fj updated 
https://github.com/llvm/llvm-project/pull/164408

>From 4c5c963d99d99b4649b39fca172917fc0b09ccb2 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga 
Date: Tue, 21 Oct 2025 12:23:25 +
Subject: [PATCH 1/2] [DA] Check nsw when extracting a constant operand of
 SCEVMul

---
 llvm/lib/Analysis/DependenceAnalysis.cpp  |  5 +++--
 llvm/test/Analysis/DependenceAnalysis/GCD.ll  |  6 +++---
 .../Analysis/DependenceAnalysis/SymbolicSIV.ll|  4 ++--
 .../DependenceAnalysis/gcd-miv-overflow.ll| 15 ++-
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp 
b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 853bd66c8a7f8..36ac252aba6ed 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -2828,8 +2828,9 @@ static std::optional getConstantPart(const SCEV 
*Expr) {
   if (const auto *Constant = dyn_cast(Expr))
 return Constant->getAPInt();
   if (const auto *Product = dyn_cast(Expr))
-if (const auto *Constant = dyn_cast(Product->getOperand(0)))
-  return Constant->getAPInt();
+if (Product->hasNoSignedWrap())
+  if (auto *Constant = dyn_cast(Product->getOperand(0)))
+return Constant->getAPInt();
   return std::nullopt;
 }
 
diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll 
b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
index 03343e7a98211..cb14d189afe4c 100644
--- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
@@ -254,7 +254,7 @@ define void @gcd4(ptr %A, ptr %B, i64 %M, i64 %N) nounwind 
uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx16, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx16, align 4
@@ -322,7 +322,7 @@ define void @gcd5(ptr %A, ptr %B, i64 %M, i64 %N) nounwind 
uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx16, align 4
-; CHECK-NEXT:da analyze - flow [<> *]!
+; CHECK-NEXT:da analyze - flow [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx16, align 4
@@ -390,7 +390,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable 
ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store 
i32 %conv, ptr %arrayidx5, align 4
 ; CHECK-NEXT:da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %2 = 
load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store 
i32 %2, ptr %B.addr.12, align 4
 ; CHECK-NEXT:da analyze - confused!
 ; CHECK-NEXT:  Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: %2 = load 
i32, ptr %arrayidx9, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll 
b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index cdfaec76fa892..73a415baef4c4 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -384,7 +384,7 @@ define void @symbolicsiv6(ptr %A, ptr %B, i64 %n, i64 %N, 
i64 %M) nounwind uwtab
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx7, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx7, align 4
@@ -440,7 +440,7 @@ define void @symbolicsiv7(ptr %A, ptr %B, i64 %n, i64 %N, 
i64 %M) nounwind uwtab
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load

[llvm-branch-commits] [llvm] [llvm-readobj, ELF] Support reading binary with more than PN_XNUM segments. (PR #165278)

2025-10-29 Thread via llvm-branch-commits

aokblast wrote:

> @aokblast, is this ready for review? It's currently still attempting to merge 
> into another of your user branches, whereas if it's ready for review, I'd 
> expect it to be merging into `main`.

Hello, yes, it is ready for review. However, I temporarily created a user 
branch based on the one you approved yesterday, since MaskRay hasn't accepted 
that patch yet. If he doesn't respond within a week, I will merge that patch 
so that this patch can be merged into main.

https://github.com/llvm/llvm-project/pull/165278
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LIR][profcheck] Reuse the loop's exit condition profile (PR #164523)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164523

>From 1bd2ba55726bd525908b186e0460e0556e7c3c6a Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 17:24:49 -0700
Subject: [PATCH] [LIR][profcheck] Reuse the loop's exit condition profile

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 40 +--
 .../LoopIdiom/X86/preserve-profile.ll | 70 +++
 2 files changed, 106 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp 
b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
 STATISTIC(NumShiftUntilZero,
   "Number of uncountable loops recognized as 'shift until zero' 
idiom");
 
+namespace llvm {
 bool DisableLIRP::All;
 static cl::opt
 DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt ForceMemsetPatternIntrinsic(
 cl::desc("Use memset.pattern intrinsic whenever possible"), 
cl::init(false),
 cl::Hidden);
 
+extern cl::opt ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
 namespace {
 
 class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
   // The loop trip count check.
   auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
-  Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+  SmallVector BranchWeights;
+  const bool HasBranchWeights =
+  !ProfcheckDisableMetadataFixes &&
+  extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+  auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+  if (HasBranchWeights) {
+if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+  std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+  }
+
   LoopHeaderBB->getTerminator()->eraseFromParent();
 
   // Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, 
ScalarEvolution *SE,
 /// %start = <...>
 /// %extraoffset = <...>
 /// <...>
-/// br label %for.cond
+/// br label %loop
 ///
 ///   loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
 /// %nbits = add nsw i8 %iv, %extraoffset
 /// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
 /// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
 
   // The loop terminator.
   Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
-  Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+  SmallVector BranchWeights;
+  const bool HasBranchWeights =
+  !ProfcheckDisableMetadataFixes &&
+  extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+  auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+  if (HasBranchWeights) {
+if (InvertedCond)
+  std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+  }
   LoopHeaderBB->getTerminator()->eraseFromParent();
 
   // Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll 
b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt 
-passes="module(print),function(loop(loop-idiom)),module(print)"
 -mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck 
--check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nsw i8 %iv, %extraoffset
+  %val.shifted = ashr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escap

[llvm-branch-commits] [llvm] [LVer][profcheck] explicitly set unknown branch weights for the versioned/unversioned selector (PR #164507)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164507

>From 577971fd0c10a2733c3255609f5cb7db874011a4 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 15:20:14 -0700
Subject: [PATCH] [LVer][profcheck] explicitly set unknown branch weights for
 the versioned/unversioned selector

---
 llvm/lib/Transforms/Utils/LoopVersioning.cpp   | 10 --
 .../Transforms/LoopDistribute/basic-with-memchecks.ll  |  5 +++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp 
b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index ec2e6c1ab796b..4786819d18fa4 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -109,8 +110,13 @@ void LoopVersioning::versionLoop(
   // Insert the conditional branch based on the result of the memchecks.
   Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
   Builder.SetInsertPoint(OrigTerm);
-  Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
-   VersionedLoop->getLoopPreheader());
+  auto *BI =
+  Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+   VersionedLoop->getLoopPreheader());
+  // We don't know what the probability of executing the versioned vs the
+  // unversioned variants is.
+  setExplicitlyUnknownBranchWeightsIfProfiled(
+  *BI, *BI->getParent()->getParent(), DEBUG_TYPE);
   OrigTerm->eraseFromParent();
 
   // The loops merge in the original exit block.  This is now dominated by the
diff --git a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll 
b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 97ea2c6708dad..2828882afe779 100644
--- a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.10.0"
 @E = common global ptr null, align 8
 
 ; CHECK-LABEL: @f(
-define void @f() {
+define void @f() !prof !{!"function_entry_count", i32 10} {
 entry:
   %a = load ptr, ptr @A, align 8
   %b = load ptr, ptr @B, align 8
@@ -55,7 +55,7 @@ entry:
 ; CHECK: = icmp
 
 ; CHECK-NOT: = icmp
-; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label 
%for.body.ph.ldist1
+; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label 
%for.body.ph.ldist1, !prof ![[PROF1:[0-9]]]
 
 ; The non-distributed loop that the memchecks fall back on.
 
@@ -289,3 +289,4 @@ attributes #1 = { nounwind convergent }
 
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.distribute.enable", i1 true}
+; CHECK: ![[PROF1]] = !{!"unknown", !"loop-versioning"}

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SLU][profcheck] Propagate profile for branches on injected conditions. (PR #164476)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164476

>From 54eacf7f9cdd8e57ba591e0bbf4b8eb35ff8bf4d Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 11:22:01 -0700
Subject: [PATCH] [SLU][profcheck] Propagate profile for branches on injected
 conditions.

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |   9 +-
 .../inject-invariant-conditions.ll| 142 +-
 2 files changed, 79 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index f54d1ba5ca14e..ff4fcb599c5dd 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -3199,10 +3199,15 @@ 
injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
   Builder.SetInsertPoint(TI);
   auto *InvariantBr =
   Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
+  // We don't know anything about the relation between the limits.
+  setExplicitlyUnknownBranchWeightsIfProfiled(
+  *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE);
 
   Builder.SetInsertPoint(CheckBlock);
-  Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
-   TI->getSuccessor(1));
+  Builder.CreateCondBr(
+  TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
+  !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
+ : nullptr);
   TI->eraseFromParent();
 
   // Fixup phis.
diff --git 
a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
index 536e0c6a0e74a..3c84dea2a0672 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
@@ -2,40 +2,40 @@
 ; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true 
-passes="loop(simple-loop-unswitch),simplifycfg" | FileCheck %s
 ; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true 
-passes="loop-mssa(simple-loop-unswitch),simplifycfg" 
-verify-memoryssa | FileCheck %s
 
-define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr 
noundef %arr, ptr noundef %x_p) {
+define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr 
noundef %arr, ptr noundef %x_p) !prof !{!"function_entry_count", i32 10} {
 ; CHECK-LABEL: @test_01(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef 
[[META1:![0-9]+]]
 ; CHECK-NEXT:[[INJECTED_COND:%.*]] = icmp ule i32 [[LIMIT:%.*]], [[X]]
-; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label 
[[LOOP:%.*]]
+; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label 
[[LOOP:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:   loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], 
[[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:[[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 
[[IV_US]]
-; CHECK-NEXT:[[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
-; CHECK-NEXT:[[BOUND_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LIMIT]]
-; CHECK-NEXT:br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label 
[[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] 
], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV]]
+; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT:[[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT]]
+; CHECK-NEXT:br i1 [[BOUND_CHECK]], label [[GUARDED_US]], label 
[[COMMON_RET:%.*]], !prof [[PROF3:![0-9]+]]
 ; CHECK:   guarded.us:
-; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[X]]
-; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 
[[EL_US]]
-; CHECK-NEXT:store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL]], [[X]]
+; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 
[[EL]]
+; CHECK-NEXT:store i32 [[IV]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV]], 1
 ; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
 ; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label 
[[COMMON_RET]]
 ; CHECK:   loop:
-; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 
0, [[ENTRY]] ]
-; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
-; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
-; CHECK-NEXT:[[BOUND_CHEC

[llvm-branch-commits] [llvm] [LSCFG][profcheck] Add dummy branch weights for the dummy switch to dead exits (PR #164714)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164714

>From e24859154f6753e6c9abaa5dd272a9a10f93b1ad Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Wed, 22 Oct 2025 14:34:31 -0700
Subject: [PATCH] [LSCFG][profcheck] Add dummy branch weights for the dummy
 switch to dead exits

---
 .../lib/Transforms/Scalar/LoopSimplifyCFG.cpp |  12 ++
 .../LoopSimplifyCFG/constant-fold-branch.ll   | 104 +-
 2 files changed, 66 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp 
b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index b9546c5fa236b..e902b71776973 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -393,6 +394,17 @@ class ConstantTerminatorFoldingImpl {
   DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
   ++NumLoopExitsDeleted;
 }
+// We don't really need to add branch weights to DummySwitch, because all
+// but one of its branches are just a temporary artifact - see the comment
+// on top of this function. But it's easy to estimate the weights, and it
+// helps maintain a property of the overall compiler - that branch weights
+// don't "just get dropped" accidentally (i.e. profcheck).
+if (DummySwitch->getParent()->getParent()->hasProfileData()) {
+  SmallVector DummyBranchWeights(1 + DummySwitch->getNumCases());
+  // default. 100% probability, the rest are dead.
+  DummyBranchWeights[0] = 1;
+  setBranchWeights(*DummySwitch, DummyBranchWeights, /*IsExpected=*/false);
+}
 
 assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
 if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
diff --git a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll 
b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
index 1ec212f0bb5ea..46b6209986fed 100644
--- a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
+++ b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals
 ; REQUIRES: asserts
 ; RUN: opt -S -enable-loop-simplifycfg-term-folding=true 
-passes=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa 
< %s | FileCheck %s
 ; RUN: opt -S -enable-loop-simplifycfg-term-folding=true 
-passes='require,loop(loop-simplifycfg)' -verify-loop-info 
-verify-dom-info -verify-loop-lcssa < %s | FileCheck %s
@@ -59,7 +59,7 @@ define i32 @dead_backedge_test_switch_loop(i32 %end) {
 ; CHECK:   dead_backedge:
 ; CHECK-NEXT:[[I_2]] = add i32 [[I_1]], 10
 ; CHECK-NEXT:switch i32 1, label [[EXIT:%.*]] [
-; CHECK-NEXT:i32 0, label [[HEADER_BACKEDGE]]
+; CHECK-NEXT:  i32 0, label [[HEADER_BACKEDGE]]
 ; CHECK-NEXT:]
 ; CHECK:   exit:
 ; CHECK-NEXT:[[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ]
@@ -233,12 +233,12 @@ exit:
 
 ; Check that we preserve static reachibility of a dead exit block while 
deleting
 ; a branch.
-define i32 @dead_exit_test_branch_loop(i32 %end) {
+define i32 @dead_exit_test_branch_loop(i32 %end) !prof 
!{!"function_entry_count", i32 10} {
 ; CHECK-LABEL: @dead_exit_test_branch_loop(
 ; CHECK-NEXT:  preheader:
 ; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
-; CHECK-NEXT:]
+; CHECK-NEXT:  i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT:], !prof [[PROF1:![0-9]+]]
 ; CHECK:   preheader.split:
 ; CHECK-NEXT:br label [[HEADER:%.*]]
 ; CHECK:   header:
@@ -262,7 +262,7 @@ preheader:
 
 header:
   %i = phi i32 [0, %preheader], [%i.inc, %backedge]
-  br i1 true, label %backedge, label %dead
+  br i1 true, label %backedge, label %dead, !prof !{!"branch_weights", i32 10, 
i32 1}
 
 dead:
   br label %dummy
@@ -286,7 +286,7 @@ define i32 @dead_exit_test_switch_loop(i32 %end) {
 ; CHECK-LABEL: @dead_exit_test_switch_loop(
 ; CHECK-NEXT:  preheader:
 ; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT:  i32 1, label [[DEAD:%.*]]
 ; CHECK-NEXT:]
 ; CHECK:   preheader.split:
 ; CHECK-NEXT:br label [[HEADER:%.*]]
@@ -383,9 +383,9 @@ define i32 @dead_loop_test_switch_loop(i32 %end) {
 ; CHECK:   header:
 ; CHECK-NEXT:[[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [ 
[[I_INC:%.*]], [[BACKEDGE:%.*]] ]
 ; CHECK-NEXT:switch i32 1, label [[DEAD:%.*]] [
-; CHECK-NEXT:i32 0, label [[DEAD]]
-; CHECK-NEXT:i32 1, label [[BACKEDGE]]
-; CHECK-NEXT:i32 2, lab

[llvm-branch-commits] [llvm] [SLU][profcheck] Use the original branch weights in `buildPartialInvariantUnswitchConditionalBranch` (PR #164270)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164270

>From 31442a49e459d9712aece05e66ff0d1e4a70721c Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Sat, 18 Oct 2025 17:14:08 -0700
Subject: [PATCH] [SLU][profcheck] Use the original branch weights in
 `buildPartialInvariantUnswitchConditionalBranch`

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  | 17 -
 .../SimpleLoopUnswitch/partial-unswitch.ll| 75 +++
 2 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 763e95a5955a8..2e7a7fec745fb 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -335,7 +335,7 @@ static void buildPartialUnswitchConditionalBranch(
 static void buildPartialInvariantUnswitchConditionalBranch(
 BasicBlock &BB, ArrayRef ToDuplicate, bool Direction,
 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
-MemorySSAUpdater *MSSAU) {
+MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
   ValueToValueMapTy VMap;
   for (auto *Val : reverse(ToDuplicate)) {
 Instruction *Inst = cast(Val);
@@ -375,8 +375,17 @@ static void buildPartialInvariantUnswitchConditionalBranch(
   IRBuilder<> IRB(&BB);
   IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
   Value *Cond = VMap[ToDuplicate[0]];
-  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
-   Direction ? &NormalSucc : &UnswitchedSucc);
+  auto *ProfData =
+  !ProfcheckDisableMetadataFixes &&
+  ToDuplicate[0] == 
skipTrivialSelect(OriginalBranch.getCondition())
+  ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
+  : nullptr;
+  auto *BR =
+  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+   Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
+  if (!ProfData)
+setExplicitlyUnknownBranchWeightsIfProfiled(
+*BR, *BR->getParent()->getParent(), DEBUG_TYPE);
 }
 
 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -2513,7 +2522,7 @@ static void unswitchNontrivialInvariants(
 // the branch in the split block.
 if (PartiallyInvariant)
   buildPartialInvariantUnswitchConditionalBranch(
-  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
 else {
   buildPartialUnswitchConditionalBranch(
   *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
index 1d8942079ffd8..87161707d9f69 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -1,14 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals
 ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' 
-S < %s | FileCheck %s
 
 declare void @clobber()
 
-define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {
+define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) !prof !0 {
 ; CHECK-LABEL: @partial_unswitch_true_successor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
-; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:   entry.split.us:
 ; CHECK-NEXT:br label [[LOOP_HEADER_US:%.*]]
 ; CHECK:   loop.header.us:
@@ -19,7 +19,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) 
{
 ; CHECK:   loop.latch.us:
 ; CHECK-NEXT:[[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
 ; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label 
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label 
[[EXIT_SPLIT_US:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:   exit.split.us:
 ; CHECK-NEXT:br label [[EXIT:%.*]]
 ; CHECK:   entry.split:
@@ -28,7 +28,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) 
{
 ; CHECK-NEXT:[[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ 
[[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; CHECK-NEXT:[[LV:%.*]] = load i32, ptr [[PTR]], align 4
 ; CHECK-NEXT:[[SC:%.*]] = icmp eq i32 [[LV]], 100
-; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]], 
!prof [[PROF1]]
 ; CHECK:   noclobber:
 ; CHECK-NEXT:   

[llvm-branch-commits] [llvm] [SLU][profcheck] create likely branch weights for guard->branch (PR #164271)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164271

>From 7967c99ec1b2121b09690fc0cb57dac8814edc00 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Mon, 20 Oct 2025 08:21:26 -0700
Subject: [PATCH] [SLU][profcheck] create likely branch weights for
 guard->branch

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |  12 +-
 .../Transforms/SimpleLoopUnswitch/guards.ll   | 181 +-
 2 files changed, 139 insertions(+), 54 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 2e7a7fec745fb..f54d1ba5ca14e 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -40,6 +40,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ProfDataUtils.h"
@@ -2827,9 +2828,14 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst 
*GI, Loop &L,
  MSSAU->getMemorySSA()->verifyMemorySSA();
 
   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
-  Instruction *DeoptBlockTerm =
-  SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
-GI->getMetadata(LLVMContext::MD_prof), &DTU, 
&LI);
+  // llvm.experimental.guard doesn't have branch weights. We can assume,
+  // however, that the deopt path is unlikely.
+  Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
+  GI->getArgOperand(0), GI, true,
+  !ProfcheckDisableMetadataFixes && EstimateProfile
+  ? MDBuilder(GI->getContext()).createUnlikelyBranchWeights()
+  : nullptr,
+  &DTU, &LI);
   BranchInst *CheckBI = cast(CheckBB->getTerminator());
   // SplitBlockAndInsertIfThen inserts control flow that branches to
   // DeoptBlockTerm if the condition is true.  We want the opposite.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
index 533b1f691f5ad..e83047e397d3d 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
@@ -1,26 +1,34 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: -p --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: -p --check-globals all --version 5
 ; RUN: opt -passes='loop(simple-loop-unswitch),verify' 
-simple-loop-unswitch-guards -S < %s | FileCheck %s
 ; RUN: opt -passes='simple-loop-unswitch' 
-simple-loop-unswitch-guards -S < %s | FileCheck %s
 ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' 
-simple-loop-unswitch-guards  -verify-memoryssa -verify-loop-info -S < %s | 
FileCheck %s
 
 declare void @llvm.experimental.guard(i1, ...)
 
-define void @test_simple_case(i1 %cond, i32 %N) {
-; CHECK-LABEL: @test_simple_case(
+define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
+; CHECK-LABEL: define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond, label %entry.split.us, label %entry.split, !prof 
!1
 ; CHECK:   entry.split.us:
-; CHECK-NEXT:br label [[LOOP_US:%.*]]
+; CHECK-NEXT:br label %loop.us
 ; CHECK:   loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 
[[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
-; CHECK-NEXT:br label [[GUARDED_US]]
+; CHECK-NEXT:%iv.us = phi i32 [ 0, %entry.split.us ], [ %iv.next.us, 
%guarded.us ]
+; CHECK-NEXT:br label %guarded.us
 ; CHECK:   guarded.us:
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
-; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label 
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:%iv.next.us = add i32 %iv.us, 1
+; CHECK-NEXT:%loop.cond.us = icmp slt i32 %iv.next.us, %N
+; CHECK-NEXT:br i1 %loop.cond.us, label %loop.us, label %exit.split.us
+; CHECK:   exit.split.us:
+; CHECK-NEXT:br label %exit
+; CHECK:   entry.split:
+; CHECK-NEXT:br label %loop
+; CHECK:   loop:
+; CHECK-NEXT:br label %deopt
 ; CHECK:   deopt:
 ; CHECK-NEXT:call void (i1, ...) @llvm.experimental.guard(i1 false) [ 
"deopt"() ]
 ; CHECK-NEXT:unreachable
+; CHECK:   exit:
+; CHECK-NEXT:ret void
 ;
 
 entry:
@@ -38,25 +46,39 @@ exit:
 }
 
 define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
-; CHECK-LABEL: @test_two_guards(
+; CHECK-LABEL: define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond1, label %entry.split.us, label %entry.split, 
!prof !1
 ; CHECK:

[llvm-branch-commits] [llvm] [SLU][profcheck] Propagate profile for branches on injected conditions. (PR #164476)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164476

>From 54eacf7f9cdd8e57ba591e0bbf4b8eb35ff8bf4d Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 11:22:01 -0700
Subject: [PATCH] [SLU][profcheck] Propagate profile for branches on injected
 conditions.

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |   9 +-
 .../inject-invariant-conditions.ll| 142 +-
 2 files changed, 79 insertions(+), 72 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index f54d1ba5ca14e..ff4fcb599c5dd 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -3199,10 +3199,15 @@ 
injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
   Builder.SetInsertPoint(TI);
   auto *InvariantBr =
   Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
+  // We don't know anything about the relation between the limits.
+  setExplicitlyUnknownBranchWeightsIfProfiled(
+  *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE);
 
   Builder.SetInsertPoint(CheckBlock);
-  Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
-   TI->getSuccessor(1));
+  Builder.CreateCondBr(
+  TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
+  !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
+ : nullptr);
   TI->eraseFromParent();
 
   // Fixup phis.
diff --git 
a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
index 536e0c6a0e74a..3c84dea2a0672 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
@@ -2,40 +2,40 @@
 ; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true 
-passes="loop(simple-loop-unswitch),simplifycfg" | FileCheck %s
 ; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true 
-passes="loop-mssa(simple-loop-unswitch),simplifycfg" 
-verify-memoryssa | FileCheck %s
 
-define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr 
noundef %arr, ptr noundef %x_p) {
+define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr 
noundef %arr, ptr noundef %x_p) !prof !{!"function_entry_count", i32 10} {
 ; CHECK-LABEL: @test_01(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef 
[[META1:![0-9]+]]
 ; CHECK-NEXT:[[INJECTED_COND:%.*]] = icmp ule i32 [[LIMIT:%.*]], [[X]]
-; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label 
[[LOOP:%.*]]
+; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label 
[[LOOP:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:   loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], 
[[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:[[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 
[[IV_US]]
-; CHECK-NEXT:[[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
-; CHECK-NEXT:[[BOUND_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LIMIT]]
-; CHECK-NEXT:br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label 
[[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] 
], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV]]
+; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT:[[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT]]
+; CHECK-NEXT:br i1 [[BOUND_CHECK]], label [[GUARDED_US]], label 
[[COMMON_RET:%.*]], !prof [[PROF3:![0-9]+]]
 ; CHECK:   guarded.us:
-; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[X]]
-; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 
[[EL_US]]
-; CHECK-NEXT:store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL]], [[X]]
+; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32 
[[EL]]
+; CHECK-NEXT:store i32 [[IV]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV]], 1
 ; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
 ; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label 
[[COMMON_RET]]
 ; CHECK:   loop:
-; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ 
0, [[ENTRY]] ]
-; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
-; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
-; CHECK-NEXT:[[BOUND_CHEC

[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies for epilogue (PR #159163)

2025-10-29 Thread Joel E. Denny via llvm-branch-commits

jdenny-ornl wrote:

ping

https://github.com/llvm/llvm-project/pull/159163
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SLU][profcheck] create likely branch weights for guard->branch (PR #164271)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164271

>From 7967c99ec1b2121b09690fc0cb57dac8814edc00 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Mon, 20 Oct 2025 08:21:26 -0700
Subject: [PATCH] [SLU][profcheck] create likely branch weights for
 guard->branch

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |  12 +-
 .../Transforms/SimpleLoopUnswitch/guards.ll   | 181 +-
 2 files changed, 139 insertions(+), 54 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 2e7a7fec745fb..f54d1ba5ca14e 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -40,6 +40,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/ProfDataUtils.h"
@@ -2827,9 +2828,14 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst 
*GI, Loop &L,
  MSSAU->getMemorySSA()->verifyMemorySSA();
 
   DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
-  Instruction *DeoptBlockTerm =
-  SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
-GI->getMetadata(LLVMContext::MD_prof), &DTU, 
&LI);
+  // llvm.experimental.guard doesn't have branch weights. We can assume,
+  // however, that the deopt path is unlikely.
+  Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
+  GI->getArgOperand(0), GI, true,
+  !ProfcheckDisableMetadataFixes && EstimateProfile
+  ? MDBuilder(GI->getContext()).createUnlikelyBranchWeights()
+  : nullptr,
+  &DTU, &LI);
   BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
   // SplitBlockAndInsertIfThen inserts control flow that branches to
   // DeoptBlockTerm if the condition is true.  We want the opposite.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
index 533b1f691f5ad..e83047e397d3d 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
@@ -1,26 +1,34 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: -p --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: -p --check-globals all --version 5
 ; RUN: opt -passes='loop(simple-loop-unswitch),verify' 
-simple-loop-unswitch-guards -S < %s | FileCheck %s
 ; RUN: opt -passes='simple-loop-unswitch' 
-simple-loop-unswitch-guards -S < %s | FileCheck %s
 ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' 
-simple-loop-unswitch-guards  -verify-memoryssa -verify-loop-info -S < %s | 
FileCheck %s
 
 declare void @llvm.experimental.guard(i1, ...)
 
-define void @test_simple_case(i1 %cond, i32 %N) {
-; CHECK-LABEL: @test_simple_case(
+define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
+; CHECK-LABEL: define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond, label %entry.split.us, label %entry.split, !prof 
!1
 ; CHECK:   entry.split.us:
-; CHECK-NEXT:br label [[LOOP_US:%.*]]
+; CHECK-NEXT:br label %loop.us
 ; CHECK:   loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ 
[[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
-; CHECK-NEXT:br label [[GUARDED_US]]
+; CHECK-NEXT:%iv.us = phi i32 [ 0, %entry.split.us ], [ %iv.next.us, 
%guarded.us ]
+; CHECK-NEXT:br label %guarded.us
 ; CHECK:   guarded.us:
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
-; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label 
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:%iv.next.us = add i32 %iv.us, 1
+; CHECK-NEXT:%loop.cond.us = icmp slt i32 %iv.next.us, %N
+; CHECK-NEXT:br i1 %loop.cond.us, label %loop.us, label %exit.split.us
+; CHECK:   exit.split.us:
+; CHECK-NEXT:br label %exit
+; CHECK:   entry.split:
+; CHECK-NEXT:br label %loop
+; CHECK:   loop:
+; CHECK-NEXT:br label %deopt
 ; CHECK:   deopt:
 ; CHECK-NEXT:call void (i1, ...) @llvm.experimental.guard(i1 false) [ 
"deopt"() ]
 ; CHECK-NEXT:unreachable
+; CHECK:   exit:
+; CHECK-NEXT:ret void
 ;
 
 entry:
@@ -38,25 +46,39 @@ exit:
 }
 
 define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
-; CHECK-LABEL: @test_two_guards(
+; CHECK-LABEL: define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond1, label %entry.split.us, label %entry.split, 
!prof !1
 ; CHECK:

[llvm-branch-commits] [llvm] [SLU][profcheck] Use the original branch weights in `buildPartialInvariantUnswitchConditionalBranch` (PR #164270)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164270

>From 31442a49e459d9712aece05e66ff0d1e4a70721c Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Sat, 18 Oct 2025 17:14:08 -0700
Subject: [PATCH] [SLU][profcheck] Use the original branch weights in
 `buildPartialInvariantUnswitchConditionalBranch`

---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  | 17 -
 .../SimpleLoopUnswitch/partial-unswitch.ll| 75 +++
 2 files changed, 56 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp 
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 763e95a5955a8..2e7a7fec745fb 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -335,7 +335,7 @@ static void buildPartialUnswitchConditionalBranch(
 static void buildPartialInvariantUnswitchConditionalBranch(
 BasicBlock &BB, ArrayRef ToDuplicate, bool Direction,
 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
-MemorySSAUpdater *MSSAU) {
+MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
   ValueToValueMapTy VMap;
   for (auto *Val : reverse(ToDuplicate)) {
 Instruction *Inst = cast(Val);
@@ -375,8 +375,17 @@ static void buildPartialInvariantUnswitchConditionalBranch(
   IRBuilder<> IRB(&BB);
   IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
   Value *Cond = VMap[ToDuplicate[0]];
-  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
-   Direction ? &NormalSucc : &UnswitchedSucc);
+  auto *ProfData =
+  !ProfcheckDisableMetadataFixes &&
+  ToDuplicate[0] == 
skipTrivialSelect(OriginalBranch.getCondition())
+  ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
+  : nullptr;
+  auto *BR =
+  IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+   Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
+  if (!ProfData)
+setExplicitlyUnknownBranchWeightsIfProfiled(
+*BR, *BR->getParent()->getParent(), DEBUG_TYPE);
 }
 
 /// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -2513,7 +2522,7 @@ static void unswitchNontrivialInvariants(
 // the branch in the split block.
 if (PartiallyInvariant)
   buildPartialInvariantUnswitchConditionalBranch(
-  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+  *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
 else {
   buildPartialUnswitchConditionalBranch(
   *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll 
b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
index 1d8942079ffd8..87161707d9f69 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -1,14 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --check-globals
 ; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' 
-S < %s | FileCheck %s
 
 declare void @clobber()
 
-define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {
+define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) !prof !0 {
 ; CHECK-LABEL: @partial_unswitch_true_successor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
 ; CHECK-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
-; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label 
[[ENTRY_SPLIT:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK:   entry.split.us:
 ; CHECK-NEXT:br label [[LOOP_HEADER_US:%.*]]
 ; CHECK:   loop.header.us:
@@ -19,7 +19,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) 
{
 ; CHECK:   loop.latch.us:
 ; CHECK-NEXT:[[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
 ; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label 
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label 
[[EXIT_SPLIT_US:%.*]], !prof [[PROF2:![0-9]+]]
 ; CHECK:   exit.split.us:
 ; CHECK-NEXT:br label [[EXIT:%.*]]
 ; CHECK:   entry.split:
@@ -28,7 +28,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) 
{
 ; CHECK-NEXT:[[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ 
[[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
 ; CHECK-NEXT:[[LV:%.*]] = load i32, ptr [[PTR]], align 4
 ; CHECK-NEXT:[[SC:%.*]] = icmp eq i32 [[LV]], 100
-; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]], 
!prof [[PROF1]]
 ; CHECK:   noclobber:
 ; CHECK-NEXT:   

[llvm-branch-commits] [llvm] [LVer][profcheck] explicitly set unknown branch weights for the versioned/unversioned selector (PR #164507)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164507

>From 577971fd0c10a2733c3255609f5cb7db874011a4 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 15:20:14 -0700
Subject: [PATCH] [LVer][profcheck] explicitly set unknown branch weights for
 the versioned/unversioned selector

---
 llvm/lib/Transforms/Utils/LoopVersioning.cpp   | 10 --
 .../Transforms/LoopDistribute/basic-with-memchecks.ll  |  5 +++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp 
b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index ec2e6c1ab796b..4786819d18fa4 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -23,6 +23,7 @@
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -109,8 +110,13 @@ void LoopVersioning::versionLoop(
   // Insert the conditional branch based on the result of the memchecks.
   Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
   Builder.SetInsertPoint(OrigTerm);
-  Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
-   VersionedLoop->getLoopPreheader());
+  auto *BI =
+  Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+   VersionedLoop->getLoopPreheader());
+  // We don't know what the probability of executing the versioned vs the
+  // unversioned variants is.
+  setExplicitlyUnknownBranchWeightsIfProfiled(
+  *BI, *BI->getParent()->getParent(), DEBUG_TYPE);
   OrigTerm->eraseFromParent();
 
   // The loops merge in the original exit block.  This is now dominated by the
diff --git a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll 
b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 97ea2c6708dad..2828882afe779 100644
--- a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.10.0"
 @E = common global ptr null, align 8
 
 ; CHECK-LABEL: @f(
-define void @f() {
+define void @f() !prof !{!"function_entry_count", i32 10} {
 entry:
   %a = load ptr, ptr @A, align 8
   %b = load ptr, ptr @B, align 8
@@ -55,7 +55,7 @@ entry:
 ; CHECK: = icmp
 
 ; CHECK-NOT: = icmp
-; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label 
%for.body.ph.ldist1
+; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label 
%for.body.ph.ldist1, !prof ![[PROF1:[0-9]]]
 
 ; The non-distributed loop that the memchecks fall back on.
 
@@ -289,3 +289,4 @@ attributes #1 = { nounwind convergent }
 
 !0 = distinct !{!0, !1}
 !1 = !{!"llvm.loop.distribute.enable", i1 true}
+; CHECK: ![[PROF1]] = !{!"unknown", !"loop-versioning"}

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LIR][profcheck] Reuse the loop's exit condition profile (PR #164523)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin updated 
https://github.com/llvm/llvm-project/pull/164523

>From 1bd2ba55726bd525908b186e0460e0556e7c3c6a Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Tue, 21 Oct 2025 17:24:49 -0700
Subject: [PATCH] [LIR][profcheck] Reuse the loop's exit condition profile

---
 .../Transforms/Scalar/LoopIdiomRecognize.cpp  | 40 +--
 .../LoopIdiom/X86/preserve-profile.ll | 70 +++
 2 files changed, 106 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll

diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp 
b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
 STATISTIC(NumShiftUntilZero,
   "Number of uncountable loops recognized as 'shift until zero' 
idiom");
 
+namespace llvm {
 bool DisableLIRP::All;
 static cl::opt
 DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt ForceMemsetPatternIntrinsic(
 cl::desc("Use memset.pattern intrinsic whenever possible"), 
cl::init(false),
 cl::Hidden);
 
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
 namespace {
 
 class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
   // The loop trip count check.
   auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
-  Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+  SmallVector BranchWeights;
+  const bool HasBranchWeights =
+  !ProfcheckDisableMetadataFixes &&
+  extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+  auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+  if (HasBranchWeights) {
+if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+  std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+  }
+
   LoopHeaderBB->getTerminator()->eraseFromParent();
 
   // Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop, 
ScalarEvolution *SE,
 /// %start = <...>
 /// %extraoffset = <...>
 /// <...>
-/// br label %for.cond
+/// br label %loop
 ///
 ///   loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
 /// %nbits = add nsw i8 %iv, %extraoffset
 /// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
 /// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
 
   // The loop terminator.
   Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
-  Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+  SmallVector BranchWeights;
+  const bool HasBranchWeights =
+  !ProfcheckDisableMetadataFixes &&
+  extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+  auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+  if (HasBranchWeights) {
+if (InvertedCond)
+  std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+  }
   LoopHeaderBB->getTerminator()->eraseFromParent();
 
   // Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll 
b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt 
-passes="module(print),function(loop(loop-idiom)),module(print)"
 -mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck 
--check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+  %nbits = add nsw i8 %iv, %extraoffset
+  %val.shifted = ashr i8 %val, %nbits
+  %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+  %iv.next = add i8 %iv, 1
+
+  call void @escap

[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies for epilogue (PR #159163)

2025-10-29 Thread Mircea Trofin via llvm-branch-commits

https://github.com/mtrofin approved this pull request.


https://github.com/llvm/llvm-project/pull/159163
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DA] Check nsw when extracting a constant operand of SCEVMul (PR #164408)

2025-10-29 Thread Ryotaro Kasuga via llvm-branch-commits

https://github.com/kasuga-fj updated 
https://github.com/llvm/llvm-project/pull/164408

>From 4c5c963d99d99b4649b39fca172917fc0b09ccb2 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga 
Date: Tue, 21 Oct 2025 12:23:25 +
Subject: [PATCH 1/3] [DA] Check nsw when extracting a constant operand of
 SCEVMul

---
 llvm/lib/Analysis/DependenceAnalysis.cpp  |  5 +++--
 llvm/test/Analysis/DependenceAnalysis/GCD.ll  |  6 +++---
 .../Analysis/DependenceAnalysis/SymbolicSIV.ll|  4 ++--
 .../DependenceAnalysis/gcd-miv-overflow.ll| 15 ++-
 4 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp 
b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 853bd66c8a7f8..36ac252aba6ed 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -2828,8 +2828,9 @@ static std::optional getConstantPart(const SCEV 
*Expr) {
   if (const auto *Constant = dyn_cast(Expr))
 return Constant->getAPInt();
   if (const auto *Product = dyn_cast(Expr))
-if (const auto *Constant = dyn_cast(Product->getOperand(0)))
-  return Constant->getAPInt();
+if (Product->hasNoSignedWrap())
+  if (auto *Constant = dyn_cast(Product->getOperand(0)))
+return Constant->getAPInt();
   return std::nullopt;
 }
 
diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll 
b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
index 03343e7a98211..cb14d189afe4c 100644
--- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
@@ -254,7 +254,7 @@ define void @gcd4(ptr %A, ptr %B, i64 %M, i64 %N) nounwind 
uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx16, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx16, align 4
@@ -322,7 +322,7 @@ define void @gcd5(ptr %A, ptr %B, i64 %M, i64 %N) nounwind 
uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx16, align 4
-; CHECK-NEXT:da analyze - flow [<> *]!
+; CHECK-NEXT:da analyze - flow [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx16, align 4
@@ -390,7 +390,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable 
ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store 
i32 %conv, ptr %arrayidx5, align 4
 ; CHECK-NEXT:da analyze - output [* *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %2 = 
load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store 
i32 %2, ptr %B.addr.12, align 4
 ; CHECK-NEXT:da analyze - confused!
 ; CHECK-NEXT:  Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: %2 = load 
i32, ptr %arrayidx9, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll 
b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index cdfaec76fa892..73a415baef4c4 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -384,7 +384,7 @@ define void @symbolicsiv6(ptr %A, ptr %B, i64 %n, i64 %N, 
i64 %M) nounwind uwtab
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx7, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load 
i32, ptr %arrayidx7, align 4
@@ -440,7 +440,7 @@ define void @symbolicsiv7(ptr %A, ptr %B, i64 %n, i64 %N, 
i64 %M) nounwind uwtab
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 
%conv, ptr %arrayidx, align 4
 ; CHECK-NEXT:da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load

[llvm-branch-commits] [llvm] [DA] Check nsw when extracting a constant operand of SCEVMul (PR #164408)

2025-10-29 Thread Ryotaro Kasuga via llvm-branch-commits


@@ -2828,8 +2828,9 @@ static std::optional getConstantPart(const SCEV 
*Expr) {
   if (const auto *Constant = dyn_cast(Expr))
 return Constant->getAPInt();
   if (const auto *Product = dyn_cast(Expr))
-if (const auto *Constant = dyn_cast(Product->getOperand(0)))
-  return Constant->getAPInt();
+if (Product->hasNoSignedWrap())

kasuga-fj wrote:

Thanks, renamed

https://github.com/llvm/llvm-project/pull/164408
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [llvm-readobj, ELF] Support reading binary with more than PN_XNUM segments. (PR #165278)

2025-10-29 Thread James Henderson via llvm-branch-commits

jh7370 wrote:

@aokblast, is this ready for review? It's currently still attempting to merge 
into another of your user branches, whereas if it's ready for review, I'd 
expect it to be merging into `main`.

https://github.com/llvm/llvm-project/pull/165278
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT][PAC] Warn about synchronous unwind tables (PR #165227)

2025-10-29 Thread Peter Waller via llvm-branch-commits


@@ -133,11 +140,17 @@ Error 
PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
   ParallelUtilities::runOnEachFunction(
   BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
   SkipPredicate, "PointerAuthCFIAnalyzer");
+
+  float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
   BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
 << " functions. Ignored " << FunctionsIgnored << " functions "
-<< format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+<< format("(%.2lf%%)", IgnoredPercent)
 << " because of CFI inconsistencies\n";
 
+  if (IgnoredPercent >= 10.0)
+BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables.\n";

peterwaller-arm wrote:

I think these are valid concerns. The message could still be made more 
actionable for the user without naming a specific flag, by calling out that the 
workload must be recompiled with asynchronous unwind tables enabled. It could 
say something like "For C compilers the flag is typically spelled 
-fasynchronous-unwind-tables / -fno-asynchronous-unwind-tables". Even if a user 
is not using C, this might turn out to be useful information for finding the 
relevant documentation.


https://github.com/llvm/llvm-project/pull/165227
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [DA] Check nsw when extracting a constant operand of SCEVMul (PR #164408)

2025-10-29 Thread Michael Kruse via llvm-branch-commits

https://github.com/Meinersbur approved this pull request.


https://github.com/llvm/llvm-project/pull/164408
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT][PAC] Warn about synchronous unwind tables (PR #165227)

2025-10-29 Thread Gergely Bálint via llvm-branch-commits


@@ -133,11 +140,17 @@ Error 
PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
   ParallelUtilities::runOnEachFunction(
   BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
   SkipPredicate, "PointerAuthCFIAnalyzer");
+
+  float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
   BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
 << " functions. Ignored " << FunctionsIgnored << " functions "
-<< format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+<< format("(%.2lf%%)", IgnoredPercent)
 << " because of CFI inconsistencies\n";
 
+  if (IgnoredPercent >= 10.0)
+BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables.\n";

bgergely0 wrote:

Agreed, with careful wording it would be useful information to users.

https://github.com/llvm/llvm-project/pull/165227
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Use assertion in VPExpressionRecipe creation (PR #165543)

2025-10-29 Thread Sam Tebbs via llvm-branch-commits

https://github.com/SamTebbs33 edited 
https://github.com/llvm/llvm-project/pull/165543
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LV] Use assertion in VPExpressionRecipe creation (PR #165543)

2025-10-29 Thread Sam Tebbs via llvm-branch-commits

https://github.com/SamTebbs33 created 
https://github.com/llvm/llvm-project/pull/165543

This PR re-introduces the assert that the cost of a partial reduction is valid 
during VPExpressionRecipe creation.

This is a stacked PR:
1. https://github.com/llvm/llvm-project/pull/165536

>From 0aa3c2d73ee5546b2d45b8ccf7c33ca39e7ec4d2 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs 
Date: Wed, 29 Oct 2025 10:10:00 +
Subject: [PATCH] [LV] Use assertion in VPExpressionRecipe creation

---
 .../Transforms/Vectorize/VPlanTransforms.cpp  | 70 ++-
 1 file changed, 38 insertions(+), 32 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp 
b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d9ac26bba7507..e75c99c35938e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3532,24 +3532,28 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe 
*Red, VPCostContext &Ctx,
   auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
-  InstructionCost ExtRedCost;
-  InstructionCost ExtCost =
-  cast(VecOp)->computeCost(VF, Ctx);
-  InstructionCost RedCost = Red->computeCost(VF, Ctx);
-
   if (isa(Red)) {
 TargetTransformInfo::PartialReductionExtendKind ExtKind =
 TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
 // FIXME: Move partial reduction creation, costing and clamping
 // here from LoopVectorize.cpp.
-ExtRedCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
-llvm::TargetTransformInfo::PR_None, std::nullopt, 
Ctx.CostKind);
-  } else {
-ExtRedCost = Ctx.TTI.getExtendedReductionCost(
-Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
-Red->getFastMathFlags(), CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
+llvm::TargetTransformInfo::PR_None, std::nullopt,
+Ctx.CostKind);
+assert(PartialReductionCost.isValid() &&
+   "A partial reduction should have a valid cost");
+return true;
   }
+
+  InstructionCost ExtCost =
+  cast(VecOp)->computeCost(VF, Ctx);
+  InstructionCost RedCost = Red->computeCost(VF, Ctx);
+
+  InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
+  Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
+  Red->getFastMathFlags(), CostKind);
   return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
 },
 Range);
@@ -3595,33 +3599,35 @@ 
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   Type *SrcTy =
   Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
-  InstructionCost MulAccCost;
 
   if (IsPartialReduction) {
 Type *SrcTy2 =
 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : 
nullptr;
 // FIXME: Move partial reduction creation, costing and clamping
 // here from LoopVectorize.cpp.
-MulAccCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, SrcTy2, RedTy, VF,
-Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
-   Ext0->getOpcode())
- : TargetTransformInfo::PR_None,
-Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
-   Ext1->getOpcode())
- : TargetTransformInfo::PR_None,
-Mul->getOpcode(), CostKind);
-  } else {
-// Only partial reductions support mixed extends at the moment.
-if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
-  return false;
-
-bool IsZExt =
-!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
-auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
-MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
-SrcVecTy, CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, SrcTy2, RedTy, VF,
+Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
+   Ext0->getOpcode())
+ : TargetTransformInfo::PR_None,
+Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
+   Ext1->getOpcode())
+ 

[llvm-branch-commits] [llvm] [LV] Use assertion in VPExpressionRecipe creation (PR #165543)

2025-10-29 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: Sam Tebbs (SamTebbs33)


Changes

This PR re-introduces the assert that the cost of a partial reduction is valid 
during VPExpressionRecipe creation.

This is a stacked PR:
1. https://github.com/llvm/llvm-project/pull/165536
2. -> https://github.com/llvm/llvm-project/pull/165543

---
Full diff: https://github.com/llvm/llvm-project/pull/165543.diff


1 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+38-32) 


```diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp 
b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d9ac26bba7507..e75c99c35938e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3532,24 +3532,28 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe 
*Red, VPCostContext &Ctx,
   auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
-  InstructionCost ExtRedCost;
-  InstructionCost ExtCost =
-  cast(VecOp)->computeCost(VF, Ctx);
-  InstructionCost RedCost = Red->computeCost(VF, Ctx);
-
   if (isa(Red)) {
 TargetTransformInfo::PartialReductionExtendKind ExtKind =
 TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
 // FIXME: Move partial reduction creation, costing and clamping
 // here from LoopVectorize.cpp.
-ExtRedCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
-llvm::TargetTransformInfo::PR_None, std::nullopt, 
Ctx.CostKind);
-  } else {
-ExtRedCost = Ctx.TTI.getExtendedReductionCost(
-Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
-Red->getFastMathFlags(), CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
+llvm::TargetTransformInfo::PR_None, std::nullopt,
+Ctx.CostKind);
+assert(PartialReductionCost.isValid() &&
+   "A partial reduction should have a valid cost");
+return true;
   }
+
+  InstructionCost ExtCost =
+  cast(VecOp)->computeCost(VF, Ctx);
+  InstructionCost RedCost = Red->computeCost(VF, Ctx);
+
+  InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
+  Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
+  Red->getFastMathFlags(), CostKind);
   return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
 },
 Range);
@@ -3595,33 +3599,35 @@ 
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   Type *SrcTy =
   Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
-  InstructionCost MulAccCost;
 
   if (IsPartialReduction) {
 Type *SrcTy2 =
 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : 
nullptr;
 // FIXME: Move partial reduction creation, costing and clamping
 // here from LoopVectorize.cpp.
-MulAccCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, SrcTy2, RedTy, VF,
-Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
-   Ext0->getOpcode())
- : TargetTransformInfo::PR_None,
-Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
-   Ext1->getOpcode())
- : TargetTransformInfo::PR_None,
-Mul->getOpcode(), CostKind);
-  } else {
-// Only partial reductions support mixed extends at the moment.
-if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
-  return false;
-
-bool IsZExt =
-!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
-auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
-MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
-SrcVecTy, CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, SrcTy2, RedTy, VF,
+Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
+   Ext0->getOpcode())
+ : TargetTransformInfo::PR_None,
+Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
+   Ext1->getOpcode())
+ : TargetTransformInfo::PR_None,
+Mul->get

[llvm-branch-commits] [llvm] [LV] Use assertion in VPExpressionRecipe creation (PR #165543)

2025-10-29 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-vectorizers

Author: Sam Tebbs (SamTebbs33)


Changes

This PR re-introduces the assert that the cost of a partial reduction is valid 
during VPExpressionRecipe creation.

This is a stacked PR:
1. https://github.com/llvm/llvm-project/pull/165536
2. -> https://github.com/llvm/llvm-project/pull/165543

---
Full diff: https://github.com/llvm/llvm-project/pull/165543.diff


1 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+38-32) 


``diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp 
b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d9ac26bba7507..e75c99c35938e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3532,24 +3532,28 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe 
*Red, VPCostContext &Ctx,
   auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
-  InstructionCost ExtRedCost;
-  InstructionCost ExtCost =
-  cast(VecOp)->computeCost(VF, Ctx);
-  InstructionCost RedCost = Red->computeCost(VF, Ctx);
-
   if (isa(Red)) {
 TargetTransformInfo::PartialReductionExtendKind ExtKind =
 TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
 // FIXME: Move partial reduction creation, costing and clamping
 // here from LoopVectorize.cpp.
-ExtRedCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
-llvm::TargetTransformInfo::PR_None, std::nullopt, 
Ctx.CostKind);
-  } else {
-ExtRedCost = Ctx.TTI.getExtendedReductionCost(
-Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
-Red->getFastMathFlags(), CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
+llvm::TargetTransformInfo::PR_None, std::nullopt,
+Ctx.CostKind);
+assert(PartialReductionCost.isValid() &&
+   "A partial reduction should have a valid cost");
+return true;
   }
+
+  InstructionCost ExtCost =
+  cast(VecOp)->computeCost(VF, Ctx);
+  InstructionCost RedCost = Red->computeCost(VF, Ctx);
+
+  InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
+  Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
+  Red->getFastMathFlags(), CostKind);
   return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
 },
 Range);
@@ -3595,33 +3599,35 @@ 
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
   TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   Type *SrcTy =
   Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
-  InstructionCost MulAccCost;
 
   if (IsPartialReduction) {
 Type *SrcTy2 =
 Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) : 
nullptr;
 // FIXME: Move partial reduction creation, costing and clamping
 // here from LoopVectorize.cpp.
-MulAccCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, SrcTy2, RedTy, VF,
-Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
-   Ext0->getOpcode())
- : TargetTransformInfo::PR_None,
-Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
-   Ext1->getOpcode())
- : TargetTransformInfo::PR_None,
-Mul->getOpcode(), CostKind);
-  } else {
-// Only partial reductions support mixed extends at the moment.
-if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
-  return false;
-
-bool IsZExt =
-!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
-auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
-MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
-SrcVecTy, CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, SrcTy2, RedTy, VF,
+Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
+   Ext0->getOpcode())
+ : TargetTransformInfo::PR_None,
+Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
+   Ext1->getOpcode())
+ : TargetTransformInfo::PR_None,
+Mul->getOpco

[llvm-branch-commits] [llvm] [SDAG] Set InBounds when computing offsets into memory objects (PR #165425)

2025-10-29 Thread Fabian Ritter via llvm-branch-commits


@@ -5626,17 +5626,31 @@ class LLVM_ABI TargetLowering : public 
TargetLoweringBase {
   /// Get a pointer to vector element \p Idx located in memory for a vector of
   /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out 
of
   /// bounds the returned pointer is unspecified, but will be within the vector
-  /// bounds.
-  SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
-  SDValue Index) const;
+  /// bounds. \p PtrArithFlags can be used to mark that arithmetic within the
+  /// vector in memory is known to not wrap or to be inbounds.
+  SDValue getVectorElementPointer(
+  SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index,
+  const SDNodeFlags PtrArithFlags = SDNodeFlags()) const;
+
+  /// Get a pointer to vector element \p Idx located in memory for a vector of
+  /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out 
of
+  /// bounds the returned pointer is unspecified, but will be within the vector
+  /// bounds. \p VecPtr is guaranteed to point to the beginning of a memory
+  /// location large enough for the vector.
+  SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
+  EVT VecVT, SDValue Index) const;

ritter-x2a wrote:

Probably not, you're right. I went for consistency with the surrounding 
declarations, but, looking further around in the file, there is precedent for 
inline definitions as well. Changed.

https://github.com/llvm/llvm-project/pull/165425
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [SDAG] Set InBounds when computing offsets into memory objects (PR #165425)

2025-10-29 Thread Fabian Ritter via llvm-branch-commits

https://github.com/ritter-x2a updated 
https://github.com/llvm/llvm-project/pull/165425

>From 50f481b182228175e73b7754e42076337b8c613c Mon Sep 17 00:00:00 2001
From: Fabian Ritter 
Date: Mon, 27 Oct 2025 04:48:36 -0400
Subject: [PATCH 1/2] [SDAG] Set InBounds when when computing offsets into
 memory objects

When a load or store accesses N bytes starting from a pointer P, and we want to
compute an offset pointer within these N bytes after P, we know that the
arithmetic to add the offset must be inbounds. This is for example relevant
when legalizing too-wide memory accesses, when lowering memcpy&Co., or when
optimizing "vector-load -> extractelement" into an offset load.

For SWDEV-516125.
---
 llvm/include/llvm/CodeGen/SelectionDAG.h  | 12 ---
 llvm/include/llvm/CodeGen/TargetLowering.h| 26 ++
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  5 ++-
 .../CodeGen/SelectionDAG/TargetLowering.cpp   | 34 +--
 4 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h 
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index df6ce0fe1b037..1a5ffb38f2568 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1113,7 +1113,8 @@ class SelectionDAG {
   SDValue Mask, SDValue EVL);
 
   /// Returns sum of the base pointer and offset.
-  /// Unlike getObjectPtrOffset this does not set NoUnsignedWrap by default.
+  /// Unlike getObjectPtrOffset this does not set NoUnsignedWrap and InBounds 
by
+  /// default.
   LLVM_ABI SDValue
   getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL,
const SDNodeFlags Flags = SDNodeFlags());
@@ -1123,15 +1124,18 @@ class SelectionDAG {
 
   /// Create an add instruction with appropriate flags when used for
   /// addressing some offset of an object. i.e. if a load is split into 
multiple
-  /// components, create an add nuw from the base pointer to the offset.
+  /// components, create an add nuw (or ptradd nuw inbounds) from the base
+  /// pointer to the offset.
   SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset) {
-return getMemBasePlusOffset(Ptr, Offset, SL, SDNodeFlags::NoUnsignedWrap);
+return getMemBasePlusOffset(
+Ptr, Offset, SL, SDNodeFlags::NoUnsignedWrap | SDNodeFlags::InBounds);
   }
 
   SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, SDValue Offset) {
 // The object itself can't wrap around the address space, so it shouldn't 
be
 // possible for the adds of the offsets to the split parts to overflow.
-return getMemBasePlusOffset(Ptr, Offset, SL, SDNodeFlags::NoUnsignedWrap);
+return getMemBasePlusOffset(
+Ptr, Offset, SL, SDNodeFlags::NoUnsignedWrap | SDNodeFlags::InBounds);
   }
 
   /// Return a new CALLSEQ_START node, that starts new call frame, in which
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h 
b/llvm/include/llvm/CodeGen/TargetLowering.h
index d6ed3a8f739b3..28a43d82364b7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5626,17 +5626,31 @@ class LLVM_ABI TargetLowering : public 
TargetLoweringBase {
   /// Get a pointer to vector element \p Idx located in memory for a vector of
   /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out 
of
   /// bounds the returned pointer is unspecified, but will be within the vector
-  /// bounds.
-  SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
-  SDValue Index) const;
+  /// bounds. \p PtrArithFlags can be used to mark that arithmetic within the
+  /// vector in memory is known to not wrap or to be inbounds.
+  SDValue getVectorElementPointer(
+  SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index,
+  const SDNodeFlags PtrArithFlags = SDNodeFlags()) const;
+
+  /// Get a pointer to vector element \p Idx located in memory for a vector of
+  /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out 
of
+  /// bounds the returned pointer is unspecified, but will be within the vector
+  /// bounds. \p VecPtr is guaranteed to point to the beginning of a memory
+  /// location large enough for the vector.
+  SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
+  EVT VecVT, SDValue Index) const;
 
   /// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located
   /// in memory for a vector of type \p VecVT starting at a base address of
   /// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the
   /// returned pointer is unspecified, but the value returned will be such that
-  /// the entire subvector would be within the vector bounds.
-  SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
- EVT SubVecVT, SDValue Index

[llvm-branch-commits] [BOLT] Allow name matching stale profiles under infer-stale-profile (PR #165493)

2025-10-29 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/165493


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [BOLT] Allow name matching stale profiles under infer-stale-profile (PR #165493)

2025-10-29 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov updated 
https://github.com/llvm/llvm-project/pull/165493


___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [LoongArch] Convert ld to fld when result is only used by sitofp (PR #165523)

2025-10-29 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-loongarch

Author: Zhaoxin Yang (ylzsx)


Changes

If the result of an integer load is only used by an integer-to-float 
conversion, use a fp load instead. This eliminates an integer-to-float-move 
(movgr2fr) instruction.

---
Full diff: https://github.com/llvm/llvm-project/pull/165523.diff


5 Files Affected:

- (modified) llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td (+5) 
- (modified) llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td (+3) 
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+45) 
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+1) 
- (modified) llvm/test/CodeGen/LoongArch/load-itofp-combine.ll (+8-23) 


``diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 690dd73014e57..cb6b7c7342ec6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -26,6 +26,7 @@ def SDT_LoongArchMOVFR2GR_S_LA64
 def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
 def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
 def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
+def SDT_LoongArchITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
 
 // ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
 // comparisons to prevent recursive lowering.
@@ -39,6 +40,7 @@ def loongarch_movfr2gr_s_la64
 def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
 def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
 def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;
+def loongarch_sitof : SDNode<"LoongArchISD::SITOF", SDT_LoongArchITOF>;
 
 
//===--===//
 // Instructions
@@ -346,6 +348,9 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg 
FPR32:$fa))),
 // fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
 def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
   (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+
+// ffint.s.w
+def : Pat<(loongarch_sitof FPR32:$fj), (FFINT_S_W FPR32:$fj)>;
 } // Predicates = [HasBasicF]
 
 let Predicates = [HasBasicF, IsLA64] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index daefbaa52d42a..e973c80af807c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -307,6 +307,9 @@ def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg 
FPR64:$fa))),
 // fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
 def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
   (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+
+// ffint.d.l
+def : Pat<(loongarch_sitof FPR64:$fj), (FFINT_D_L FPR64:$fj)>;
 } // Predicates = [HasBasicD]
 
 /// Floating point constants
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6dc8eb6..3695c5a42790f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -451,6 +451,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const 
TargetMachine &TM,
   }
 
   // Set DAG combine for LA32 and LA64.
+  if (Subtarget.hasBasicF()) {
+setTargetDAGCombine(ISD::SINT_TO_FP);
+  }
 
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
@@ -6725,6 +6728,45 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG 
&DAG,
   return SDValue();
 }
 
+static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+TargetLowering::DAGCombinerInfo &DCI,
+const LoongArchSubtarget &Subtarget) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  if (VT != MVT::f32 && VT != MVT::f64)
+return SDValue();
+  if (VT == MVT::f32 && !Subtarget.hasBasicF())
+return SDValue();
+  if (VT == MVT::f64 && !Subtarget.hasBasicD())
+return SDValue();
+
+  // Only optimize when the source and destination types have the same width.
+  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
+return SDValue();
+
+  SDValue Src = N->getOperand(0);
+  // If the result of an integer load is only used by an integer-to-float
+  // conversion, use a fp load instead. This eliminates an 
integer-to-float-move
+  // (movgr2fr) instruction.
+  if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
+  // Do not change the width of a volatile load. This condition check is
+  // inspired by AArch64.
+  !cast(Src)->isVolatile()) {
+LoadSDNode *LN0 = cast(Src);
+SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
+   LN0->getPo

[llvm-branch-commits] [llvm] [LoongArch] Convert ld to fld when result is only used by sitofp (PR #165523)

2025-10-29 Thread Zhaoxin Yang via llvm-branch-commits

https://github.com/ylzsx created 
https://github.com/llvm/llvm-project/pull/165523

If the result of an integer load is only used by an integer-to-float 
conversion, use a fp load instead. This eliminates an integer-to-float-move 
(movgr2fr) instruction.

>From 25fc7d1d06a7b6b6a9d19ed82586094f58f8c527 Mon Sep 17 00:00:00 2001
From: yangzhaoxin 
Date: Wed, 29 Oct 2025 15:19:05 +0800
Subject: [PATCH] [LoongArch] Convert ld to fld when result is only used by
 sitofp

If the result of an integer load is only used by an integer-to-float
conversion, use a fp load instead. This eliminates an
integer-to-float-move (movgr2fr) instruction.
---
 .../LoongArch/LoongArchFloat32InstrInfo.td|  5 +++
 .../LoongArch/LoongArchFloat64InstrInfo.td|  3 ++
 .../LoongArch/LoongArchISelLowering.cpp   | 45 +++
 .../Target/LoongArch/LoongArchISelLowering.h  |  1 +
 .../CodeGen/LoongArch/load-itofp-combine.ll   | 31 -
 5 files changed, 62 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 690dd73014e57..cb6b7c7342ec6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -26,6 +26,7 @@ def SDT_LoongArchMOVFR2GR_S_LA64
 def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
 def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
 def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
+def SDT_LoongArchITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
 
 // ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
 // comparisons to prevent recursive lowering.
@@ -39,6 +40,7 @@ def loongarch_movfr2gr_s_la64
 def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
 def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
 def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;
+def loongarch_sitof : SDNode<"LoongArchISD::SITOF", SDT_LoongArchITOF>;
 
 
//===--===//
 // Instructions
@@ -346,6 +348,9 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg 
FPR32:$fa))),
 // fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
 def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
   (FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+
+// ffint.s.w
+def : Pat<(loongarch_sitof FPR32:$fj), (FFINT_S_W FPR32:$fj)>;
 } // Predicates = [HasBasicF]
 
 let Predicates = [HasBasicF, IsLA64] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td 
b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index daefbaa52d42a..e973c80af807c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -307,6 +307,9 @@ def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg 
FPR64:$fa))),
 // fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
 def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
   (FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+
+// ffint.d.l
+def : Pat<(loongarch_sitof FPR64:$fj), (FFINT_D_L FPR64:$fj)>;
 } // Predicates = [HasBasicD]
 
 /// Floating point constants
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp 
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6dc8eb6..3695c5a42790f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -451,6 +451,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const 
TargetMachine &TM,
   }
 
   // Set DAG combine for LA32 and LA64.
+  if (Subtarget.hasBasicF()) {
+setTargetDAGCombine(ISD::SINT_TO_FP);
+  }
 
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
@@ -6725,6 +6728,45 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG 
&DAG,
   return SDValue();
 }
 
+static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+TargetLowering::DAGCombinerInfo &DCI,
+const LoongArchSubtarget &Subtarget) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  if (VT != MVT::f32 && VT != MVT::f64)
+return SDValue();
+  if (VT == MVT::f32 && !Subtarget.hasBasicF())
+return SDValue();
+  if (VT == MVT::f64 && !Subtarget.hasBasicD())
+return SDValue();
+
+  // Only optimize when the source and destination types have the same width.
+  if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
+return SDValue();
+
+  SDValue Src = N->getOperand(0);
+  // If the result of an integer load is only used by an integer-to-float
+  // conversion, use a fp load instead. This eliminates an 
integer-to-float-move
+  // (movgr2fr) instruction.
+  if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
+  // Do not change the width of 

[llvm-branch-commits] [llvm] [BOLT] Match functions with pseudo probes (PR #100446)

2025-10-29 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov edited 
https://github.com/llvm/llvm-project/pull/100446
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Move call probe information to CallSiteInfo (PR #165490)

2025-10-29 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov edited 
https://github.com/llvm/llvm-project/pull/165490
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #148900)

2025-10-29 Thread Simon Pilgrim via llvm-branch-commits


@@ -1220,6 +1220,11 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 
addr:$src,
 def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src,
   (VMOV64toPQIZrm addr:$src)>, Requires<[HasAVX512]>;
 
+def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)),
+   (VMOVAPDrm addr:$src)>; // load atomic <2 x i64>
+def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)),
+   (VMOVAPDrm addr:$src)>; // load atomic <4 x i32>

RKSimon wrote:

These need to be tagged with Requires<[UseAVX]>, and the SSE2/AVX512 variants 
need adding as well (MOVAPDrm / VMOVAPDZ128rm)

https://github.com/llvm/llvm-project/pull/148900
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT][PAC] Warn about synchronous unwind tables (PR #165227)

2025-10-29 Thread Gergely Bálint via llvm-branch-commits


@@ -133,11 +140,17 @@ Error 
PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
   ParallelUtilities::runOnEachFunction(
   BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
   SkipPredicate, "PointerAuthCFIAnalyzer");
+
+  float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
   BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
 << " functions. Ignored " << FunctionsIgnored << " functions "
-<< format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+<< format("(%.2lf%%)", IgnoredPercent)
 << " because of CFI inconsistencies\n";
 
+  if (IgnoredPercent >= 10.0)
+BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables.\n";

bgergely0 wrote:

I'm not a fan of adding exact flag names for two reasons:
- it's only valid for C/C++, whereas the ABI is "general" and other languages 
(Rust) also generate such unwind tables, but the flags are named differently
- `-fasync` is the default for clang, so if we have a sync unwind table, it's 
because the `-fno-async` flag was passed. If the user then passes `-fasync` as 
well, the chosen option depends on the order of the two flags

WDYT?

https://github.com/llvm/llvm-project/pull/165227
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] [BOLT] Match functions with pseudo probes (PR #100446)

2025-10-29 Thread Amir Ayupov via llvm-branch-commits

https://github.com/aaupov edited 
https://github.com/llvm/llvm-project/pull/100446
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang-shlib] Fix linking libclang-cpp on Haiku (#156401) (PR #165638)

2025-10-29 Thread via llvm-branch-commits

llvmbot wrote:

@DavidSpickett What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/165638
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang-shlib] Fix linking libclang-cpp on Haiku (#156401) (PR #165638)

2025-10-29 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/165638

Backport 21bcd00e54416b0950da19fe8adb0628a19bf66f

Requested by: @brad0

>From a2cb10a0e31a8070a4b43db8a24e5b48ea96e147 Mon Sep 17 00:00:00 2001
From: Brad Smith 
Date: Wed, 29 Oct 2025 20:55:15 -0400
Subject: [PATCH] [clang-shlib] Fix linking libclang-cpp on Haiku (#156401)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Haiku requires linking in libnetwork.

Co-authored-by: Jérôme Duval 
(cherry picked from commit 21bcd00e54416b0950da19fe8adb0628a19bf66f)
---
 clang/tools/clang-shlib/CMakeLists.txt | 4 
 1 file changed, 4 insertions(+)

diff --git a/clang/tools/clang-shlib/CMakeLists.txt 
b/clang/tools/clang-shlib/CMakeLists.txt
index 945076e1ad810..a4d0aa5779a7e 100644
--- a/clang/tools/clang-shlib/CMakeLists.txt
+++ b/clang/tools/clang-shlib/CMakeLists.txt
@@ -41,6 +41,10 @@ if (CLANG_LINK_CLANG_DYLIB)
   set(INSTALL_WITH_TOOLCHAIN INSTALL_WITH_TOOLCHAIN)
 endif()
 
+if (HAIKU)
+  list(APPEND _DEPS network)
+endif()
+
 add_clang_library(clang-cpp
   SHARED
   ${INSTALL_WITH_TOOLCHAIN}

___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang-shlib] Fix linking libclang-cpp on Haiku (#156401) (PR #165638)

2025-10-29 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/165638
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [clang] release/21.x: [clang-shlib] Fix linking libclang-cpp on Haiku (#156401) (PR #165638)

2025-10-29 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (llvmbot)


Changes

Backport 21bcd00e54416b0950da19fe8adb0628a19bf66f

Requested by: @brad0

---
Full diff: https://github.com/llvm/llvm-project/pull/165638.diff


1 Files Affected:

- (modified) clang/tools/clang-shlib/CMakeLists.txt (+4) 


```diff
diff --git a/clang/tools/clang-shlib/CMakeLists.txt 
b/clang/tools/clang-shlib/CMakeLists.txt
index 945076e1ad810..a4d0aa5779a7e 100644
--- a/clang/tools/clang-shlib/CMakeLists.txt
+++ b/clang/tools/clang-shlib/CMakeLists.txt
@@ -41,6 +41,10 @@ if (CLANG_LINK_CLANG_DYLIB)
   set(INSTALL_WITH_TOOLCHAIN INSTALL_WITH_TOOLCHAIN)
 endif()
 
+if (HAIKU)
+  list(APPEND _DEPS network)
+endif()
+
 add_clang_library(clang-cpp
   SHARED
   ${INSTALL_WITH_TOOLCHAIN}

```




https://github.com/llvm/llvm-project/pull/165638
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy][COFF] Update .symidx values after stripping (#153322) (PR #165608)

2025-10-29 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/165608
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy][COFF] Update .symidx values after stripping (#153322) (PR #165608)

2025-10-29 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/165608

Backport 74d52f9639ca7588c622c0790ca18fa5bff66837

Requested by: @mstorsjo

>From 0921531330add39df5d83c4802eb76adf2aac744 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov 
Date: Wed, 3 Sep 2025 23:54:21 +0200
Subject: [PATCH] [llvm-objcopy][COFF] Update .symidx values after stripping
 (#153322)

After deleting debug sections, symbol indices are shifted but sections
consisting of .symidx directives are completely ignored. Update symbol
indices as well.

(cherry picked from commit 74d52f9639ca7588c622c0790ca18fa5bff66837)
---
 llvm/lib/ObjCopy/COFF/COFFObject.cpp  |   2 +
 llvm/lib/ObjCopy/COFF/COFFObject.h|   2 +
 llvm/lib/ObjCopy/COFF/COFFWriter.cpp  |  75 +++
 llvm/lib/ObjCopy/COFF/COFFWriter.h|   1 +
 .../COFF/strip-invalid-symidx-section.test| 188 ++
 .../COFF/strip-update-symidx-section.test | 173 
 6 files changed, 441 insertions(+)
 create mode 100644 
llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section.test
 create mode 100644 
llvm/test/tools/llvm-objcopy/COFF/strip-update-symidx-section.test

diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.cpp 
b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
index 5fa13391c908f..91cf7e32a7396 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObject.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
@@ -18,6 +18,8 @@ using namespace object;
 void Object::addSymbols(ArrayRef NewSymbols) {
   for (Symbol S : NewSymbols) {
 S.UniqueId = NextSymbolUniqueId++;
+S.OriginalRawIndex = NextSymbolOriginalIndex;
+NextSymbolOriginalIndex += 1 + S.Sym.NumberOfAuxSymbols;
 Symbols.emplace_back(S);
   }
   updateSymbols();
diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.h b/llvm/lib/ObjCopy/COFF/COFFObject.h
index cdd1f17fc6055..6b70add1bb1b7 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObject.h
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.h
@@ -89,6 +89,7 @@ struct Symbol {
   std::optional WeakTargetSymbolId;
   size_t UniqueId;
   size_t RawIndex;
+  size_t OriginalRawIndex;
   bool Referenced;
 };
 
@@ -140,6 +141,7 @@ struct Object {
   DenseMap SymbolMap;
 
   size_t NextSymbolUniqueId = 0;
+  size_t NextSymbolOriginalIndex = 0;
 
   std::vector Sections;
   DenseMap SectionMap;
diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
index 350c4aec572c9..fed67d67f13a7 100644
--- a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
@@ -12,6 +12,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Object/COFF.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/ErrorHandling.h"
 #include 
@@ -92,6 +94,77 @@ Error COFFWriter::finalizeSymbolContents() {
   return Error::success();
 }
 
+Error COFFWriter::finalizeSymIdxContents() {
+  // CFGuards shouldn't be present in PE.
+  if (Obj.IsPE)
+return Error::success();
+
+  // Currently handle only sections consisting only of .symidx.
+  // TODO: other sections such as .impcall and .hybmp$x require more complex
+  // handling as they have more complex layout.
+  auto IsSymIdxSection = [](StringRef Name) {
+return Name == ".gljmp$y" || Name == ".giats$y" || Name == ".gfids$y" ||
+   Name == ".gehcont$y";
+  };
+
+  DenseMap SymIdMap;
+  SmallDenseMap SecIdMap;
+  for (Symbol &Sym : Obj.getMutableSymbols()) {
+SymIdMap[Sym.OriginalRawIndex] = Sym.RawIndex;
+
+// We collect only definition symbols of the sections to update the
+// checksums.
+if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC &&
+Sym.Sym.NumberOfAuxSymbols == 1 && Sym.Sym.Value == 0 &&
+IsSymIdxSection(Sym.Name))
+  SecIdMap[Sym.TargetSectionId] =
+  reinterpret_cast(
+  Sym.AuxData[0].Opaque);
+  }
+
+  for (Section &Sec : Obj.getMutableSections()) {
+if (!IsSymIdxSection(Sec.Name))
+  continue;
+
+ArrayRef RawIds = Sec.getContents();
+// Nothing to do and also the checksum will be -1 instead of 0 if we
+// recalculate it on empty input.
+if (RawIds.size() == 0)
+  continue;
+
+auto SecDefIt = SecIdMap.find(Sec.UniqueId);
+if (SecDefIt == SecIdMap.end())
+  return createStringError(object_error::invalid_symbol_index,
+   "section '%s' does not have the corresponding "
+   "symbol or the symbol has unexpected format",
+   Sec.Name.str().c_str());
+
+// Create updated content.
+ArrayRef Ids(
+reinterpret_cast(RawIds.data()),
+RawIds.size() / 4);
+std::vector NewIds;
+for (support::ulittle32_t Id : Ids) {
+  auto SymIdIt = SymIdMap.find(Id);
+  if (SymIdIt == SymIdMap.end())
+return createStringError(object_error::invalid_symbol_index,
+ "section '%s' contains a .symidx (%

[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy][COFF] Update .symidx values after stripping (#153322) (PR #165608)

2025-10-29 Thread via llvm-branch-commits

llvmbot wrote:

@jh7370 What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/165608
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy][COFF] Update .symidx values after stripping (#153322) (PR #165608)

2025-10-29 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-binary-utilities

Author: None (llvmbot)


Changes

Backport 74d52f9639ca7588c622c0790ca18fa5bff66837

Requested by: @mstorsjo

---
Full diff: https://github.com/llvm/llvm-project/pull/165608.diff


6 Files Affected:

- (modified) llvm/lib/ObjCopy/COFF/COFFObject.cpp (+2) 
- (modified) llvm/lib/ObjCopy/COFF/COFFObject.h (+2) 
- (modified) llvm/lib/ObjCopy/COFF/COFFWriter.cpp (+75) 
- (modified) llvm/lib/ObjCopy/COFF/COFFWriter.h (+1) 
- (added) llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section.test (+188)
- (added) llvm/test/tools/llvm-objcopy/COFF/strip-update-symidx-section.test (+173)


```diff
diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.cpp b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
index 5fa13391c908f..91cf7e32a7396 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObject.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
@@ -18,6 +18,8 @@ using namespace object;
 void Object::addSymbols(ArrayRef NewSymbols) {
   for (Symbol S : NewSymbols) {
 S.UniqueId = NextSymbolUniqueId++;
+S.OriginalRawIndex = NextSymbolOriginalIndex;
+NextSymbolOriginalIndex += 1 + S.Sym.NumberOfAuxSymbols;
 Symbols.emplace_back(S);
   }
   updateSymbols();
diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.h b/llvm/lib/ObjCopy/COFF/COFFObject.h
index cdd1f17fc6055..6b70add1bb1b7 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObject.h
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.h
@@ -89,6 +89,7 @@ struct Symbol {
   std::optional WeakTargetSymbolId;
   size_t UniqueId;
   size_t RawIndex;
+  size_t OriginalRawIndex;
   bool Referenced;
 };
 
@@ -140,6 +141,7 @@ struct Object {
   DenseMap SymbolMap;
 
   size_t NextSymbolUniqueId = 0;
+  size_t NextSymbolOriginalIndex = 0;
 
   std::vector Sections;
   DenseMap SectionMap;
diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
index 350c4aec572c9..fed67d67f13a7 100644
--- a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
@@ -12,6 +12,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/BinaryFormat/COFF.h"
 #include "llvm/Object/COFF.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Endian.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/ErrorHandling.h"
 #include 
@@ -92,6 +94,77 @@ Error COFFWriter::finalizeSymbolContents() {
   return Error::success();
 }
 
+Error COFFWriter::finalizeSymIdxContents() {
+  // CFGuards shouldn't be present in PE.
+  if (Obj.IsPE)
+return Error::success();
+
+  // Currently handle only sections consisting only of .symidx.
+  // TODO: other sections such as .impcall and .hybmp$x require more complex
+  // handling as they have more complex layout.
+  auto IsSymIdxSection = [](StringRef Name) {
+return Name == ".gljmp$y" || Name == ".giats$y" || Name == ".gfids$y" ||
+   Name == ".gehcont$y";
+  };
+
+  DenseMap SymIdMap;
+  SmallDenseMap SecIdMap;
+  for (Symbol &Sym : Obj.getMutableSymbols()) {
+SymIdMap[Sym.OriginalRawIndex] = Sym.RawIndex;
+
+// We collect only definition symbols of the sections to update the
+// checksums.
+if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC &&
+Sym.Sym.NumberOfAuxSymbols == 1 && Sym.Sym.Value == 0 &&
+IsSymIdxSection(Sym.Name))
+  SecIdMap[Sym.TargetSectionId] =
+  reinterpret_cast(
+  Sym.AuxData[0].Opaque);
+  }
+
+  for (Section &Sec : Obj.getMutableSections()) {
+if (!IsSymIdxSection(Sec.Name))
+  continue;
+
+ArrayRef RawIds = Sec.getContents();
+// Nothing to do and also the checksum will be -1 instead of 0 if we
+// recalculate it on empty input.
+if (RawIds.size() == 0)
+  continue;
+
+auto SecDefIt = SecIdMap.find(Sec.UniqueId);
+if (SecDefIt == SecIdMap.end())
+  return createStringError(object_error::invalid_symbol_index,
+   "section '%s' does not have the corresponding "
+   "symbol or the symbol has unexpected format",
+   Sec.Name.str().c_str());
+
+// Create updated content.
+ArrayRef Ids(
+reinterpret_cast(RawIds.data()),
+RawIds.size() / 4);
+std::vector NewIds;
+for (support::ulittle32_t Id : Ids) {
+  auto SymIdIt = SymIdMap.find(Id);
+  if (SymIdIt == SymIdMap.end())
+return createStringError(object_error::invalid_symbol_index,
+ "section '%s' contains a .symidx (%d) that is "
+ "incorrect or was stripped",
+ Sec.Name.str().c_str(), Id.value());
+  NewIds.push_back(support::ulittle32_t(SymIdIt->getSecond()));
+}
+ArrayRef NewRawIds(reinterpret_cast(NewIds.data()),
+RawIds.size());
+// Update the checksum.
+JamCRC JC(/*Init=*/0);
+JC.update(NewRawIds);
+SecDefIt->getSecond()->CheckSum = JC.getCRC();
+// Set new content.
+Sec.setOwnedContents(NewRawIds

[llvm-branch-commits] ELF: Rename RandomizePaddingSection to PaddingSection. (PR #155540)

2025-10-29 Thread Fangrui Song via llvm-branch-commits

MaskRay wrote:

This should be rebased on top of main; it can then be pushed before the 
previous patches land.

https://github.com/llvm/llvm-project/pull/155540
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] ELF: Rename RandomizePaddingSection to PaddingSection. (PR #155540)

2025-10-29 Thread Fangrui Song via llvm-branch-commits

https://github.com/MaskRay approved this pull request.


https://github.com/llvm/llvm-project/pull/155540
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [mlir] 2736b4d - Revert "[mlir][affine] Add fold logic when the affine.yield has IV as operand…"

2025-10-29 Thread via llvm-branch-commits

Author: lonely eagle
Date: 2025-10-30T02:15:05+08:00
New Revision: 2736b4d7963204aa182e0c8b344d50b73dc9fd5f

URL: https://github.com/llvm/llvm-project/commit/2736b4d7963204aa182e0c8b344d50b73dc9fd5f
DIFF: https://github.com/llvm/llvm-project/commit/2736b4d7963204aa182e0c8b344d50b73dc9fd5f.diff

LOG: Revert "[mlir][affine] Add fold logic when the affine.yield has IV as operand…"

This reverts commit e24e7ff7e3a5ff24c5b4d41131cbd6fbac86565e.

Added: 


Modified: 
mlir/lib/Dialect/Affine/IR/AffineOps.cpp
mlir/test/Dialect/Affine/canonicalize.mlir

Removed: 




diff  --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 002f1f60bb58e..0c3592124cdec 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -2610,21 +2610,6 @@ static std::optional 
getTrivialConstantTripCount(AffineForOp forOp) {
   return ub - lb <= 0 ? 0 : (ub - lb + step - 1) / step;
 }
 
-/// Calculate the constant value of the loop's induction variable for its last
-/// trip.
-static std::optional
-getConstantInductionVarForLastTrip(AffineForOp forOp) {
-  std::optional tripCount = getTrivialConstantTripCount(forOp);
-  if (!tripCount.has_value())
-return std::nullopt;
-  if (tripCount.value() == 0)
-return std::nullopt;
-  int64_t lb = forOp.getConstantLowerBound();
-  int64_t step = forOp.getStepAsInt();
-  int64_t lastTripIv = lb + (tripCount.value() - 1) * step;
-  return lastTripIv;
-}
-
 /// Fold the empty loop.
 static SmallVector AffineForEmptyLoopFolder(AffineForOp forOp) {
   if (!llvm::hasSingleElement(*forOp.getBody()))
@@ -2637,7 +2622,7 @@ static SmallVector 
AffineForEmptyLoopFolder(AffineForOp forOp) {
 // results.
 return forOp.getInits();
   }
-  SmallVector replacements;
+  SmallVector replacements;
   auto yieldOp = cast(forOp.getBody()->getTerminator());
   auto iterArgs = forOp.getRegionIterArgs();
   bool hasValDefinedOutsideLoop = false;
@@ -2645,14 +2630,10 @@ static SmallVector 
AffineForEmptyLoopFolder(AffineForOp forOp) {
   for (unsigned i = 0, e = yieldOp->getNumOperands(); i < e; ++i) {
 Value val = yieldOp.getOperand(i);
 BlockArgument *iterArgIt = llvm::find(iterArgs, val);
-if (val == forOp.getInductionVar()) {
-  if (auto lastTripIv = getConstantInductionVarForLastTrip(forOp)) {
-replacements.push_back(IntegerAttr::get(
-IndexType::get(forOp.getContext()), lastTripIv.value()));
-continue;
-  }
+// TODO: It should be possible to perform a replacement by computing the
+// last value of the IV based on the bounds and the step.
+if (val == forOp.getInductionVar())
   return {};
-}
 if (iterArgIt == iterArgs.end()) {
   // `val` is defined outside of the loop.
   assert(forOp.isDefinedOutsideOfLoop(val) &&
@@ -2675,7 +2656,7 @@ static SmallVector 
AffineForEmptyLoopFolder(AffineForOp forOp) {
   // out of order.
   if (tripCount.has_value() && tripCount.value() >= 2 && iterArgsNotInOrder)
 return {};
-  return replacements;
+  return llvm::to_vector_of(replacements);
 }
 
 /// Canonicalize the bounds of the given loop.

diff  --git a/mlir/test/Dialect/Affine/canonicalize.mlir b/mlir/test/Dialect/Affine/canonicalize.mlir
index 997f23b4bd669..1169cd1c29d74 100644
--- a/mlir/test/Dialect/Affine/canonicalize.mlir
+++ b/mlir/test/Dialect/Affine/canonicalize.mlir
@@ -609,19 +609,6 @@ func.func @fold_zero_iter_loops(%in : index) -> index {
 
 // -
 
-// CHECK-LABEL: func @fold_empty_loop_iv
-//  CHECK-SAME:   %[[INIT:.*]]: index
-func.func @fold_empty_loop_iv(%init: index) -> (index, index) {
-  %res:2 = affine.for %i = 0 to 10 step 1 iter_args(%arg0 = %init, %arg1 = 
%init) -> (index, index) {
-affine.yield %i, %arg1 : index, index
-  }
-  // CHECK: %[[C9:.*]] = arith.constant 9 : index
-  // CHECK: return %[[C9]], %[[INIT]] : index, index
-  return %res#0, %res#1 : index, index
-}
-
-// -
-
 // CHECK-DAG: #[[$SET:.*]] = affine_set<(d0, d1)[s0] : (d0 >= 0, -d0 + 1022 >= 0, d1 >= 0, -d1 + s0 - 2 >= 0)>
 
 // CHECK-LABEL: func @canonicalize_affine_if



___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/21.x: [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285) (PR #165605)

2025-10-29 Thread via llvm-branch-commits

https://github.com/llvmbot created 
https://github.com/llvm/llvm-project/pull/165605

Backport 6ab8e8fa03

Requested by: @nga888

>From db7b9214c3f3867a5eaa9a41cc1f69ebf804a61f Mon Sep 17 00:00:00 2001
From: Andrew Ng 
Date: Wed, 29 Oct 2025 10:35:20 +
Subject: [PATCH] [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285)

Fix manifest `trustInfo` to use the `urn:schemas-microsoft-com:asm.v3`
namespace.

Fixes https://github.com/llvm/llvm-project/issues/120394.

(cherry picked from commit 6ab8e8fa031e0a22c0244c1aa8f54581ed9bffd1)
---
 lld/COFF/DriverUtils.cpp   |  2 +-
 lld/test/COFF/Inputs/manifest-uac.test | 11 +
 lld/test/COFF/manifest-uac.test| 33 +
 lld/test/COFF/manifest.test| 65 ++
 lld/test/COFF/manifestinput.test   | 35 +++---
 5 files changed, 96 insertions(+), 50 deletions(-)
 create mode 100644 lld/test/COFF/Inputs/manifest-uac.test
 create mode 100644 lld/test/COFF/manifest-uac.test

diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index d8b41c7f45400..5ef41c4c0a086 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -387,7 +387,7 @@ std::string LinkerDriver::createDefaultXml() {
  << "\n";
   if (ctx.config.manifestUAC) {
-os << "  \n"
+os << "  \n"
<< "\n"
<< "  \n"
<< " 
+
+  
+
+  
+
+  
+
diff --git a/lld/test/COFF/manifest-uac.test b/lld/test/COFF/manifest-uac.test
new file mode 100644
index 0..d3a17c7282716
--- /dev/null
+++ b/lld/test/COFF/manifest-uac.test
@@ -0,0 +1,33 @@
+# REQUIRES: libxml2
+
+# RUN: yaml2obj %p/Inputs/ret42.yaml -o %t.obj
+# RUN: lld-link /out:%t.exe /entry:main \
+# RUN:   /manifest:embed \
+# RUN:   /manifestinput:%p/Inputs/manifest-uac.test %t.obj
+# RUN: llvm-readobj --coff-resources %t.exe | FileCheck %s
+
+CHECK:  Data (
+CHECK-NEXT:   : 3C3F786D 6C207665 7273696F 6E3D2231  |.|
+CHECK-NEXT:   0070: 0A20203C 74727573 74496E66 6F20786D  |.  ..  ..  <|
+CHECK-NEXT:   0120: 2F726571 75657374 65645072 6976696C  |/requestedPrivil|
+CHECK-NEXT:   0130: 65676573 3E0A2020 20203C2F 73656375  |eges>..  .|
+CHECK-NEXT:   0160: 0A   |.|
+CHECK-NEXT: )
diff --git a/lld/test/COFF/manifest.test b/lld/test/COFF/manifest.test
index 4910600bd3a17..09de96e9bccfa 100644
--- a/lld/test/COFF/manifest.test
+++ b/lld/test/COFF/manifest.test
@@ -10,7 +10,7 @@
 MANIFEST: 
 MANIFEST: 
-MANIFEST:   
+MANIFEST:   
 MANIFEST: 
 MANIFEST:   
 MANIFEST:  
@@ -26,7 +26,7 @@ MANIFEST: 
 UAC: 
 UAC: 
-UAC:   
+UAC:   
 UAC: 
 UAC:   
 UAC:  
@@ -43,7 +43,7 @@ UAC: 
 DEPENDENCY: 
 DEPENDENCY: 
-DEPENDENCY:   
+DEPENDENCY:   
 DEPENDENCY: 
 DEPENDENCY:   
 DEPENDENCY:  
@@ -90,7 +90,7 @@ NOUACNODEP: 
 SEVERALDEPS: 
 SEVERALDEPS: 
-SEVERALDEPS:   
+SEVERALDEPS:   
 SEVERALDEPS: 
 SEVERALDEPS:   
 SEVERALDEPS:  
@@ -139,31 +139,34 @@ EMBED:   0040: 6D61732D 6D696372 6F736F66 742D636F  
|mas-microsoft-co|
 EMBED:   0050: 6D3A6173 6D2E7631 220A2020 20202020  |m:asm.v1".  |
 EMBED:   0060: 20202020 6D616E69 66657374 56657273  |manifestVers|
 EMBED:   0070: 696F6E3D 22312E30 223E0A20 203C7472  |ion="1.0">.  ..  . . |
-EMBED:   0100: 203C2F72 65717565 73746564 50726976  | ..  .  ..  |
-EMBED:   0160: 20202020 3C617373 656D626C 79496465  |..  .  .<|
-EMBED:   01C0: 64657065 6E64656E 74417373 656D626C  |dependentAssembl|
-EMBED:   01D0: 793E0A20 20202020 203C6173 73656D62  |y>.  ..  ..|
+EMBED:   0080: 75737449 6E666F20 786D6C6E 733D2275  |ustInfo xmlns="u|
+EMBED:   0090: 726E3A73 6368656D 61732D6D 6963726F  |rn:schemas-micro|
+EMBED:   00A0: 736F6674 2D636F6D 3A61736D 2E763322  |soft-com:asm.v3"|
+EMBED:   00B0: 3E0A2020 20203C73 65637572 6974793E  |>.|
+EMBED:   00C0: 0A202020 2020203C 72657175 65737465  |.  .   |
+EMBED:   00E0: 20202020 20203C72 65717565 73746564  |  .  |
+EMBED:   0140: 0A202020 203C2F73 65637572 6974793E  |.|
+EMBED:   0150: 0A20203C 2F747275 7374496E 666F3E0A  |.  .|
+EMBED:   0160: 20203C64 6570656E 64656E63 793E0A20  |  . |
+EMBED:   0170: 2020203C 64657065 6E64656E 74417373  |   .  .|
+EMBED:   01B0: 3C2F6465 70656E64 656E7441 7373656D  |.  .  ..|
+EMBED:   0200: 20203C61 7373656D 626C7949 64656E74  |  ..  ..|
 EMBED: )
diff --git a/lld/test/COFF/manifestinput.test b/lld/test/COFF/manifestinput.test
index 04af80a13312d..cbf27b1ea96b5 100644
--- a/lld/test/COFF/manifestinput.test
+++ b/lld/test/COFF/manifestinput.test
@@ -5,22 +5,21 @@
 # RUN:   /manifest:embed \
 # RUN:   /manifestuac:"level='requireAdministrator'" \
 # RUN:   /manifestinput:%p/Inputs/manifestinput.test %t.obj
-# RUN: llvm-readobj --coff-resources --file-headers %t.exe | FileCheck %s \
-# RUN:   -check-prefix TEST_EMBED
+# RUN: llvm-readobj --coff-resources --file-headers %t.e

[llvm-branch-commits] [lld] release/21.x: [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285) (PR #165605)

2025-10-29 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-lld-coff

Author: None (llvmbot)


Changes

Backport 6ab8e8fa03

Requested by: @nga888

---
Full diff: https://github.com/llvm/llvm-project/pull/165605.diff


5 Files Affected:

- (modified) lld/COFF/DriverUtils.cpp (+1-1) 
- (added) lld/test/COFF/Inputs/manifest-uac.test (+11) 
- (added) lld/test/COFF/manifest-uac.test (+33) 
- (modified) lld/test/COFF/manifest.test (+34-31) 
- (modified) lld/test/COFF/manifestinput.test (+17-18) 


```diff
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index d8b41c7f45400..5ef41c4c0a086 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -387,7 +387,7 @@ std::string LinkerDriver::createDefaultXml() {
  << "\n";
   if (ctx.config.manifestUAC) {
-os << "  \n"
+os << "  \n"
<< "\n"
<< "  \n"
<< " 
+
+  
+
+  
+
+  
+
diff --git a/lld/test/COFF/manifest-uac.test b/lld/test/COFF/manifest-uac.test
new file mode 100644
index 0..d3a17c7282716
--- /dev/null
+++ b/lld/test/COFF/manifest-uac.test
@@ -0,0 +1,33 @@
+# REQUIRES: libxml2
+
+# RUN: yaml2obj %p/Inputs/ret42.yaml -o %t.obj
+# RUN: lld-link /out:%t.exe /entry:main \
+# RUN:   /manifest:embed \
+# RUN:   /manifestinput:%p/Inputs/manifest-uac.test %t.obj
+# RUN: llvm-readobj --coff-resources %t.exe | FileCheck %s
+
+CHECK:  Data (
+CHECK-NEXT:   : 3C3F786D 6C207665 7273696F 6E3D2231  |.|
+CHECK-NEXT:   0070: 0A20203C 74727573 74496E66 6F20786D  |.  ..  ..  <|
+CHECK-NEXT:   0120: 2F726571 75657374 65645072 6976696C  |/requestedPrivil|
+CHECK-NEXT:   0130: 65676573 3E0A2020 20203C2F 73656375  |eges>..  .|
+CHECK-NEXT:   0160: 0A   |.|
+CHECK-NEXT: )
diff --git a/lld/test/COFF/manifest.test b/lld/test/COFF/manifest.test
index 4910600bd3a17..09de96e9bccfa 100644
--- a/lld/test/COFF/manifest.test
+++ b/lld/test/COFF/manifest.test
@@ -10,7 +10,7 @@
 MANIFEST: 
 MANIFEST: 
-MANIFEST:   
+MANIFEST:   
 MANIFEST: 
 MANIFEST:   
 MANIFEST:  
@@ -26,7 +26,7 @@ MANIFEST: 
 UAC: 
 UAC: 
-UAC:   
+UAC:   
 UAC: 
 UAC:   
 UAC:  
@@ -43,7 +43,7 @@ UAC: 
 DEPENDENCY: 
 DEPENDENCY: 
-DEPENDENCY:   
+DEPENDENCY:   
 DEPENDENCY: 
 DEPENDENCY:   
 DEPENDENCY:  
@@ -90,7 +90,7 @@ NOUACNODEP: 
 SEVERALDEPS: 
 SEVERALDEPS: 
-SEVERALDEPS:   
+SEVERALDEPS:   
 SEVERALDEPS: 
 SEVERALDEPS:   
 SEVERALDEPS:  
@@ -139,31 +139,34 @@ EMBED:   0040: 6D61732D 6D696372 6F736F66 742D636F  
|mas-microsoft-co|
 EMBED:   0050: 6D3A6173 6D2E7631 220A2020 20202020  |m:asm.v1".  |
 EMBED:   0060: 20202020 6D616E69 66657374 56657273  |manifestVers|
 EMBED:   0070: 696F6E3D 22312E30 223E0A20 203C7472  |ion="1.0">.  ..  . . |
-EMBED:   0100: 203C2F72 65717565 73746564 50726976  | ..  .  ..  |
-EMBED:   0160: 20202020 3C617373 656D626C 79496465  |..  .  .<|
-EMBED:   01C0: 64657065 6E64656E 74417373 656D626C  |dependentAssembl|
-EMBED:   01D0: 793E0A20 20202020 203C6173 73656D62  |y>.  ..  ..|
+EMBED:   0080: 75737449 6E666F20 786D6C6E 733D2275  |ustInfo xmlns="u|
+EMBED:   0090: 726E3A73 6368656D 61732D6D 6963726F  |rn:schemas-micro|
+EMBED:   00A0: 736F6674 2D636F6D 3A61736D 2E763322  |soft-com:asm.v3"|
+EMBED:   00B0: 3E0A2020 20203C73 65637572 6974793E  |>.|
+EMBED:   00C0: 0A202020 2020203C 72657175 65737465  |.  .   |
+EMBED:   00E0: 20202020 20203C72 65717565 73746564  |  .  |
+EMBED:   0140: 0A202020 203C2F73 65637572 6974793E  |.|
+EMBED:   0150: 0A20203C 2F747275 7374496E 666F3E0A  |.  .|
+EMBED:   0160: 20203C64 6570656E 64656E63 793E0A20  |  . |
+EMBED:   0170: 2020203C 64657065 6E64656E 74417373  |   .  .|
+EMBED:   01B0: 3C2F6465 70656E64 656E7441 7373656D  |.  .  ..|
+EMBED:   0200: 20203C61 7373656D 626C7949 64656E74  |  ..  ..|
 EMBED: )
diff --git a/lld/test/COFF/manifestinput.test b/lld/test/COFF/manifestinput.test
index 04af80a13312d..cbf27b1ea96b5 100644
--- a/lld/test/COFF/manifestinput.test
+++ b/lld/test/COFF/manifestinput.test
@@ -5,22 +5,21 @@
 # RUN:   /manifest:embed \
 # RUN:   /manifestuac:"level='requireAdministrator'" \
 # RUN:   /manifestinput:%p/Inputs/manifestinput.test %t.obj
-# RUN: llvm-readobj --coff-resources --file-headers %t.exe | FileCheck %s \
-# RUN:   -check-prefix TEST_EMBED
+# RUN: llvm-readobj --coff-resources --file-headers %t.exe | FileCheck %s
 
-TEST_EMBED:  ResourceTableRVA: 0x2000
-TEST_EMBED-NEXT: ResourceTableSize: 0x2A0
-TEST_EMBED-DAG:  Resources [
-TEST_EMBED-NEXT:   Total Number of Resources: 1
-TEST_EMBED-DAG:Number of String Entries: 0
-TEST_EMBED-NEXT:   Number of ID Entries: 1
-TEST_EMBED-NEXT:   Type: MANIFEST (ID 24) [
-TEST_EMBED-NEXT: Table Offset: 0x18
-TEST_EMBED-NEXT: Number of String Entries: 0
-TEST_EMBED-NEXT: Number of ID Entries: 1
-TEST_EMBED

[llvm-branch-commits] [lld] release/21.x: [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285) (PR #165605)

2025-10-29 Thread via llvm-branch-commits

llvmbot wrote:




@llvm/pr-subscribers-platform-windows

Author: None (llvmbot)


Changes

Backport 6ab8e8fa03

Requested by: @nga888

---
Full diff: https://github.com/llvm/llvm-project/pull/165605.diff


5 Files Affected:

- (modified) lld/COFF/DriverUtils.cpp (+1-1) 
- (added) lld/test/COFF/Inputs/manifest-uac.test (+11) 
- (added) lld/test/COFF/manifest-uac.test (+33) 
- (modified) lld/test/COFF/manifest.test (+34-31) 
- (modified) lld/test/COFF/manifestinput.test (+17-18) 


```diff
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index d8b41c7f45400..5ef41c4c0a086 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -387,7 +387,7 @@ std::string LinkerDriver::createDefaultXml() {
  << "\n";
   if (ctx.config.manifestUAC) {
-os << "  \n"
+os << "  \n"
<< "\n"
<< "  \n"
<< " 
+
+  
+
+  
+
+  
+
diff --git a/lld/test/COFF/manifest-uac.test b/lld/test/COFF/manifest-uac.test
new file mode 100644
index 0..d3a17c7282716
--- /dev/null
+++ b/lld/test/COFF/manifest-uac.test
@@ -0,0 +1,33 @@
+# REQUIRES: libxml2
+
+# RUN: yaml2obj %p/Inputs/ret42.yaml -o %t.obj
+# RUN: lld-link /out:%t.exe /entry:main \
+# RUN:   /manifest:embed \
+# RUN:   /manifestinput:%p/Inputs/manifest-uac.test %t.obj
+# RUN: llvm-readobj --coff-resources %t.exe | FileCheck %s
+
+CHECK:  Data (
+CHECK-NEXT:   : 3C3F786D 6C207665 7273696F 6E3D2231  |.|
+CHECK-NEXT:   0070: 0A20203C 74727573 74496E66 6F20786D  |.  ..  ..  <|
+CHECK-NEXT:   0120: 2F726571 75657374 65645072 6976696C  |/requestedPrivil|
+CHECK-NEXT:   0130: 65676573 3E0A2020 20203C2F 73656375  |eges>..  .|
+CHECK-NEXT:   0160: 0A   |.|
+CHECK-NEXT: )
diff --git a/lld/test/COFF/manifest.test b/lld/test/COFF/manifest.test
index 4910600bd3a17..09de96e9bccfa 100644
--- a/lld/test/COFF/manifest.test
+++ b/lld/test/COFF/manifest.test
@@ -10,7 +10,7 @@
 MANIFEST: 
 MANIFEST: 
-MANIFEST:   
+MANIFEST:   
 MANIFEST: 
 MANIFEST:   
 MANIFEST:  
@@ -26,7 +26,7 @@ MANIFEST: 
 UAC: 
 UAC: 
-UAC:   
+UAC:   
 UAC: 
 UAC:   
 UAC:  
@@ -43,7 +43,7 @@ UAC: 
 DEPENDENCY: 
 DEPENDENCY: 
-DEPENDENCY:   
+DEPENDENCY:   
 DEPENDENCY: 
 DEPENDENCY:   
 DEPENDENCY:  
@@ -90,7 +90,7 @@ NOUACNODEP: 
 SEVERALDEPS: 
 SEVERALDEPS: 
-SEVERALDEPS:   
+SEVERALDEPS:   
 SEVERALDEPS: 
 SEVERALDEPS:   
 SEVERALDEPS:  
@@ -139,31 +139,34 @@ EMBED:   0040: 6D61732D 6D696372 6F736F66 742D636F  
|mas-microsoft-co|
 EMBED:   0050: 6D3A6173 6D2E7631 220A2020 20202020  |m:asm.v1".  |
 EMBED:   0060: 20202020 6D616E69 66657374 56657273  |manifestVers|
 EMBED:   0070: 696F6E3D 22312E30 223E0A20 203C7472  |ion="1.0">.  ..  . . |
-EMBED:   0100: 203C2F72 65717565 73746564 50726976  | ..  .  ..  |
-EMBED:   0160: 20202020 3C617373 656D626C 79496465  |..  .  .<|
-EMBED:   01C0: 64657065 6E64656E 74417373 656D626C  |dependentAssembl|
-EMBED:   01D0: 793E0A20 20202020 203C6173 73656D62  |y>.  ..  ..|
+EMBED:   0080: 75737449 6E666F20 786D6C6E 733D2275  |ustInfo xmlns="u|
+EMBED:   0090: 726E3A73 6368656D 61732D6D 6963726F  |rn:schemas-micro|
+EMBED:   00A0: 736F6674 2D636F6D 3A61736D 2E763322  |soft-com:asm.v3"|
+EMBED:   00B0: 3E0A2020 20203C73 65637572 6974793E  |>.|
+EMBED:   00C0: 0A202020 2020203C 72657175 65737465  |.  .   |
+EMBED:   00E0: 20202020 20203C72 65717565 73746564  |  .  |
+EMBED:   0140: 0A202020 203C2F73 65637572 6974793E  |.|
+EMBED:   0150: 0A20203C 2F747275 7374496E 666F3E0A  |.  .|
+EMBED:   0160: 20203C64 6570656E 64656E63 793E0A20  |  . |
+EMBED:   0170: 2020203C 64657065 6E64656E 74417373  |   .  .|
+EMBED:   01B0: 3C2F6465 70656E64 656E7441 7373656D  |.  .  ..|
+EMBED:   0200: 20203C61 7373656D 626C7949 64656E74  |  ..  ..|
 EMBED: )
diff --git a/lld/test/COFF/manifestinput.test b/lld/test/COFF/manifestinput.test
index 04af80a13312d..cbf27b1ea96b5 100644
--- a/lld/test/COFF/manifestinput.test
+++ b/lld/test/COFF/manifestinput.test
@@ -5,22 +5,21 @@
 # RUN:   /manifest:embed \
 # RUN:   /manifestuac:"level='requireAdministrator'" \
 # RUN:   /manifestinput:%p/Inputs/manifestinput.test %t.obj
-# RUN: llvm-readobj --coff-resources --file-headers %t.exe | FileCheck %s \
-# RUN:   -check-prefix TEST_EMBED
+# RUN: llvm-readobj --coff-resources --file-headers %t.exe | FileCheck %s
 
-TEST_EMBED:  ResourceTableRVA: 0x2000
-TEST_EMBED-NEXT: ResourceTableSize: 0x2A0
-TEST_EMBED-DAG:  Resources [
-TEST_EMBED-NEXT:   Total Number of Resources: 1
-TEST_EMBED-DAG:Number of String Entries: 0
-TEST_EMBED-NEXT:   Number of ID Entries: 1
-TEST_EMBED-NEXT:   Type: MANIFEST (ID 24) [
-TEST_EMBED-NEXT: Table Offset: 0x18
-TEST_EMBED-NEXT: Number of String Entries: 0
-TEST_EMBED-NEXT: Number of ID Entries: 1
-TE

[llvm-branch-commits] [lld] release/21.x: [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285) (PR #165605)

2025-10-29 Thread via llvm-branch-commits

https://github.com/llvmbot milestoned 
https://github.com/llvm/llvm-project/pull/165605
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits


[llvm-branch-commits] [lld] release/21.x: [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285) (PR #165605)

2025-10-29 Thread via llvm-branch-commits

llvmbot wrote:

@MaskRay What do you think about merging this PR to the release branch?

https://github.com/llvm/llvm-project/pull/165605
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits