[llvm-branch-commits] [llvm] [SLU][profcheck] create likely branch weights for guard->branch (PR #164271)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164271
>From 466d5f3f12337d3cccdaf184b83c6fb80c0a9186 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Mon, 20 Oct 2025 08:21:26 -0700
Subject: [PATCH] [SLU][profcheck] create likely branch weights for
guard->branch
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 12 +-
.../Transforms/SimpleLoopUnswitch/guards.ll | 181 +-
2 files changed, 139 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index dd36a63c1564b..88a19d3683991 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
@@ -2829,9 +2830,14 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst
*GI, Loop &L,
MSSAU->getMemorySSA()->verifyMemorySSA();
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- Instruction *DeoptBlockTerm =
- SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
-GI->getMetadata(LLVMContext::MD_prof), &DTU,
&LI);
+ // llvm.experimental.guard doesn't have branch weights. We can assume,
+ // however, that the deopt path is unlikely.
+ Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
+ GI->getArgOperand(0), GI, true,
+ !ProfcheckDisableMetadataFixes && EstimateProfile
+ ? MDBuilder(GI->getContext()).createUnlikelyBranchWeights()
+ : nullptr,
+ &DTU, &LI);
BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
// SplitBlockAndInsertIfThen inserts control flow that branches to
// DeoptBlockTerm if the condition is true. We want the opposite.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
index 533b1f691f5ad..e83047e397d3d 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
@@ -1,26 +1,34 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --check-globals all --version 5
; RUN: opt -passes='loop(simple-loop-unswitch),verify'
-simple-loop-unswitch-guards -S < %s | FileCheck %s
; RUN: opt -passes='simple-loop-unswitch'
-simple-loop-unswitch-guards -S < %s | FileCheck %s
; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify'
-simple-loop-unswitch-guards -verify-memoryssa -verify-loop-info -S < %s |
FileCheck %s
declare void @llvm.experimental.guard(i1, ...)
-define void @test_simple_case(i1 %cond, i32 %N) {
-; CHECK-LABEL: @test_simple_case(
+define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
+; CHECK-LABEL: define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT:br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond, label %entry.split.us, label %entry.split, !prof
!1
; CHECK: entry.split.us:
-; CHECK-NEXT:br label [[LOOP_US:%.*]]
+; CHECK-NEXT:br label %loop.us
; CHECK: loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [
[[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
-; CHECK-NEXT:br label [[GUARDED_US]]
+; CHECK-NEXT:%iv.us = phi i32 [ 0, %entry.split.us ], [ %iv.next.us,
%guarded.us ]
+; CHECK-NEXT:br label %guarded.us
; CHECK: guarded.us:
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
-; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:%iv.next.us = add i32 %iv.us, 1
+; CHECK-NEXT:%loop.cond.us = icmp slt i32 %iv.next.us, %N
+; CHECK-NEXT:br i1 %loop.cond.us, label %loop.us, label %exit.split.us
+; CHECK: exit.split.us:
+; CHECK-NEXT:br label %exit
+; CHECK: entry.split:
+; CHECK-NEXT:br label %loop
+; CHECK: loop:
+; CHECK-NEXT:br label %deopt
; CHECK: deopt:
; CHECK-NEXT:call void (i1, ...) @llvm.experimental.guard(i1 false) [
"deopt"() ]
; CHECK-NEXT:unreachable
+; CHECK: exit:
+; CHECK-NEXT:ret void
;
entry:
@@ -38,25 +46,39 @@ exit:
}
define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
-; CHECK-LABEL: @test_two_guards(
+; CHECK-LABEL: define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
; CHECK-NEXT: entry:
-; CHECK-NEXT:br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond1, label %entry.split.us, label %entry.split,
!prof !1
; CHECK:
[llvm-branch-commits] [llvm] [LIR][profcheck] Reuse the loop's exit condition profile (PR #164523)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164523
>From aafb258bc4561fb35d86518a2d96769ab9b2ac46 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 17:24:49 -0700
Subject: [PATCH] [LIR][profcheck] Reuse the loop's exit condition profile
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 40 +--
.../LoopIdiom/X86/preserve-profile.ll | 70 +++
2 files changed, 106 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
STATISTIC(NumShiftUntilZero,
"Number of uncountable loops recognized as 'shift until zero'
idiom");
+namespace llvm {
bool DisableLIRP::All;
static cl::opt
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt ForceMemsetPatternIntrinsic(
cl::desc("Use memset.pattern intrinsic whenever possible"),
cl::init(false),
cl::Hidden);
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
namespace {
class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// The loop trip count check.
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
- Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+ std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+ }
+
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop,
ScalarEvolution *SE,
/// %start = <...>
/// %extraoffset = <...>
/// <...>
-/// br label %for.cond
+/// br label %loop
///
/// loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
// The loop terminator.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
- Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+if (InvertedCond)
+ std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+ }
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt
-passes="module(print),function(loop(loop-idiom)),module(print)"
-mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck
--check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+ %nbits = add nsw i8 %iv, %extraoffset
+ %val.shifted = ashr i8 %val, %nbits
+ %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+ %iv.next = add i8 %iv, 1
+
+ call void @escap
[llvm-branch-commits] [llvm] [SLU][profcheck] Propagate profile for branches on injected conditions. (PR #164476)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164476
>From f696a5d6aa0895cfad1a61eab1f3a0ee79bd894c Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 11:22:01 -0700
Subject: [PATCH] [SLU][profcheck] Propagate profile for branches on injected
conditions.
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 9 +-
.../inject-invariant-conditions.ll| 142 +-
2 files changed, 79 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 88a19d3683991..988825737f04f 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -3201,10 +3201,15 @@
injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
Builder.SetInsertPoint(TI);
auto *InvariantBr =
Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
+ // We don't know anything about the relation between the limits.
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE);
Builder.SetInsertPoint(CheckBlock);
- Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
- TI->getSuccessor(1));
+ Builder.CreateCondBr(
+ TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
+ !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
+ : nullptr);
TI->eraseFromParent();
// Fixup phis.
diff --git
a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
index 536e0c6a0e74a..3c84dea2a0672 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
@@ -2,40 +2,40 @@
; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true
-passes="loop(simple-loop-unswitch),simplifycfg" | FileCheck %s
; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true
-passes="loop-mssa(simple-loop-unswitch),simplifycfg"
-verify-memoryssa | FileCheck %s
-define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr
noundef %arr, ptr noundef %x_p) {
+define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr
noundef %arr, ptr noundef %x_p) !prof !{!"function_entry_count", i32 10} {
; CHECK-LABEL: @test_01(
; CHECK-NEXT: entry:
-; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef
[[META1:![0-9]+]]
; CHECK-NEXT:[[INJECTED_COND:%.*]] = icmp ule i32 [[LIMIT:%.*]], [[X]]
-; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label
[[LOOP:%.*]]
+; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label
[[LOOP:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]],
[[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:[[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32
[[IV_US]]
-; CHECK-NEXT:[[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
-; CHECK-NEXT:[[BOUND_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LIMIT]]
-; CHECK-NEXT:br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label
[[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]]
], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV]]
+; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT:[[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT]]
+; CHECK-NEXT:br i1 [[BOUND_CHECK]], label [[GUARDED_US]], label
[[COMMON_RET:%.*]], !prof [[PROF3:![0-9]+]]
; CHECK: guarded.us:
-; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[X]]
-; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32
[[EL_US]]
-; CHECK-NEXT:store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL]], [[X]]
+; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32
[[EL]]
+; CHECK-NEXT:store i32 [[IV]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV]], 1
; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label
[[COMMON_RET]]
; CHECK: loop:
-; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [
0, [[ENTRY]] ]
-; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
-; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
-; CHECK-NEXT:[[BOUND_CHEC
[llvm-branch-commits] [llvm] [LIR][profcheck] Reuse the loop's exit condition profile (PR #164523)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164523
>From aafb258bc4561fb35d86518a2d96769ab9b2ac46 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 17:24:49 -0700
Subject: [PATCH] [LIR][profcheck] Reuse the loop's exit condition profile
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 40 +--
.../LoopIdiom/X86/preserve-profile.ll | 70 +++
2 files changed, 106 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
STATISTIC(NumShiftUntilZero,
"Number of uncountable loops recognized as 'shift until zero'
idiom");
+namespace llvm {
bool DisableLIRP::All;
static cl::opt
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt ForceMemsetPatternIntrinsic(
cl::desc("Use memset.pattern intrinsic whenever possible"),
cl::init(false),
cl::Hidden);
+extern cl::opt<bool> ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
namespace {
class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// The loop trip count check.
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
- Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+ std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+ }
+
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop,
ScalarEvolution *SE,
/// %start = <...>
/// %extraoffset = <...>
/// <...>
-/// br label %for.cond
+/// br label %loop
///
/// loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
// The loop terminator.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
- Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector<uint32_t> BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+if (InvertedCond)
+ std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+ }
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt
-passes="module(print),function(loop(loop-idiom)),module(print)"
-mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck
--check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+ %nbits = add nsw i8 %iv, %extraoffset
+ %val.shifted = ashr i8 %val, %nbits
+ %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+ %iv.next = add i8 %iv, 1
+
+ call void @escap
[llvm-branch-commits] [llvm] [LVer][profcheck] explicitly set unknown branch weights for the versioned/unversioned selector (PR #164507)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164507
>From e745aaf0e5454fd7f79269517a1ca2eb4c582edd Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 15:20:14 -0700
Subject: [PATCH] [LVer][profcheck] explicitly set unknown branch weights for
the versioned/unversioned selector
---
llvm/lib/Transforms/Utils/LoopVersioning.cpp | 10 --
.../Transforms/LoopDistribute/basic-with-memchecks.ll | 5 +++--
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index ec2e6c1ab796b..4786819d18fa4 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -109,8 +110,13 @@ void LoopVersioning::versionLoop(
// Insert the conditional branch based on the result of the memchecks.
Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
Builder.SetInsertPoint(OrigTerm);
- Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader());
+ auto *BI =
+ Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+ VersionedLoop->getLoopPreheader());
+ // We don't know what the probability of executing the versioned vs the
+ // unversioned variants is.
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *BI, *BI->getParent()->getParent(), DEBUG_TYPE);
OrigTerm->eraseFromParent();
// The loops merge in the original exit block. This is now dominated by the
diff --git a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 97ea2c6708dad..2828882afe779 100644
--- a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.10.0"
@E = common global ptr null, align 8
; CHECK-LABEL: @f(
-define void @f() {
+define void @f() !prof !{!"function_entry_count", i32 10} {
entry:
%a = load ptr, ptr @A, align 8
%b = load ptr, ptr @B, align 8
@@ -55,7 +55,7 @@ entry:
; CHECK: = icmp
; CHECK-NOT: = icmp
-; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label
%for.body.ph.ldist1
+; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label
%for.body.ph.ldist1, !prof ![[PROF1:[0-9]]]
; The non-distributed loop that the memchecks fall back on.
@@ -289,3 +289,4 @@ attributes #1 = { nounwind convergent }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.distribute.enable", i1 true}
+; CHECK: ![[PROF1]] = !{!"unknown", !"loop-versioning"}
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SLU][profcheck] create likely branch weights for guard->branch (PR #164271)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164271
>From 466d5f3f12337d3cccdaf184b83c6fb80c0a9186 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Mon, 20 Oct 2025 08:21:26 -0700
Subject: [PATCH] [SLU][profcheck] create likely branch weights for
guard->branch
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 12 +-
.../Transforms/SimpleLoopUnswitch/guards.ll | 181 +-
2 files changed, 139 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index dd36a63c1564b..88a19d3683991 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
@@ -2829,9 +2830,14 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst
*GI, Loop &L,
MSSAU->getMemorySSA()->verifyMemorySSA();
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- Instruction *DeoptBlockTerm =
- SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
-GI->getMetadata(LLVMContext::MD_prof), &DTU,
&LI);
+ // llvm.experimental.guard doesn't have branch weights. We can assume,
+ // however, that the deopt path is unlikely.
+ Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
+ GI->getArgOperand(0), GI, true,
+ !ProfcheckDisableMetadataFixes && EstimateProfile
+ ? MDBuilder(GI->getContext()).createUnlikelyBranchWeights()
+ : nullptr,
+ &DTU, &LI);
BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
// SplitBlockAndInsertIfThen inserts control flow that branches to
// DeoptBlockTerm if the condition is true. We want the opposite.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
index 533b1f691f5ad..e83047e397d3d 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
@@ -1,26 +1,34 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --check-globals all --version 5
; RUN: opt -passes='loop(simple-loop-unswitch),verify'
-simple-loop-unswitch-guards -S < %s | FileCheck %s
; RUN: opt -passes='simple-loop-unswitch'
-simple-loop-unswitch-guards -S < %s | FileCheck %s
; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify'
-simple-loop-unswitch-guards -verify-memoryssa -verify-loop-info -S < %s |
FileCheck %s
declare void @llvm.experimental.guard(i1, ...)
-define void @test_simple_case(i1 %cond, i32 %N) {
-; CHECK-LABEL: @test_simple_case(
+define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
+; CHECK-LABEL: define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT:br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond, label %entry.split.us, label %entry.split, !prof
!1
; CHECK: entry.split.us:
-; CHECK-NEXT:br label [[LOOP_US:%.*]]
+; CHECK-NEXT:br label %loop.us
; CHECK: loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [
[[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
-; CHECK-NEXT:br label [[GUARDED_US]]
+; CHECK-NEXT:%iv.us = phi i32 [ 0, %entry.split.us ], [ %iv.next.us,
%guarded.us ]
+; CHECK-NEXT:br label %guarded.us
; CHECK: guarded.us:
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
-; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:%iv.next.us = add i32 %iv.us, 1
+; CHECK-NEXT:%loop.cond.us = icmp slt i32 %iv.next.us, %N
+; CHECK-NEXT:br i1 %loop.cond.us, label %loop.us, label %exit.split.us
+; CHECK: exit.split.us:
+; CHECK-NEXT:br label %exit
+; CHECK: entry.split:
+; CHECK-NEXT:br label %loop
+; CHECK: loop:
+; CHECK-NEXT:br label %deopt
; CHECK: deopt:
; CHECK-NEXT:call void (i1, ...) @llvm.experimental.guard(i1 false) [
"deopt"() ]
; CHECK-NEXT:unreachable
+; CHECK: exit:
+; CHECK-NEXT:ret void
;
entry:
@@ -38,25 +46,39 @@ exit:
}
define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
-; CHECK-LABEL: @test_two_guards(
+; CHECK-LABEL: define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
; CHECK-NEXT: entry:
-; CHECK-NEXT:br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond1, label %entry.split.us, label %entry.split,
!prof !1
; CHECK:
[llvm-branch-commits] [compiler-rt] [compiler-rt] Default to Lit's Internal Shell (PR #165148)
https://github.com/fmayer approved this pull request. https://github.com/llvm/llvm-project/pull/165148 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LVer][profcheck] explicitly set unknown branch weights for the versioned/unversioned selector (PR #164507)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164507
>From e745aaf0e5454fd7f79269517a1ca2eb4c582edd Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 15:20:14 -0700
Subject: [PATCH] [LVer][profcheck] explicitly set unknown branch weights for
the versioned/unversioned selector
---
llvm/lib/Transforms/Utils/LoopVersioning.cpp | 10 --
.../Transforms/LoopDistribute/basic-with-memchecks.ll | 5 +++--
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index ec2e6c1ab796b..4786819d18fa4 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -109,8 +110,13 @@ void LoopVersioning::versionLoop(
// Insert the conditional branch based on the result of the memchecks.
Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
Builder.SetInsertPoint(OrigTerm);
- Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader());
+ auto *BI =
+ Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+ VersionedLoop->getLoopPreheader());
+ // We don't know what the probability of executing the versioned vs the
+ // unversioned variants is.
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *BI, *BI->getParent()->getParent(), DEBUG_TYPE);
OrigTerm->eraseFromParent();
// The loops merge in the original exit block. This is now dominated by the
diff --git a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 97ea2c6708dad..2828882afe779 100644
--- a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.10.0"
@E = common global ptr null, align 8
; CHECK-LABEL: @f(
-define void @f() {
+define void @f() !prof !{!"function_entry_count", i32 10} {
entry:
%a = load ptr, ptr @A, align 8
%b = load ptr, ptr @B, align 8
@@ -55,7 +55,7 @@ entry:
; CHECK: = icmp
; CHECK-NOT: = icmp
-; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label
%for.body.ph.ldist1
+; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label
%for.body.ph.ldist1, !prof ![[PROF1:[0-9]]]
; The non-distributed loop that the memchecks fall back on.
@@ -289,3 +289,4 @@ attributes #1 = { nounwind convergent }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.distribute.enable", i1 true}
+; CHECK: ![[PROF1]] = !{!"unknown", !"loop-versioning"}
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LSCFG][profcheck] Add dummy branch weights for the dummy switch to dead exits (PR #164714)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164714
>From 5b7729327168a7bfbd0c6f561cd0e6a55d54fb10 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Wed, 22 Oct 2025 14:34:31 -0700
Subject: [PATCH] [LSCFG][profcheck] Add dummy branch weights for the dummy
switch to dead exits
---
.../lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 12 ++
.../LoopSimplifyCFG/constant-fold-branch.ll | 104 +-
2 files changed, 66 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index b9546c5fa236b..e902b71776973 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -393,6 +394,17 @@ class ConstantTerminatorFoldingImpl {
DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
++NumLoopExitsDeleted;
}
+// We don't really need to add branch weights to DummySwitch, because all
+// but one branches are just a temporary artifact - see the comment on top
+// of this function. But, it's easy to estimate the weights, and it helps
+// maintain a property of the overall compiler - that the branch weights
+// don't "just get dropped" accidentally (i.e. profcheck)
+if (DummySwitch->getParent()->getParent()->hasProfileData()) {
+ SmallVector DummyBranchWeights(1 + DummySwitch->getNumCases());
+ // default. 100% probability, the rest are dead.
+ DummyBranchWeights[0] = 1;
+ setBranchWeights(*DummySwitch, DummyBranchWeights, /*IsExpected=*/false);
+}
assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
diff --git a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
index 1ec212f0bb5ea..46b6209986fed 100644
--- a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
+++ b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: --check-globals
; REQUIRES: asserts
; RUN: opt -S -enable-loop-simplifycfg-term-folding=true
-passes=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa
< %s | FileCheck %s
; RUN: opt -S -enable-loop-simplifycfg-term-folding=true
-passes='require,loop(loop-simplifycfg)' -verify-loop-info
-verify-dom-info -verify-loop-lcssa < %s | FileCheck %s
@@ -59,7 +59,7 @@ define i32 @dead_backedge_test_switch_loop(i32 %end) {
; CHECK: dead_backedge:
; CHECK-NEXT:[[I_2]] = add i32 [[I_1]], 10
; CHECK-NEXT:switch i32 1, label [[EXIT:%.*]] [
-; CHECK-NEXT:i32 0, label [[HEADER_BACKEDGE]]
+; CHECK-NEXT: i32 0, label [[HEADER_BACKEDGE]]
; CHECK-NEXT:]
; CHECK: exit:
; CHECK-NEXT:[[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ]
@@ -233,12 +233,12 @@ exit:
; Check that we preserve static reachibility of a dead exit block while
deleting
; a branch.
-define i32 @dead_exit_test_branch_loop(i32 %end) {
+define i32 @dead_exit_test_branch_loop(i32 %end) !prof
!{!"function_entry_count", i32 10} {
; CHECK-LABEL: @dead_exit_test_branch_loop(
; CHECK-NEXT: preheader:
; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
-; CHECK-NEXT:]
+; CHECK-NEXT: i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT:], !prof [[PROF1:![0-9]+]]
; CHECK: preheader.split:
; CHECK-NEXT:br label [[HEADER:%.*]]
; CHECK: header:
@@ -262,7 +262,7 @@ preheader:
header:
%i = phi i32 [0, %preheader], [%i.inc, %backedge]
- br i1 true, label %backedge, label %dead
+ br i1 true, label %backedge, label %dead, !prof !{!"branch_weights", i32 10,
i32 1}
dead:
br label %dummy
@@ -286,7 +286,7 @@ define i32 @dead_exit_test_switch_loop(i32 %end) {
; CHECK-LABEL: @dead_exit_test_switch_loop(
; CHECK-NEXT: preheader:
; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT: i32 1, label [[DEAD:%.*]]
; CHECK-NEXT:]
; CHECK: preheader.split:
; CHECK-NEXT:br label [[HEADER:%.*]]
@@ -383,9 +383,9 @@ define i32 @dead_loop_test_switch_loop(i32 %end) {
; CHECK: header:
; CHECK-NEXT:[[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [
[[I_INC:%.*]], [[BACKEDGE:%.*]] ]
; CHECK-NEXT:switch i32 1, label [[DEAD:%.*]] [
-; CHECK-NEXT:i32 0, label [[DEAD]]
-; CHECK-NEXT:i32 1, label [[BACKEDGE]]
-; CHECK-NEXT:i32 2, lab
[llvm-branch-commits] [llvm] [SLU][profcheck] Use the original branch weigths in `buildPartialInvariantUnswitchConditionalBranch` (PR #164270)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164270
>From f3362bc0bcb593c8e27c87a8e71ab0dc38e8dc72 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Sat, 18 Oct 2025 17:14:08 -0700
Subject: [PATCH] [SLU][profcheck] Use the original branch weigths in
`buildPartialInvariantUnswitchConditionalBranch`
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 17 -
.../SimpleLoopUnswitch/partial-unswitch.ll| 75 +++
2 files changed, 56 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index bb6c879f4d47e..dd36a63c1564b 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -337,7 +337,7 @@ static void buildPartialUnswitchConditionalBranch(
static void buildPartialInvariantUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef ToDuplicate, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
-MemorySSAUpdater *MSSAU) {
+MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
ValueToValueMapTy VMap;
for (auto *Val : reverse(ToDuplicate)) {
Instruction *Inst = cast(Val);
@@ -377,8 +377,17 @@ static void buildPartialInvariantUnswitchConditionalBranch(
IRBuilder<> IRB(&BB);
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
Value *Cond = VMap[ToDuplicate[0]];
- IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
- Direction ? &NormalSucc : &UnswitchedSucc);
+ auto *ProfData =
+ !ProfcheckDisableMetadataFixes &&
+ ToDuplicate[0] ==
skipTrivialSelect(OriginalBranch.getCondition())
+ ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
+ : nullptr;
+ auto *BR =
+ IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+ Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
+ if (!ProfData)
+setExplicitlyUnknownBranchWeightsIfProfiled(
+*BR, *BR->getParent()->getParent(), DEBUG_TYPE);
}
/// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -2515,7 +2524,7 @@ static void unswitchNontrivialInvariants(
// the branch in the split block.
if (PartiallyInvariant)
buildPartialInvariantUnswitchConditionalBranch(
- *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
else {
buildPartialUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
index 1d8942079ffd8..87161707d9f69 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -1,14 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: --check-globals
; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify'
-S < %s | FileCheck %s
declare void @clobber()
-define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {
+define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) !prof !0 {
; CHECK-LABEL: @partial_unswitch_true_successor(
; CHECK-NEXT: entry:
; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
-; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: entry.split.us:
; CHECK-NEXT:br label [[LOOP_HEADER_US:%.*]]
; CHECK: loop.header.us:
@@ -19,7 +19,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N)
{
; CHECK: loop.latch.us:
; CHECK-NEXT:[[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label
[[EXIT_SPLIT_US:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: exit.split.us:
; CHECK-NEXT:br label [[EXIT:%.*]]
; CHECK: entry.split:
@@ -28,7 +28,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N)
{
; CHECK-NEXT:[[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [
[[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:[[LV:%.*]] = load i32, ptr [[PTR]], align 4
; CHECK-NEXT:[[SC:%.*]] = icmp eq i32 [[LV]], 100
-; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]],
!prof [[PROF1]]
; CHECK: noclobber:
; CHECK-NEXT:
[llvm-branch-commits] [llvm] [SLU][profcheck] Propagate profile for branches on injected conditions. (PR #164476)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164476
>From f696a5d6aa0895cfad1a61eab1f3a0ee79bd894c Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 11:22:01 -0700
Subject: [PATCH] [SLU][profcheck] Propagate profile for branches on injected
conditions.
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 9 +-
.../inject-invariant-conditions.ll| 142 +-
2 files changed, 79 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 88a19d3683991..988825737f04f 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -3201,10 +3201,15 @@
injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
Builder.SetInsertPoint(TI);
auto *InvariantBr =
Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
+ // We don't know anything about the relation between the limits.
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE);
Builder.SetInsertPoint(CheckBlock);
- Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
- TI->getSuccessor(1));
+ Builder.CreateCondBr(
+ TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
+ !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
+ : nullptr);
TI->eraseFromParent();
// Fixup phis.
diff --git
a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
index 536e0c6a0e74a..3c84dea2a0672 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
@@ -2,40 +2,40 @@
; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true
-passes="loop(simple-loop-unswitch),simplifycfg" | FileCheck %s
; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true
-passes="loop-mssa(simple-loop-unswitch),simplifycfg"
-verify-memoryssa | FileCheck %s
-define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr
noundef %arr, ptr noundef %x_p) {
+define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr
noundef %arr, ptr noundef %x_p) !prof !{!"function_entry_count", i32 10} {
; CHECK-LABEL: @test_01(
; CHECK-NEXT: entry:
-; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef
[[META1:![0-9]+]]
; CHECK-NEXT:[[INJECTED_COND:%.*]] = icmp ule i32 [[LIMIT:%.*]], [[X]]
-; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label
[[LOOP:%.*]]
+; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label
[[LOOP:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]],
[[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:[[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32
[[IV_US]]
-; CHECK-NEXT:[[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
-; CHECK-NEXT:[[BOUND_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LIMIT]]
-; CHECK-NEXT:br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label
[[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]]
], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV]]
+; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT:[[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT]]
+; CHECK-NEXT:br i1 [[BOUND_CHECK]], label [[GUARDED_US]], label
[[COMMON_RET:%.*]], !prof [[PROF3:![0-9]+]]
; CHECK: guarded.us:
-; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[X]]
-; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32
[[EL_US]]
-; CHECK-NEXT:store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL]], [[X]]
+; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32
[[EL]]
+; CHECK-NEXT:store i32 [[IV]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV]], 1
; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label
[[COMMON_RET]]
; CHECK: loop:
-; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [
0, [[ENTRY]] ]
-; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
-; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
-; CHECK-NEXT:[[BOUND_CHEC
[llvm-branch-commits] [llvm] [SLU][profcheck] Use the original branch weigths in `buildPartialInvariantUnswitchConditionalBranch` (PR #164270)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164270
>From f3362bc0bcb593c8e27c87a8e71ab0dc38e8dc72 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Sat, 18 Oct 2025 17:14:08 -0700
Subject: [PATCH] [SLU][profcheck] Use the original branch weigths in
`buildPartialInvariantUnswitchConditionalBranch`
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 17 -
.../SimpleLoopUnswitch/partial-unswitch.ll| 75 +++
2 files changed, 56 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index bb6c879f4d47e..dd36a63c1564b 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -337,7 +337,7 @@ static void buildPartialUnswitchConditionalBranch(
static void buildPartialInvariantUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef ToDuplicate, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
-MemorySSAUpdater *MSSAU) {
+MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
ValueToValueMapTy VMap;
for (auto *Val : reverse(ToDuplicate)) {
Instruction *Inst = cast(Val);
@@ -377,8 +377,17 @@ static void buildPartialInvariantUnswitchConditionalBranch(
IRBuilder<> IRB(&BB);
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
Value *Cond = VMap[ToDuplicate[0]];
- IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
- Direction ? &NormalSucc : &UnswitchedSucc);
+ auto *ProfData =
+ !ProfcheckDisableMetadataFixes &&
+ ToDuplicate[0] ==
skipTrivialSelect(OriginalBranch.getCondition())
+ ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
+ : nullptr;
+ auto *BR =
+ IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+ Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
+ if (!ProfData)
+setExplicitlyUnknownBranchWeightsIfProfiled(
+*BR, *BR->getParent()->getParent(), DEBUG_TYPE);
}
/// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -2515,7 +2524,7 @@ static void unswitchNontrivialInvariants(
// the branch in the split block.
if (PartiallyInvariant)
buildPartialInvariantUnswitchConditionalBranch(
- *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
else {
buildPartialUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
index 1d8942079ffd8..87161707d9f69 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -1,14 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: --check-globals
; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify'
-S < %s | FileCheck %s
declare void @clobber()
-define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {
+define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) !prof !0 {
; CHECK-LABEL: @partial_unswitch_true_successor(
; CHECK-NEXT: entry:
; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
-; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: entry.split.us:
; CHECK-NEXT:br label [[LOOP_HEADER_US:%.*]]
; CHECK: loop.header.us:
@@ -19,7 +19,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N)
{
; CHECK: loop.latch.us:
; CHECK-NEXT:[[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label
[[EXIT_SPLIT_US:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: exit.split.us:
; CHECK-NEXT:br label [[EXIT:%.*]]
; CHECK: entry.split:
@@ -28,7 +28,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N)
{
; CHECK-NEXT:[[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [
[[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:[[LV:%.*]] = load i32, ptr [[PTR]], align 4
; CHECK-NEXT:[[SC:%.*]] = icmp eq i32 [[LV]], 100
-; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]],
!prof [[PROF1]]
; CHECK: noclobber:
; CHECK-NEXT:
[llvm-branch-commits] [llvm] [LSCFG][profcheck] Add dummy branch weights for the dummy switch to dead exits (PR #164714)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164714
>From 5b7729327168a7bfbd0c6f561cd0e6a55d54fb10 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Wed, 22 Oct 2025 14:34:31 -0700
Subject: [PATCH] [LSCFG][profcheck] Add dummy branch weights for the dummy
switch to dead exits
---
.../lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 12 ++
.../LoopSimplifyCFG/constant-fold-branch.ll | 104 +-
2 files changed, 66 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index b9546c5fa236b..e902b71776973 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -393,6 +394,17 @@ class ConstantTerminatorFoldingImpl {
DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
++NumLoopExitsDeleted;
}
+// We don't really need to add branch weights to DummySwitch, because all
+// but one branches are just a temporary artifact - see the comment on top
+// of this function. But, it's easy to estimate the weights, and it helps
+// maintain a property of the overall compiler - that the branch weights
+// don't "just get dropped" accidentally (i.e. profcheck)
+if (DummySwitch->getParent()->getParent()->hasProfileData()) {
+ SmallVector DummyBranchWeights(1 + DummySwitch->getNumCases());
+ // default. 100% probability, the rest are dead.
+ DummyBranchWeights[0] = 1;
+ setBranchWeights(*DummySwitch, DummyBranchWeights, /*IsExpected=*/false);
+}
assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
diff --git a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
index 1ec212f0bb5ea..46b6209986fed 100644
--- a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
+++ b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: --check-globals
; REQUIRES: asserts
; RUN: opt -S -enable-loop-simplifycfg-term-folding=true
-passes=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa
< %s | FileCheck %s
; RUN: opt -S -enable-loop-simplifycfg-term-folding=true
-passes='require,loop(loop-simplifycfg)' -verify-loop-info
-verify-dom-info -verify-loop-lcssa < %s | FileCheck %s
@@ -59,7 +59,7 @@ define i32 @dead_backedge_test_switch_loop(i32 %end) {
; CHECK: dead_backedge:
; CHECK-NEXT:[[I_2]] = add i32 [[I_1]], 10
; CHECK-NEXT:switch i32 1, label [[EXIT:%.*]] [
-; CHECK-NEXT:i32 0, label [[HEADER_BACKEDGE]]
+; CHECK-NEXT: i32 0, label [[HEADER_BACKEDGE]]
; CHECK-NEXT:]
; CHECK: exit:
; CHECK-NEXT:[[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ]
@@ -233,12 +233,12 @@ exit:
; Check that we preserve static reachibility of a dead exit block while
deleting
; a branch.
-define i32 @dead_exit_test_branch_loop(i32 %end) {
+define i32 @dead_exit_test_branch_loop(i32 %end) !prof
!{!"function_entry_count", i32 10} {
; CHECK-LABEL: @dead_exit_test_branch_loop(
; CHECK-NEXT: preheader:
; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
-; CHECK-NEXT:]
+; CHECK-NEXT: i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT:], !prof [[PROF1:![0-9]+]]
; CHECK: preheader.split:
; CHECK-NEXT:br label [[HEADER:%.*]]
; CHECK: header:
@@ -262,7 +262,7 @@ preheader:
header:
%i = phi i32 [0, %preheader], [%i.inc, %backedge]
- br i1 true, label %backedge, label %dead
+ br i1 true, label %backedge, label %dead, !prof !{!"branch_weights", i32 10,
i32 1}
dead:
br label %dummy
@@ -286,7 +286,7 @@ define i32 @dead_exit_test_switch_loop(i32 %end) {
; CHECK-LABEL: @dead_exit_test_switch_loop(
; CHECK-NEXT: preheader:
; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT: i32 1, label [[DEAD:%.*]]
; CHECK-NEXT:]
; CHECK: preheader.split:
; CHECK-NEXT:br label [[HEADER:%.*]]
@@ -383,9 +383,9 @@ define i32 @dead_loop_test_switch_loop(i32 %end) {
; CHECK: header:
; CHECK-NEXT:[[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [
[[I_INC:%.*]], [[BACKEDGE:%.*]] ]
; CHECK-NEXT:switch i32 1, label [[DEAD:%.*]] [
-; CHECK-NEXT:i32 0, label [[DEAD]]
-; CHECK-NEXT:i32 1, label [[BACKEDGE]]
-; CHECK-NEXT:i32 2, lab
[llvm-branch-commits] [llvm] [DA] Check nsw when extracting a constant operand of SCEVMul (PR #164408)
https://github.com/kasuga-fj updated
https://github.com/llvm/llvm-project/pull/164408
>From 4c5c963d99d99b4649b39fca172917fc0b09ccb2 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga
Date: Tue, 21 Oct 2025 12:23:25 +
Subject: [PATCH 1/2] [DA] Check nsw when extracting a constant operand of
SCEVMul
---
llvm/lib/Analysis/DependenceAnalysis.cpp | 5 +++--
llvm/test/Analysis/DependenceAnalysis/GCD.ll | 6 +++---
.../Analysis/DependenceAnalysis/SymbolicSIV.ll| 4 ++--
.../DependenceAnalysis/gcd-miv-overflow.ll| 15 ++-
4 files changed, 14 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp
b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 853bd66c8a7f8..36ac252aba6ed 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -2828,8 +2828,9 @@ static std::optional getConstantPart(const SCEV
*Expr) {
if (const auto *Constant = dyn_cast(Expr))
return Constant->getAPInt();
if (const auto *Product = dyn_cast(Expr))
-if (const auto *Constant = dyn_cast(Product->getOperand(0)))
- return Constant->getAPInt();
+if (Product->hasNoSignedWrap())
+ if (auto *Constant = dyn_cast(Product->getOperand(0)))
+return Constant->getAPInt();
return std::nullopt;
}
diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
index 03343e7a98211..cb14d189afe4c 100644
--- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
@@ -254,7 +254,7 @@ define void @gcd4(ptr %A, ptr %B, i64 %M, i64 %N) nounwind
uwtable ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%conv, ptr %arrayidx, align 4
; CHECK-NEXT:da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load
i32, ptr %arrayidx16, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%0, ptr %B.addr.11, align 4
; CHECK-NEXT:da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load
i32, ptr %arrayidx16, align 4
@@ -322,7 +322,7 @@ define void @gcd5(ptr %A, ptr %B, i64 %M, i64 %N) nounwind
uwtable ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%conv, ptr %arrayidx, align 4
; CHECK-NEXT:da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load
i32, ptr %arrayidx16, align 4
-; CHECK-NEXT:da analyze - flow [<> *]!
+; CHECK-NEXT:da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%0, ptr %B.addr.11, align 4
; CHECK-NEXT:da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load
i32, ptr %arrayidx16, align 4
@@ -390,7 +390,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable
ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store
i32 %conv, ptr %arrayidx5, align 4
; CHECK-NEXT:da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %2 =
load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store
i32 %2, ptr %B.addr.12, align 4
; CHECK-NEXT:da analyze - confused!
; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: %2 = load
i32, ptr %arrayidx9, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index cdfaec76fa892..73a415baef4c4 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -384,7 +384,7 @@ define void @symbolicsiv6(ptr %A, ptr %B, i64 %n, i64 %N,
i64 %M) nounwind uwtab
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%conv, ptr %arrayidx, align 4
; CHECK-NEXT:da analyze - none!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load
i32, ptr %arrayidx7, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [*|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%0, ptr %B.addr.02, align 4
; CHECK-NEXT:da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load
i32, ptr %arrayidx7, align 4
@@ -440,7 +440,7 @@ define void @symbolicsiv7(ptr %A, ptr %B, i64 %n, i64 %N,
i64 %M) nounwind uwtab
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%conv, ptr %arrayidx, align 4
; CHECK-NEXT:da analyze - none!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load
[llvm-branch-commits] [llvm] [llvm-readobj, ELF] Support reading binary with more than PN_XNUM segments. (PR #165278)
aokblast wrote: > @aokblast, is this ready for review? It's currently still attempting to merge > into another of your user branches, whereas if it's ready for review, I'd > expect it to be merging into `main`. Hello, yes, it is ready for review. However, I temporarily created a user branch based on the one you approved yesterday, since MaskRay hasn't accepted that patch yet. If he doesn't respond within a week, I will merge that patch so that this patch can be merged into main. https://github.com/llvm/llvm-project/pull/165278 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LIR][profcheck] Reuse the loop's exit condition profile (PR #164523)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164523
>From 1bd2ba55726bd525908b186e0460e0556e7c3c6a Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 17:24:49 -0700
Subject: [PATCH] [LIR][profcheck] Reuse the loop's exit condition profile
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 40 +--
.../LoopIdiom/X86/preserve-profile.ll | 70 +++
2 files changed, 106 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
STATISTIC(NumShiftUntilZero,
"Number of uncountable loops recognized as 'shift until zero'
idiom");
+namespace llvm {
bool DisableLIRP::All;
static cl::opt
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt ForceMemsetPatternIntrinsic(
cl::desc("Use memset.pattern intrinsic whenever possible"),
cl::init(false),
cl::Hidden);
+extern cl::opt ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
namespace {
class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// The loop trip count check.
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
- Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+ std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+ }
+
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop,
ScalarEvolution *SE,
/// %start = <...>
/// %extraoffset = <...>
/// <...>
-/// br label %for.cond
+/// br label %loop
///
/// loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
// The loop terminator.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
- Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+if (InvertedCond)
+ std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+ }
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0000000000000..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt
-passes="module(print<block-freq>),function(loop(loop-idiom)),module(print<block-freq>)"
-mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck
--check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+ %nbits = add nsw i8 %iv, %extraoffset
+ %val.shifted = ashr i8 %val, %nbits
+ %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+ %iv.next = add i8 %iv, 1
+
+ call void @escap
[llvm-branch-commits] [llvm] [LVer][profcheck] explicitly set unknown branch weights for the versioned/unversioned selector (PR #164507)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164507
>From 577971fd0c10a2733c3255609f5cb7db874011a4 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 15:20:14 -0700
Subject: [PATCH] [LVer][profcheck] explicitly set unknown branch weights for
the versioned/unversioned selector
---
llvm/lib/Transforms/Utils/LoopVersioning.cpp | 10 --
.../Transforms/LoopDistribute/basic-with-memchecks.ll | 5 +++--
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index ec2e6c1ab796b..4786819d18fa4 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -109,8 +110,13 @@ void LoopVersioning::versionLoop(
// Insert the conditional branch based on the result of the memchecks.
Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
Builder.SetInsertPoint(OrigTerm);
- Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader());
+ auto *BI =
+ Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+ VersionedLoop->getLoopPreheader());
+ // We don't know what the probability of executing the versioned vs the
+ // unversioned variants is.
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *BI, *BI->getParent()->getParent(), DEBUG_TYPE);
OrigTerm->eraseFromParent();
// The loops merge in the original exit block. This is now dominated by the
diff --git a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 97ea2c6708dad..2828882afe779 100644
--- a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.10.0"
@E = common global ptr null, align 8
; CHECK-LABEL: @f(
-define void @f() {
+define void @f() !prof !{!"function_entry_count", i32 10} {
entry:
%a = load ptr, ptr @A, align 8
%b = load ptr, ptr @B, align 8
@@ -55,7 +55,7 @@ entry:
; CHECK: = icmp
; CHECK-NOT: = icmp
-; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label
%for.body.ph.ldist1
+; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label
%for.body.ph.ldist1, !prof ![[PROF1:[0-9]]]
; The non-distributed loop that the memchecks fall back on.
@@ -289,3 +289,4 @@ attributes #1 = { nounwind convergent }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.distribute.enable", i1 true}
+; CHECK: ![[PROF1]] = !{!"unknown", !"loop-versioning"}
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SLU][profcheck] Propagate profile for branches on injected conditions. (PR #164476)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164476
>From 54eacf7f9cdd8e57ba591e0bbf4b8eb35ff8bf4d Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 11:22:01 -0700
Subject: [PATCH] [SLU][profcheck] Propagate profile for branches on injected
conditions.
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 9 +-
.../inject-invariant-conditions.ll| 142 +-
2 files changed, 79 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index f54d1ba5ca14e..ff4fcb599c5dd 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -3199,10 +3199,15 @@
injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
Builder.SetInsertPoint(TI);
auto *InvariantBr =
Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
+ // We don't know anything about the relation between the limits.
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE);
Builder.SetInsertPoint(CheckBlock);
- Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
- TI->getSuccessor(1));
+ Builder.CreateCondBr(
+ TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
+ !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
+ : nullptr);
TI->eraseFromParent();
// Fixup phis.
diff --git
a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
index 536e0c6a0e74a..3c84dea2a0672 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
@@ -2,40 +2,40 @@
; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true
-passes="loop(simple-loop-unswitch<nontrivial>),simplifycfg" | FileCheck %s
; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true
-passes="loop-mssa(simple-loop-unswitch<nontrivial>),simplifycfg"
-verify-memoryssa | FileCheck %s
-define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr
noundef %arr, ptr noundef %x_p) {
+define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr
noundef %arr, ptr noundef %x_p) !prof !{!"function_entry_count", i32 10} {
; CHECK-LABEL: @test_01(
; CHECK-NEXT: entry:
-; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef
[[META1:![0-9]+]]
; CHECK-NEXT:[[INJECTED_COND:%.*]] = icmp ule i32 [[LIMIT:%.*]], [[X]]
-; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label
[[LOOP:%.*]]
+; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label
[[LOOP:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]],
[[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:[[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32
[[IV_US]]
-; CHECK-NEXT:[[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
-; CHECK-NEXT:[[BOUND_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LIMIT]]
-; CHECK-NEXT:br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label
[[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]]
], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV]]
+; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT:[[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT]]
+; CHECK-NEXT:br i1 [[BOUND_CHECK]], label [[GUARDED_US]], label
[[COMMON_RET:%.*]], !prof [[PROF3:![0-9]+]]
; CHECK: guarded.us:
-; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[X]]
-; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32
[[EL_US]]
-; CHECK-NEXT:store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL]], [[X]]
+; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32
[[EL]]
+; CHECK-NEXT:store i32 [[IV]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV]], 1
; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label
[[COMMON_RET]]
; CHECK: loop:
-; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [
0, [[ENTRY]] ]
-; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
-; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
-; CHECK-NEXT:[[BOUND_CHEC
[llvm-branch-commits] [llvm] [LSCFG][profcheck] Add dummy branch weights for the dummy switch to dead exits (PR #164714)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164714
>From e24859154f6753e6c9abaa5dd272a9a10f93b1ad Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Wed, 22 Oct 2025 14:34:31 -0700
Subject: [PATCH] [LSCFG][profcheck] Add dummy branch weights for the dummy
switch to dead exits
---
.../lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 12 ++
.../LoopSimplifyCFG/constant-fold-branch.ll | 104 +-
2 files changed, 66 insertions(+), 50 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index b9546c5fa236b..e902b71776973 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
@@ -393,6 +394,17 @@ class ConstantTerminatorFoldingImpl {
DTUpdates.push_back({DominatorTree::Insert, Preheader, BB});
++NumLoopExitsDeleted;
}
+// We don't really need to add branch weights to DummySwitch, because all
+// but one branches are just a temporary artifact - see the comment on top
+// of this function. But, it's easy to estimate the weights, and it helps
+// maintain a property of the overall compiler - that the branch weights
+// don't "just get dropped" accidentally (i.e. profcheck)
+if (DummySwitch->getParent()->getParent()->hasProfileData()) {
+ SmallVector<uint32_t> DummyBranchWeights(1 + DummySwitch->getNumCases());
+ // default. 100% probability, the rest are dead.
+ DummyBranchWeights[0] = 1;
+ setBranchWeights(*DummySwitch, DummyBranchWeights, /*IsExpected=*/false);
+}
assert(L.getLoopPreheader() == NewPreheader && "Malformed CFG?");
if (Loop *OuterLoop = LI.getLoopFor(Preheader)) {
diff --git a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
index 1ec212f0bb5ea..46b6209986fed 100644
--- a/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
+++ b/llvm/test/Transforms/LoopSimplifyCFG/constant-fold-branch.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: --check-globals
; REQUIRES: asserts
; RUN: opt -S -enable-loop-simplifycfg-term-folding=true
-passes=loop-simplifycfg -verify-loop-info -verify-dom-info -verify-loop-lcssa
< %s | FileCheck %s
; RUN: opt -S -enable-loop-simplifycfg-term-folding=true
-passes='require<domtree>,loop(loop-simplifycfg)' -verify-loop-info
-verify-dom-info -verify-loop-lcssa < %s | FileCheck %s
@@ -59,7 +59,7 @@ define i32 @dead_backedge_test_switch_loop(i32 %end) {
; CHECK: dead_backedge:
; CHECK-NEXT:[[I_2]] = add i32 [[I_1]], 10
; CHECK-NEXT:switch i32 1, label [[EXIT:%.*]] [
-; CHECK-NEXT:i32 0, label [[HEADER_BACKEDGE]]
+; CHECK-NEXT: i32 0, label [[HEADER_BACKEDGE]]
; CHECK-NEXT:]
; CHECK: exit:
; CHECK-NEXT:[[I_2_LCSSA:%.*]] = phi i32 [ [[I_2]], [[DEAD_BACKEDGE]] ]
@@ -233,12 +233,12 @@ exit:
; Check that we preserve static reachibility of a dead exit block while
deleting
; a branch.
-define i32 @dead_exit_test_branch_loop(i32 %end) {
+define i32 @dead_exit_test_branch_loop(i32 %end) !prof
!{!"function_entry_count", i32 10} {
; CHECK-LABEL: @dead_exit_test_branch_loop(
; CHECK-NEXT: preheader:
; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
-; CHECK-NEXT:]
+; CHECK-NEXT: i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT:], !prof [[PROF1:![0-9]+]]
; CHECK: preheader.split:
; CHECK-NEXT:br label [[HEADER:%.*]]
; CHECK: header:
@@ -262,7 +262,7 @@ preheader:
header:
%i = phi i32 [0, %preheader], [%i.inc, %backedge]
- br i1 true, label %backedge, label %dead
+ br i1 true, label %backedge, label %dead, !prof !{!"branch_weights", i32 10,
i32 1}
dead:
br label %dummy
@@ -286,7 +286,7 @@ define i32 @dead_exit_test_switch_loop(i32 %end) {
; CHECK-LABEL: @dead_exit_test_switch_loop(
; CHECK-NEXT: preheader:
; CHECK-NEXT:switch i32 0, label [[PREHEADER_SPLIT:%.*]] [
-; CHECK-NEXT:i32 1, label [[DEAD:%.*]]
+; CHECK-NEXT: i32 1, label [[DEAD:%.*]]
; CHECK-NEXT:]
; CHECK: preheader.split:
; CHECK-NEXT:br label [[HEADER:%.*]]
@@ -383,9 +383,9 @@ define i32 @dead_loop_test_switch_loop(i32 %end) {
; CHECK: header:
; CHECK-NEXT:[[I:%.*]] = phi i32 [ 0, [[PREHEADER:%.*]] ], [
[[I_INC:%.*]], [[BACKEDGE:%.*]] ]
; CHECK-NEXT:switch i32 1, label [[DEAD:%.*]] [
-; CHECK-NEXT:i32 0, label [[DEAD]]
-; CHECK-NEXT:i32 1, label [[BACKEDGE]]
-; CHECK-NEXT:i32 2, lab
[llvm-branch-commits] [llvm] [SLU][profcheck] Use the original branch weigths in `buildPartialInvariantUnswitchConditionalBranch` (PR #164270)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164270
>From 31442a49e459d9712aece05e66ff0d1e4a70721c Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Sat, 18 Oct 2025 17:14:08 -0700
Subject: [PATCH] [SLU][profcheck] Use the original branch weigths in
`buildPartialInvariantUnswitchConditionalBranch`
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 17 -
.../SimpleLoopUnswitch/partial-unswitch.ll| 75 +++
2 files changed, 56 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 763e95a5955a8..2e7a7fec745fb 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -335,7 +335,7 @@ static void buildPartialUnswitchConditionalBranch(
static void buildPartialInvariantUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
-MemorySSAUpdater *MSSAU) {
+MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
ValueToValueMapTy VMap;
for (auto *Val : reverse(ToDuplicate)) {
Instruction *Inst = cast<Instruction>(Val);
@@ -375,8 +375,17 @@ static void buildPartialInvariantUnswitchConditionalBranch(
IRBuilder<> IRB(&BB);
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
Value *Cond = VMap[ToDuplicate[0]];
- IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
- Direction ? &NormalSucc : &UnswitchedSucc);
+ auto *ProfData =
+ !ProfcheckDisableMetadataFixes &&
+ ToDuplicate[0] ==
skipTrivialSelect(OriginalBranch.getCondition())
+ ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
+ : nullptr;
+ auto *BR =
+ IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+ Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
+ if (!ProfData)
+setExplicitlyUnknownBranchWeightsIfProfiled(
+*BR, *BR->getParent()->getParent(), DEBUG_TYPE);
}
/// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -2513,7 +2522,7 @@ static void unswitchNontrivialInvariants(
// the branch in the split block.
if (PartiallyInvariant)
buildPartialInvariantUnswitchConditionalBranch(
- *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
else {
buildPartialUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
index 1d8942079ffd8..87161707d9f69 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -1,14 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: --check-globals
; RUN: opt -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>'
-S < %s | FileCheck %s
declare void @clobber()
-define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {
+define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) !prof !0 {
; CHECK-LABEL: @partial_unswitch_true_successor(
; CHECK-NEXT: entry:
; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
-; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: entry.split.us:
; CHECK-NEXT:br label [[LOOP_HEADER_US:%.*]]
; CHECK: loop.header.us:
@@ -19,7 +19,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N)
{
; CHECK: loop.latch.us:
; CHECK-NEXT:[[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label
[[EXIT_SPLIT_US:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: exit.split.us:
; CHECK-NEXT:br label [[EXIT:%.*]]
; CHECK: entry.split:
@@ -28,7 +28,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N)
{
; CHECK-NEXT:[[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [
[[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:[[LV:%.*]] = load i32, ptr [[PTR]], align 4
; CHECK-NEXT:[[SC:%.*]] = icmp eq i32 [[LV]], 100
-; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]],
!prof [[PROF1]]
; CHECK: noclobber:
; CHECK-NEXT:
[llvm-branch-commits] [llvm] [SLU][profcheck] create likely branch weights for guard->branch (PR #164271)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164271
>From 7967c99ec1b2121b09690fc0cb57dac8814edc00 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Mon, 20 Oct 2025 08:21:26 -0700
Subject: [PATCH] [SLU][profcheck] create likely branch weights for
guard->branch
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 12 +-
.../Transforms/SimpleLoopUnswitch/guards.ll | 181 +-
2 files changed, 139 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 2e7a7fec745fb..f54d1ba5ca14e 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
@@ -2827,9 +2828,14 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst
*GI, Loop &L,
MSSAU->getMemorySSA()->verifyMemorySSA();
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- Instruction *DeoptBlockTerm =
- SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
-GI->getMetadata(LLVMContext::MD_prof), &DTU,
&LI);
+ // llvm.experimental.guard doesn't have branch weights. We can assume,
+ // however, that the deopt path is unlikely.
+ Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
+ GI->getArgOperand(0), GI, true,
+ !ProfcheckDisableMetadataFixes && EstimateProfile
+ ? MDBuilder(GI->getContext()).createUnlikelyBranchWeights()
+ : nullptr,
+ &DTU, &LI);
BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
// SplitBlockAndInsertIfThen inserts control flow that branches to
// DeoptBlockTerm if the condition is true. We want the opposite.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
index 533b1f691f5ad..e83047e397d3d 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
@@ -1,26 +1,34 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --check-globals all --version 5
; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>),verify<loops>'
-simple-loop-unswitch-guards -S < %s | FileCheck %s
; RUN: opt -passes='simple-loop-unswitch<nontrivial>'
-simple-loop-unswitch-guards -S < %s | FileCheck %s
; RUN: opt -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>'
-simple-loop-unswitch-guards -verify-memoryssa -verify-loop-info -S < %s |
FileCheck %s
declare void @llvm.experimental.guard(i1, ...)
-define void @test_simple_case(i1 %cond, i32 %N) {
-; CHECK-LABEL: @test_simple_case(
+define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
+; CHECK-LABEL: define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT:br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond, label %entry.split.us, label %entry.split, !prof
!1
; CHECK: entry.split.us:
-; CHECK-NEXT:br label [[LOOP_US:%.*]]
+; CHECK-NEXT:br label %loop.us
; CHECK: loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [
[[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
-; CHECK-NEXT:br label [[GUARDED_US]]
+; CHECK-NEXT:%iv.us = phi i32 [ 0, %entry.split.us ], [ %iv.next.us,
%guarded.us ]
+; CHECK-NEXT:br label %guarded.us
; CHECK: guarded.us:
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
-; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:%iv.next.us = add i32 %iv.us, 1
+; CHECK-NEXT:%loop.cond.us = icmp slt i32 %iv.next.us, %N
+; CHECK-NEXT:br i1 %loop.cond.us, label %loop.us, label %exit.split.us
+; CHECK: exit.split.us:
+; CHECK-NEXT:br label %exit
+; CHECK: entry.split:
+; CHECK-NEXT:br label %loop
+; CHECK: loop:
+; CHECK-NEXT:br label %deopt
; CHECK: deopt:
; CHECK-NEXT:call void (i1, ...) @llvm.experimental.guard(i1 false) [
"deopt"() ]
; CHECK-NEXT:unreachable
+; CHECK: exit:
+; CHECK-NEXT:ret void
;
entry:
@@ -38,25 +46,39 @@ exit:
}
define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
-; CHECK-LABEL: @test_two_guards(
+; CHECK-LABEL: define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
; CHECK-NEXT: entry:
-; CHECK-NEXT:br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond1, label %entry.split.us, label %entry.split,
!prof !1
; CHECK:
[llvm-branch-commits] [llvm] [SLU][profcheck] Propagate profile for branches on injected conditions. (PR #164476)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164476
>From 54eacf7f9cdd8e57ba591e0bbf4b8eb35ff8bf4d Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 11:22:01 -0700
Subject: [PATCH] [SLU][profcheck] Propagate profile for branches on injected
conditions.
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 9 +-
.../inject-invariant-conditions.ll| 142 +-
2 files changed, 79 insertions(+), 72 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index f54d1ba5ca14e..ff4fcb599c5dd 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -3199,10 +3199,15 @@
injectPendingInvariantConditions(NonTrivialUnswitchCandidate Candidate, Loop &L,
Builder.SetInsertPoint(TI);
auto *InvariantBr =
Builder.CreateCondBr(InjectedCond, InLoopSucc, CheckBlock);
+ // We don't know anything about the relation between the limits.
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *InvariantBr, *InvariantBr->getParent()->getParent(), DEBUG_TYPE);
Builder.SetInsertPoint(CheckBlock);
- Builder.CreateCondBr(TI->getCondition(), TI->getSuccessor(0),
- TI->getSuccessor(1));
+ Builder.CreateCondBr(
+ TI->getCondition(), TI->getSuccessor(0), TI->getSuccessor(1),
+ !ProfcheckDisableMetadataFixes ? TI->getMetadata(LLVMContext::MD_prof)
+ : nullptr);
TI->eraseFromParent();
// Fixup phis.
diff --git
a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
index 536e0c6a0e74a..3c84dea2a0672 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/inject-invariant-conditions.ll
@@ -2,40 +2,40 @@
; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true
-passes="loop(simple-loop-unswitch<nontrivial>),simplifycfg" | FileCheck %s
; RUN: opt < %s -S -simple-loop-unswitch-inject-invariant-conditions=true
-passes="loop-mssa(simple-loop-unswitch<nontrivial>),simplifycfg"
-verify-memoryssa | FileCheck %s
-define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr
noundef %arr, ptr noundef %x_p) {
+define i32 @test_01(ptr noundef %p, i32 noundef %n, i32 noundef %limit, ptr
noundef %arr, ptr noundef %x_p) !prof !{!"function_entry_count", i32 10} {
; CHECK-LABEL: @test_01(
; CHECK-NEXT: entry:
-; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef !0
+; CHECK-NEXT:[[X:%.*]] = load i32, ptr [[X_P:%.*]], align 4, !noundef
[[META1:![0-9]+]]
; CHECK-NEXT:[[INJECTED_COND:%.*]] = icmp ule i32 [[LIMIT:%.*]], [[X]]
-; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label
[[LOOP:%.*]]
+; CHECK-NEXT:br i1 [[INJECTED_COND]], label [[LOOP_US:%.*]], label
[[LOOP:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]],
[[GUARDED_US:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT:[[EL_PTR_US:%.*]] = getelementptr i32, ptr [[P:%.*]], i32
[[IV_US]]
-; CHECK-NEXT:[[EL_US:%.*]] = load i32, ptr [[EL_PTR_US]], align 4
-; CHECK-NEXT:[[BOUND_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[LIMIT]]
-; CHECK-NEXT:br i1 [[BOUND_CHECK_US]], label [[GUARDED_US]], label
[[COMMON_RET:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]]
], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P:%.*]], i32 [[IV]]
+; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
+; CHECK-NEXT:[[BOUND_CHECK:%.*]] = icmp ult i32 [[EL]], [[LIMIT]]
+; CHECK-NEXT:br i1 [[BOUND_CHECK]], label [[GUARDED_US]], label
[[COMMON_RET:%.*]], !prof [[PROF3:![0-9]+]]
; CHECK: guarded.us:
-; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL_US]], [[X]]
-; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32
[[EL_US]]
-; CHECK-NEXT:store i32 [[IV_US]], ptr [[ARR_PTR_US]], align 4
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:[[RANGE_CHECK_US:%.*]] = icmp ult i32 [[EL]], [[X]]
+; CHECK-NEXT:[[ARR_PTR_US:%.*]] = getelementptr i32, ptr [[ARR:%.*]], i32
[[EL]]
+; CHECK-NEXT:store i32 [[IV]], ptr [[ARR_PTR_US]], align 4
+; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV]], 1
; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label
[[COMMON_RET]]
; CHECK: loop:
-; CHECK-NEXT:[[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [
0, [[ENTRY]] ]
-; CHECK-NEXT:[[EL_PTR:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]]
-; CHECK-NEXT:[[EL:%.*]] = load i32, ptr [[EL_PTR]], align 4
-; CHECK-NEXT:[[BOUND_CHEC
[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies for epilogue (PR #159163)
jdenny-ornl wrote: ping https://github.com/llvm/llvm-project/pull/159163 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SLU][profcheck] create likely branch weights for guard->branch (PR #164271)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164271
>From 7967c99ec1b2121b09690fc0cb57dac8814edc00 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Mon, 20 Oct 2025 08:21:26 -0700
Subject: [PATCH] [SLU][profcheck] create likely branch weights for
guard->branch
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 12 +-
.../Transforms/SimpleLoopUnswitch/guards.ll | 181 +-
2 files changed, 139 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 2e7a7fec745fb..f54d1ba5ca14e 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -40,6 +40,7 @@
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ProfDataUtils.h"
@@ -2827,9 +2828,14 @@ static BranchInst *turnGuardIntoBranch(IntrinsicInst
*GI, Loop &L,
MSSAU->getMemorySSA()->verifyMemorySSA();
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- Instruction *DeoptBlockTerm =
- SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true,
-GI->getMetadata(LLVMContext::MD_prof), &DTU,
&LI);
+ // llvm.experimental.guard doesn't have branch weights. We can assume,
+ // however, that the deopt path is unlikely.
+ Instruction *DeoptBlockTerm = SplitBlockAndInsertIfThen(
+ GI->getArgOperand(0), GI, true,
+ !ProfcheckDisableMetadataFixes && EstimateProfile
+ ? MDBuilder(GI->getContext()).createUnlikelyBranchWeights()
+ : nullptr,
+ &DTU, &LI);
BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
// SplitBlockAndInsertIfThen inserts control flow that branches to
// DeoptBlockTerm if the condition is true. We want the opposite.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
index 533b1f691f5ad..e83047e397d3d 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/guards.ll
@@ -1,26 +1,34 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: -p --check-globals all --version 5
; RUN: opt -passes='loop(simple-loop-unswitch<nontrivial>),verify<loops>'
-simple-loop-unswitch-guards -S < %s | FileCheck %s
; RUN: opt -passes='simple-loop-unswitch<nontrivial>'
-simple-loop-unswitch-guards -S < %s | FileCheck %s
; RUN: opt -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>'
-simple-loop-unswitch-guards -verify-memoryssa -verify-loop-info -S < %s |
FileCheck %s
declare void @llvm.experimental.guard(i1, ...)
-define void @test_simple_case(i1 %cond, i32 %N) {
-; CHECK-LABEL: @test_simple_case(
+define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
+; CHECK-LABEL: define void @test_simple_case(i1 %cond, i32 %N) !prof !0 {
; CHECK-NEXT: entry:
-; CHECK-NEXT:br i1 [[COND:%.*]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond, label %entry.split.us, label %entry.split, !prof
!1
; CHECK: entry.split.us:
-; CHECK-NEXT:br label [[LOOP_US:%.*]]
+; CHECK-NEXT:br label %loop.us
; CHECK: loop.us:
-; CHECK-NEXT:[[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [
[[IV_NEXT_US:%.*]], [[GUARDED_US:%.*]] ]
-; CHECK-NEXT:br label [[GUARDED_US]]
+; CHECK-NEXT:%iv.us = phi i32 [ 0, %entry.split.us ], [ %iv.next.us,
%guarded.us ]
+; CHECK-NEXT:br label %guarded.us
; CHECK: guarded.us:
-; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:[[LOOP_COND_US:%.*]] = icmp slt i32 [[IV_NEXT_US]], [[N:%.*]]
-; CHECK-NEXT:br i1 [[LOOP_COND_US]], label [[LOOP_US]], label
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:%iv.next.us = add i32 %iv.us, 1
+; CHECK-NEXT:%loop.cond.us = icmp slt i32 %iv.next.us, %N
+; CHECK-NEXT:br i1 %loop.cond.us, label %loop.us, label %exit.split.us
+; CHECK: exit.split.us:
+; CHECK-NEXT:br label %exit
+; CHECK: entry.split:
+; CHECK-NEXT:br label %loop
+; CHECK: loop:
+; CHECK-NEXT:br label %deopt
; CHECK: deopt:
; CHECK-NEXT:call void (i1, ...) @llvm.experimental.guard(i1 false) [
"deopt"() ]
; CHECK-NEXT:unreachable
+; CHECK: exit:
+; CHECK-NEXT:ret void
;
entry:
@@ -38,25 +46,39 @@ exit:
}
define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
-; CHECK-LABEL: @test_two_guards(
+; CHECK-LABEL: define void @test_two_guards(i1 %cond1, i1 %cond2, i32 %N) {
; CHECK-NEXT: entry:
-; CHECK-NEXT:br i1 [[COND1:%.*]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 %cond1, label %entry.split.us, label %entry.split,
!prof !1
; CHECK:
[llvm-branch-commits] [llvm] [SLU][profcheck] Use the original branch weights in `buildPartialInvariantUnswitchConditionalBranch` (PR #164270)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164270
>From 31442a49e459d9712aece05e66ff0d1e4a70721c Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Sat, 18 Oct 2025 17:14:08 -0700
Subject: [PATCH] [SLU][profcheck] Use the original branch weights in
`buildPartialInvariantUnswitchConditionalBranch`
---
.../Transforms/Scalar/SimpleLoopUnswitch.cpp | 17 -
.../SimpleLoopUnswitch/partial-unswitch.ll| 75 +++
2 files changed, 56 insertions(+), 36 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index 763e95a5955a8..2e7a7fec745fb 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -335,7 +335,7 @@ static void buildPartialUnswitchConditionalBranch(
static void buildPartialInvariantUnswitchConditionalBranch(
BasicBlock &BB, ArrayRef ToDuplicate, bool Direction,
BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
-MemorySSAUpdater *MSSAU) {
+MemorySSAUpdater *MSSAU, const BranchInst &OriginalBranch) {
ValueToValueMapTy VMap;
for (auto *Val : reverse(ToDuplicate)) {
Instruction *Inst = cast(Val);
@@ -375,8 +375,17 @@ static void buildPartialInvariantUnswitchConditionalBranch(
IRBuilder<> IRB(&BB);
IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated());
Value *Cond = VMap[ToDuplicate[0]];
- IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
- Direction ? &NormalSucc : &UnswitchedSucc);
+ auto *ProfData =
+ !ProfcheckDisableMetadataFixes &&
+ ToDuplicate[0] ==
skipTrivialSelect(OriginalBranch.getCondition())
+ ? OriginalBranch.getMetadata(LLVMContext::MD_prof)
+ : nullptr;
+ auto *BR =
+ IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
+ Direction ? &NormalSucc : &UnswitchedSucc, ProfData);
+ if (!ProfData)
+setExplicitlyUnknownBranchWeightsIfProfiled(
+*BR, *BR->getParent()->getParent(), DEBUG_TYPE);
}
/// Rewrite the PHI nodes in an unswitched loop exit basic block.
@@ -2513,7 +2522,7 @@ static void unswitchNontrivialInvariants(
// the branch in the split block.
if (PartiallyInvariant)
buildPartialInvariantUnswitchConditionalBranch(
- *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
+ *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU, *BI);
else {
buildPartialUnswitchConditionalBranch(
*SplitBB, Invariants, Direction, *ClonedPH, *LoopPH,
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
index 1d8942079ffd8..87161707d9f69 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -1,14 +1,14 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
UTC_ARGS: --check-globals
; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify'
-S < %s | FileCheck %s
declare void @clobber()
-define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) {
+define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N) !prof !0 {
; CHECK-LABEL: @partial_unswitch_true_successor(
; CHECK-NEXT: entry:
; CHECK-NEXT:[[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
; CHECK-NEXT:[[TMP1:%.*]] = icmp eq i32 [[TMP0]], 100
-; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]]
+; CHECK-NEXT:br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label
[[ENTRY_SPLIT:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: entry.split.us:
; CHECK-NEXT:br label [[LOOP_HEADER_US:%.*]]
; CHECK: loop.header.us:
@@ -19,7 +19,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N)
{
; CHECK: loop.latch.us:
; CHECK-NEXT:[[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
; CHECK-NEXT:[[IV_NEXT_US]] = add i32 [[IV_US]], 1
-; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label
[[EXIT_SPLIT_US:%.*]]
+; CHECK-NEXT:br i1 [[C_US]], label [[LOOP_HEADER_US]], label
[[EXIT_SPLIT_US:%.*]], !prof [[PROF2:![0-9]+]]
; CHECK: exit.split.us:
; CHECK-NEXT:br label [[EXIT:%.*]]
; CHECK: entry.split:
@@ -28,7 +28,7 @@ define i32 @partial_unswitch_true_successor(ptr %ptr, i32 %N)
{
; CHECK-NEXT:[[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [
[[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
; CHECK-NEXT:[[LV:%.*]] = load i32, ptr [[PTR]], align 4
; CHECK-NEXT:[[SC:%.*]] = icmp eq i32 [[LV]], 100
-; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK-NEXT:br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]],
!prof [[PROF1]]
; CHECK: noclobber:
; CHECK-NEXT:
[llvm-branch-commits] [llvm] [LVer][profcheck] explicitly set unknown branch weights for the versioned/unversioned selector (PR #164507)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164507
>From 577971fd0c10a2733c3255609f5cb7db874011a4 Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 15:20:14 -0700
Subject: [PATCH] [LVer][profcheck] explicitly set unknown branch weights for
the versioned/unversioned selector
---
llvm/lib/Transforms/Utils/LoopVersioning.cpp | 10 --
.../Transforms/LoopDistribute/basic-with-memchecks.ll | 5 +++--
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index ec2e6c1ab796b..4786819d18fa4 100644
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
@@ -109,8 +110,13 @@ void LoopVersioning::versionLoop(
// Insert the conditional branch based on the result of the memchecks.
Instruction *OrigTerm = RuntimeCheckBB->getTerminator();
Builder.SetInsertPoint(OrigTerm);
- Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
- VersionedLoop->getLoopPreheader());
+ auto *BI =
+ Builder.CreateCondBr(RuntimeCheck, NonVersionedLoop->getLoopPreheader(),
+ VersionedLoop->getLoopPreheader());
+ // We don't know what the probability of executing the versioned vs the
+ // unversioned variants is.
+ setExplicitlyUnknownBranchWeightsIfProfiled(
+ *BI, *BI->getParent()->getParent(), DEBUG_TYPE);
OrigTerm->eraseFromParent();
// The loops merge in the original exit block. This is now dominated by the
diff --git a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
index 97ea2c6708dad..2828882afe779 100644
--- a/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
+++ b/llvm/test/Transforms/LoopDistribute/basic-with-memchecks.ll
@@ -28,7 +28,7 @@ target triple = "x86_64-apple-macosx10.10.0"
@E = common global ptr null, align 8
; CHECK-LABEL: @f(
-define void @f() {
+define void @f() !prof !{!"function_entry_count", i32 10} {
entry:
%a = load ptr, ptr @A, align 8
%b = load ptr, ptr @B, align 8
@@ -55,7 +55,7 @@ entry:
; CHECK: = icmp
; CHECK-NOT: = icmp
-; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label
%for.body.ph.ldist1
+; CHECK: br i1 %conflict.rdx15, label %for.body.ph.lver.orig, label
%for.body.ph.ldist1, !prof ![[PROF1:[0-9]]]
; The non-distributed loop that the memchecks fall back on.
@@ -289,3 +289,4 @@ attributes #1 = { nounwind convergent }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.distribute.enable", i1 true}
+; CHECK: ![[PROF1]] = !{!"unknown", !"loop-versioning"}
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LIR][profcheck] Reuse the loop's exit condition profile (PR #164523)
https://github.com/mtrofin updated
https://github.com/llvm/llvm-project/pull/164523
>From 1bd2ba55726bd525908b186e0460e0556e7c3c6a Mon Sep 17 00:00:00 2001
From: Mircea Trofin
Date: Tue, 21 Oct 2025 17:24:49 -0700
Subject: [PATCH] [LIR][profcheck] Reuse the loop's exit condition profile
---
.../Transforms/Scalar/LoopIdiomRecognize.cpp | 40 +--
.../LoopIdiom/X86/preserve-profile.ll | 70 +++
2 files changed, 106 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
diff --git a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 019536ca91ae0..9070d252ae09f 100644
--- a/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -72,6 +72,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
@@ -105,6 +106,7 @@ STATISTIC(
STATISTIC(NumShiftUntilZero,
"Number of uncountable loops recognized as 'shift until zero'
idiom");
+namespace llvm {
bool DisableLIRP::All;
static cl::opt
DisableLIRPAll("disable-" DEBUG_TYPE "-all",
@@ -163,6 +165,10 @@ static cl::opt ForceMemsetPatternIntrinsic(
cl::desc("Use memset.pattern intrinsic whenever possible"),
cl::init(false),
cl::Hidden);
+extern cl::opt ProfcheckDisableMetadataFixes;
+
+} // namespace llvm
+
namespace {
class LoopIdiomRecognize {
@@ -3199,7 +3205,21 @@ bool LoopIdiomRecognize::recognizeShiftUntilBitTest() {
// The loop trip count check.
auto *IVCheck = Builder.CreateICmpEQ(IVNext, LoopTripCount,
CurLoop->getName() + ".ivcheck");
- Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(IVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+if (SuccessorBB == LoopHeaderBB->getTerminator()->getSuccessor(1))
+ std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights,
+ /*IsExpected=*/false);
+ }
+
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
@@ -3368,10 +3388,10 @@ static bool detectShiftUntilZeroIdiom(Loop *CurLoop,
ScalarEvolution *SE,
/// %start = <...>
/// %extraoffset = <...>
/// <...>
-/// br label %for.cond
+/// br label %loop
///
/// loop:
-/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %for.cond ]
+/// %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
/// %nbits = add nsw i8 %iv, %extraoffset
/// %val.shifted = {{l,a}shr,shl} i8 %val, %nbits
/// %val.shifted.iszero = icmp eq i8 %val.shifted, 0
@@ -3533,7 +3553,19 @@ bool LoopIdiomRecognize::recognizeShiftUntilZero() {
// The loop terminator.
Builder.SetInsertPoint(LoopHeaderBB->getTerminator());
- Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ SmallVector BranchWeights;
+ const bool HasBranchWeights =
+ !ProfcheckDisableMetadataFixes &&
+ extractBranchWeights(*LoopHeaderBB->getTerminator(), BranchWeights);
+
+ auto *BI = Builder.CreateCondBr(CIVCheck, SuccessorBB, LoopHeaderBB);
+ if (HasBranchWeights) {
+if (InvertedCond)
+ std::swap(BranchWeights[0], BranchWeights[1]);
+// We're not changing the loop profile, so we can reuse the original loop's
+// profile.
+setBranchWeights(*BI, BranchWeights, /*IsExpected=*/false);
+ }
LoopHeaderBB->getTerminator()->eraseFromParent();
// Populate the IV PHI.
diff --git a/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
new file mode 100644
index 0000000000000..d01bb748d9422
--- /dev/null
+++ b/llvm/test/Transforms/LoopIdiom/X86/preserve-profile.ll
@@ -0,0 +1,70 @@
+; RUN: opt
-passes="module(print),function(loop(loop-idiom)),module(print)"
-mtriple=x86_64 -mcpu=core-avx2 %s -disable-output 2>&1 | FileCheck
--check-prefix=PROFILE %s
+
+declare void @escape_inner(i8, i8, i8, i1, i8)
+declare void @escape_outer(i8, i8, i8, i1, i8)
+
+declare i8 @gen.i8()
+
+; Most basic pattern; Note that iff the shift amount is offset, said offsetting
+; must not cause an overflow, but `add nsw` is fine.
+define i8 @p0(i8 %val, i8 %start, i8 %extraoffset) mustprogress {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %start, %entry ], [ %iv.next, %loop ]
+ %nbits = add nsw i8 %iv, %extraoffset
+ %val.shifted = ashr i8 %val, %nbits
+ %val.shifted.iszero = icmp eq i8 %val.shifted, 0
+ %iv.next = add i8 %iv, 1
+
+ call void @escap
[llvm-branch-commits] [llvm] [LoopUnroll] Fix block frequencies for epilogue (PR #159163)
https://github.com/mtrofin approved this pull request. https://github.com/llvm/llvm-project/pull/159163 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DA] Check nsw when extracting a constant operand of SCEVMul (PR #164408)
https://github.com/kasuga-fj updated
https://github.com/llvm/llvm-project/pull/164408
>From 4c5c963d99d99b4649b39fca172917fc0b09ccb2 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga
Date: Tue, 21 Oct 2025 12:23:25 +0000
Subject: [PATCH 1/3] [DA] Check nsw when extracting a constant operand of
SCEVMul
---
llvm/lib/Analysis/DependenceAnalysis.cpp | 5 +++--
llvm/test/Analysis/DependenceAnalysis/GCD.ll | 6 +++---
.../Analysis/DependenceAnalysis/SymbolicSIV.ll| 4 ++--
.../DependenceAnalysis/gcd-miv-overflow.ll| 15 ++-
4 files changed, 14 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp
b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 853bd66c8a7f8..36ac252aba6ed 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -2828,8 +2828,9 @@ static std::optional getConstantPart(const SCEV
*Expr) {
if (const auto *Constant = dyn_cast(Expr))
return Constant->getAPInt();
if (const auto *Product = dyn_cast(Expr))
-if (const auto *Constant = dyn_cast(Product->getOperand(0)))
- return Constant->getAPInt();
+if (Product->hasNoSignedWrap())
+ if (auto *Constant = dyn_cast(Product->getOperand(0)))
+return Constant->getAPInt();
return std::nullopt;
}
diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
index 03343e7a98211..cb14d189afe4c 100644
--- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll
@@ -254,7 +254,7 @@ define void @gcd4(ptr %A, ptr %B, i64 %M, i64 %N) nounwind
uwtable ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%conv, ptr %arrayidx, align 4
; CHECK-NEXT:da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load
i32, ptr %arrayidx16, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%0, ptr %B.addr.11, align 4
; CHECK-NEXT:da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load
i32, ptr %arrayidx16, align 4
@@ -322,7 +322,7 @@ define void @gcd5(ptr %A, ptr %B, i64 %M, i64 %N) nounwind
uwtable ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%conv, ptr %arrayidx, align 4
; CHECK-NEXT:da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load
i32, ptr %arrayidx16, align 4
-; CHECK-NEXT:da analyze - flow [<> *]!
+; CHECK-NEXT:da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%0, ptr %B.addr.11, align 4
; CHECK-NEXT:da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load
i32, ptr %arrayidx16, align 4
@@ -390,7 +390,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable
ssp {
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store
i32 %conv, ptr %arrayidx5, align 4
; CHECK-NEXT:da analyze - output [* *]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %2 =
load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [* *|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store
i32 %2, ptr %B.addr.12, align 4
; CHECK-NEXT:da analyze - confused!
; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: %2 = load
i32, ptr %arrayidx9, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index cdfaec76fa892..73a415baef4c4 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -384,7 +384,7 @@ define void @symbolicsiv6(ptr %A, ptr %B, i64 %n, i64 %N,
i64 %M) nounwind uwtab
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%conv, ptr %arrayidx, align 4
; CHECK-NEXT:da analyze - none!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load
i32, ptr %arrayidx7, align 4
-; CHECK-NEXT:da analyze - none!
+; CHECK-NEXT:da analyze - flow [*|<]!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%0, ptr %B.addr.02, align 4
; CHECK-NEXT:da analyze - confused!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load
i32, ptr %arrayidx7, align 4
@@ -440,7 +440,7 @@ define void @symbolicsiv7(ptr %A, ptr %B, i64 %n, i64 %N,
i64 %M) nounwind uwtab
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32
%conv, ptr %arrayidx, align 4
; CHECK-NEXT:da analyze - none!
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load
[llvm-branch-commits] [llvm] [DA] Check nsw when extracting a constant operand of SCEVMul (PR #164408)
@@ -2828,8 +2828,9 @@ static std::optional getConstantPart(const SCEV
*Expr) {
if (const auto *Constant = dyn_cast(Expr))
return Constant->getAPInt();
if (const auto *Product = dyn_cast(Expr))
-if (const auto *Constant = dyn_cast(Product->getOperand(0)))
- return Constant->getAPInt();
+if (Product->hasNoSignedWrap())
kasuga-fj wrote:
Thanks, renamed
https://github.com/llvm/llvm-project/pull/164408
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-readobj, ELF] Support reading binary with more than PN_XNUM segments. (PR #165278)
jh7370 wrote: @aokblast, is this ready for review? It's currently still attempting to merge into another of your user branches, whereas if it's ready for review, I'd expect it to be merging into `main`. https://github.com/llvm/llvm-project/pull/165278 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][PAC] Warn about synchronous unwind tables (PR #165227)
@@ -133,11 +140,17 @@ Error
PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
SkipPredicate, "PointerAuthCFIAnalyzer");
+
+ float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
<< " functions. Ignored " << FunctionsIgnored << " functions "
-<< format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+<< format("(%.2lf%%)", IgnoredPercent)
<< " because of CFI inconsistencies\n";
+ if (IgnoredPercent >= 10.0)
+BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables.\n";
peterwaller-arm wrote:
I think these are valid concerns. The warning could still be made more actionable
for the user without naming a specific flag, by calling out that the workload must
be recompiled with asynchronous unwind tables turned on. It could say something like "For C compilers the flag is
typically spelled -fasynchronous-unwind-tables /
-fno-asynchronous-unwind-tables". Even if a user is not using C this might turn
out to be useful information to find relevant documentation.
https://github.com/llvm/llvm-project/pull/165227
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [DA] Check nsw when extracting a constant operand of SCEVMul (PR #164408)
https://github.com/Meinersbur approved this pull request. https://github.com/llvm/llvm-project/pull/164408 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][PAC] Warn about synchronous unwind tables (PR #165227)
@@ -133,11 +140,17 @@ Error
PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
SkipPredicate, "PointerAuthCFIAnalyzer");
+
+ float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
<< " functions. Ignored " << FunctionsIgnored << " functions "
-<< format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+<< format("(%.2lf%%)", IgnoredPercent)
<< " because of CFI inconsistencies\n";
+ if (IgnoredPercent >= 10.0)
+BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables.\n";
bgergely0 wrote:
Agreed, with careful wording it would be useful information to users.
https://github.com/llvm/llvm-project/pull/165227
___
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Use assertion in VPExpressionRecipe creation (PR #165543)
https://github.com/SamTebbs33 edited https://github.com/llvm/llvm-project/pull/165543 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LV] Use assertion in VPExpressionRecipe creation (PR #165543)
https://github.com/SamTebbs33 created
https://github.com/llvm/llvm-project/pull/165543
This PR re-introduces the assert that the cost of a partial reduction is valid
during VPExpressionRecipe creation.
This is a stacked PR:
1. https://github.com/llvm/llvm-project/pull/165536
>From 0aa3c2d73ee5546b2d45b8ccf7c33ca39e7ec4d2 Mon Sep 17 00:00:00 2001
From: Samuel Tebbs
Date: Wed, 29 Oct 2025 10:10:00 +0000
Subject: [PATCH] [LV] Use assertion in VPExpressionRecipe creation
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 70 ++-
1 file changed, 38 insertions(+), 32 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d9ac26bba7507..e75c99c35938e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3532,24 +3532,28 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe
*Red, VPCostContext &Ctx,
auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- InstructionCost ExtRedCost;
- InstructionCost ExtCost =
- cast(VecOp)->computeCost(VF, Ctx);
- InstructionCost RedCost = Red->computeCost(VF, Ctx);
-
if (isa(Red)) {
TargetTransformInfo::PartialReductionExtendKind ExtKind =
TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
// FIXME: Move partial reduction creation, costing and clamping
// here from LoopVectorize.cpp.
-ExtRedCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
-llvm::TargetTransformInfo::PR_None, std::nullopt,
Ctx.CostKind);
- } else {
-ExtRedCost = Ctx.TTI.getExtendedReductionCost(
-Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
-Red->getFastMathFlags(), CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
+llvm::TargetTransformInfo::PR_None, std::nullopt,
+Ctx.CostKind);
+assert(PartialReductionCost.isValid() &&
+ "A partial reduction should have a valid cost");
+return true;
}
+
+ InstructionCost ExtCost =
+ cast(VecOp)->computeCost(VF, Ctx);
+ InstructionCost RedCost = Red->computeCost(VF, Ctx);
+
+ InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
+ Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
+ Red->getFastMathFlags(), CostKind);
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
},
Range);
@@ -3595,33 +3599,35 @@
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Type *SrcTy =
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
- InstructionCost MulAccCost;
if (IsPartialReduction) {
Type *SrcTy2 =
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) :
nullptr;
// FIXME: Move partial reduction creation, costing and clamping
// here from LoopVectorize.cpp.
-MulAccCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, SrcTy2, RedTy, VF,
-Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
- Ext0->getOpcode())
- : TargetTransformInfo::PR_None,
-Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
- Ext1->getOpcode())
- : TargetTransformInfo::PR_None,
-Mul->getOpcode(), CostKind);
- } else {
-// Only partial reductions support mixed extends at the moment.
-if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
- return false;
-
-bool IsZExt =
-!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
-auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
-MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
-SrcVecTy, CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, SrcTy2, RedTy, VF,
+Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
+ Ext0->getOpcode())
+ : TargetTransformInfo::PR_None,
+Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
+ Ext1->getOpcode())
+
[llvm-branch-commits] [llvm] [LV] Use assertion in VPExpressionRecipe creation (PR #165543)
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Sam Tebbs (SamTebbs33)
Changes
This PR re-introduces the assert that the cost of a partial reduction is valid
during VPExpressionRecipe creation.
This is a stacked PR:
1. https://github.com/llvm/llvm-project/pull/165536
2. -> https://github.com/llvm/llvm-project/pull/165543
---
Full diff: https://github.com/llvm/llvm-project/pull/165543.diff
1 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+38-32)
```diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d9ac26bba7507..e75c99c35938e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3532,24 +3532,28 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe
*Red, VPCostContext &Ctx,
auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- InstructionCost ExtRedCost;
- InstructionCost ExtCost =
- cast(VecOp)->computeCost(VF, Ctx);
- InstructionCost RedCost = Red->computeCost(VF, Ctx);
-
if (isa(Red)) {
TargetTransformInfo::PartialReductionExtendKind ExtKind =
TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
// FIXME: Move partial reduction creation, costing and clamping
// here from LoopVectorize.cpp.
-ExtRedCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
-llvm::TargetTransformInfo::PR_None, std::nullopt,
Ctx.CostKind);
- } else {
-ExtRedCost = Ctx.TTI.getExtendedReductionCost(
-Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
-Red->getFastMathFlags(), CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
+llvm::TargetTransformInfo::PR_None, std::nullopt,
+Ctx.CostKind);
+assert(PartialReductionCost.isValid() &&
+ "A partial reduction should have a valid cost");
+return true;
}
+
+ InstructionCost ExtCost =
+ cast(VecOp)->computeCost(VF, Ctx);
+ InstructionCost RedCost = Red->computeCost(VF, Ctx);
+
+ InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
+ Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
+ Red->getFastMathFlags(), CostKind);
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
},
Range);
@@ -3595,33 +3599,35 @@
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Type *SrcTy =
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
- InstructionCost MulAccCost;
if (IsPartialReduction) {
Type *SrcTy2 =
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) :
nullptr;
// FIXME: Move partial reduction creation, costing and clamping
// here from LoopVectorize.cpp.
-MulAccCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, SrcTy2, RedTy, VF,
-Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
- Ext0->getOpcode())
- : TargetTransformInfo::PR_None,
-Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
- Ext1->getOpcode())
- : TargetTransformInfo::PR_None,
-Mul->getOpcode(), CostKind);
- } else {
-// Only partial reductions support mixed extends at the moment.
-if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
- return false;
-
-bool IsZExt =
-!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
-auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
-MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
-SrcVecTy, CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, SrcTy2, RedTy, VF,
+Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
+ Ext0->getOpcode())
+ : TargetTransformInfo::PR_None,
+Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
+ Ext1->getOpcode())
+ : TargetTransformInfo::PR_None,
+Mul->get
[llvm-branch-commits] [llvm] [LV] Use assertion in VPExpressionRecipe creation (PR #165543)
llvmbot wrote:
@llvm/pr-subscribers-vectorizers
Author: Sam Tebbs (SamTebbs33)
Changes
This PR re-introduces the assert that the cost of a partial reduction is valid
during VPExpressionRecipe creation.
This is a stacked PR:
1. https://github.com/llvm/llvm-project/pull/165536
2. -> https://github.com/llvm/llvm-project/pull/165543
---
Full diff: https://github.com/llvm/llvm-project/pull/165543.diff
1 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+38-32)
```diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d9ac26bba7507..e75c99c35938e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -3532,24 +3532,28 @@ tryToMatchAndCreateExtendedReduction(VPReductionRecipe
*Red, VPCostContext &Ctx,
auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- InstructionCost ExtRedCost;
- InstructionCost ExtCost =
- cast(VecOp)->computeCost(VF, Ctx);
- InstructionCost RedCost = Red->computeCost(VF, Ctx);
-
if (isa(Red)) {
TargetTransformInfo::PartialReductionExtendKind ExtKind =
TargetTransformInfo::getPartialReductionExtendKind(ExtOpc);
// FIXME: Move partial reduction creation, costing and clamping
// here from LoopVectorize.cpp.
-ExtRedCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
-llvm::TargetTransformInfo::PR_None, std::nullopt,
Ctx.CostKind);
- } else {
-ExtRedCost = Ctx.TTI.getExtendedReductionCost(
-Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
-Red->getFastMathFlags(), CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, nullptr, RedTy, VF, ExtKind,
+llvm::TargetTransformInfo::PR_None, std::nullopt,
+Ctx.CostKind);
+assert(PartialReductionCost.isValid() &&
+ "A partial reduction should have a valid cost");
+return true;
}
+
+ InstructionCost ExtCost =
+ cast(VecOp)->computeCost(VF, Ctx);
+ InstructionCost RedCost = Red->computeCost(VF, Ctx);
+
+ InstructionCost ExtRedCost = Ctx.TTI.getExtendedReductionCost(
+ Opcode, ExtOpc == Instruction::CastOps::ZExt, RedTy, SrcVecTy,
+ Red->getFastMathFlags(), CostKind);
return ExtRedCost.isValid() && ExtRedCost < ExtCost + RedCost;
},
Range);
@@ -3595,33 +3599,35 @@
tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
Type *SrcTy =
Ext0 ? Ctx.Types.inferScalarType(Ext0->getOperand(0)) : RedTy;
- InstructionCost MulAccCost;
if (IsPartialReduction) {
Type *SrcTy2 =
Ext1 ? Ctx.Types.inferScalarType(Ext1->getOperand(0)) :
nullptr;
// FIXME: Move partial reduction creation, costing and clamping
// here from LoopVectorize.cpp.
-MulAccCost = Ctx.TTI.getPartialReductionCost(
-Opcode, SrcTy, SrcTy2, RedTy, VF,
-Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
- Ext0->getOpcode())
- : TargetTransformInfo::PR_None,
-Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
- Ext1->getOpcode())
- : TargetTransformInfo::PR_None,
-Mul->getOpcode(), CostKind);
- } else {
-// Only partial reductions support mixed extends at the moment.
-if (Ext0 && Ext1 && Ext0->getOpcode() != Ext1->getOpcode())
- return false;
-
-bool IsZExt =
-!Ext0 || Ext0->getOpcode() == Instruction::CastOps::ZExt;
-auto *SrcVecTy = cast(toVectorTy(SrcTy, VF));
-MulAccCost = Ctx.TTI.getMulAccReductionCost(IsZExt, Opcode, RedTy,
-SrcVecTy, CostKind);
+InstructionCost PartialReductionCost =
+Ctx.TTI.getPartialReductionCost(
+Opcode, SrcTy, SrcTy2, RedTy, VF,
+Ext0 ? TargetTransformInfo::getPartialReductionExtendKind(
+ Ext0->getOpcode())
+ : TargetTransformInfo::PR_None,
+Ext1 ? TargetTransformInfo::getPartialReductionExtendKind(
+ Ext1->getOpcode())
+ : TargetTransformInfo::PR_None,
+Mul->getOpco
[llvm-branch-commits] [llvm] [SDAG] Set InBounds when when computing offsets into memory objects (PR #165425)
@@ -5626,17 +5626,31 @@ class LLVM_ABI TargetLowering : public
TargetLoweringBase {
/// Get a pointer to vector element \p Idx located in memory for a vector of
/// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out
of
/// bounds the returned pointer is unspecified, but will be within the vector
- /// bounds.
- SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
- SDValue Index) const;
+ /// bounds. \p PtrArithFlags can be used to mark that arithmetic within the
+ /// vector in memory is known to not wrap or to be inbounds.
+ SDValue getVectorElementPointer(
+ SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index,
+ const SDNodeFlags PtrArithFlags = SDNodeFlags()) const;
+
+ /// Get a pointer to vector element \p Idx located in memory for a vector of
+ /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out
of
+ /// bounds the returned pointer is unspecified, but will be within the vector
+ /// bounds. \p VecPtr is guaranteed to point to the beginning of a memory
+ /// location large enough for the vector.
+ SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
+ EVT VecVT, SDValue Index) const;
ritter-x2a wrote:
Probably not, you're right. I went for consistency with the surrounding
declarations, but, looking further around in the file, there is precedent for
inline definitions as well. Changed.
https://github.com/llvm/llvm-project/pull/165425
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [SDAG] Set InBounds when when computing offsets into memory objects (PR #165425)
https://github.com/ritter-x2a updated
https://github.com/llvm/llvm-project/pull/165425
>From 50f481b182228175e73b7754e42076337b8c613c Mon Sep 17 00:00:00 2001
From: Fabian Ritter
Date: Mon, 27 Oct 2025 04:48:36 -0400
Subject: [PATCH 1/2] [SDAG] Set InBounds when when computing offsets into
memory objects
When a load or store accesses N bytes starting from a pointer P, and we want to
compute an offset pointer within these N bytes after P, we know that the
arithmetic to add the offset must be inbounds. This is for example relevant
when legalizing too-wide memory accesses, when lowering memcpy&Co., or when
optimizing "vector-load -> extractelement" into an offset load.
For SWDEV-516125.
---
llvm/include/llvm/CodeGen/SelectionDAG.h | 12 ---
llvm/include/llvm/CodeGen/TargetLowering.h| 26 ++
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 ++-
.../CodeGen/SelectionDAG/TargetLowering.cpp | 34 +--
4 files changed, 55 insertions(+), 22 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h
b/llvm/include/llvm/CodeGen/SelectionDAG.h
index df6ce0fe1b037..1a5ffb38f2568 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1113,7 +1113,8 @@ class SelectionDAG {
SDValue Mask, SDValue EVL);
/// Returns sum of the base pointer and offset.
- /// Unlike getObjectPtrOffset this does not set NoUnsignedWrap by default.
+ /// Unlike getObjectPtrOffset this does not set NoUnsignedWrap and InBounds
by
+ /// default.
LLVM_ABI SDValue
getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL,
const SDNodeFlags Flags = SDNodeFlags());
@@ -1123,15 +1124,18 @@ class SelectionDAG {
/// Create an add instruction with appropriate flags when used for
/// addressing some offset of an object. i.e. if a load is split into
multiple
- /// components, create an add nuw from the base pointer to the offset.
+ /// components, create an add nuw (or ptradd nuw inbounds) from the base
+ /// pointer to the offset.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset) {
-return getMemBasePlusOffset(Ptr, Offset, SL, SDNodeFlags::NoUnsignedWrap);
+return getMemBasePlusOffset(
+Ptr, Offset, SL, SDNodeFlags::NoUnsignedWrap | SDNodeFlags::InBounds);
}
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, SDValue Offset) {
// The object itself can't wrap around the address space, so it shouldn't
be
// possible for the adds of the offsets to the split parts to overflow.
-return getMemBasePlusOffset(Ptr, Offset, SL, SDNodeFlags::NoUnsignedWrap);
+return getMemBasePlusOffset(
+Ptr, Offset, SL, SDNodeFlags::NoUnsignedWrap | SDNodeFlags::InBounds);
}
/// Return a new CALLSEQ_START node, that starts new call frame, in which
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h
b/llvm/include/llvm/CodeGen/TargetLowering.h
index d6ed3a8f739b3..28a43d82364b7 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -5626,17 +5626,31 @@ class LLVM_ABI TargetLowering : public
TargetLoweringBase {
/// Get a pointer to vector element \p Idx located in memory for a vector of
/// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out
of
/// bounds the returned pointer is unspecified, but will be within the vector
- /// bounds.
- SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
- SDValue Index) const;
+ /// bounds. \p PtrArithFlags can be used to mark that arithmetic within the
+ /// vector in memory is known to not wrap or to be inbounds.
+ SDValue getVectorElementPointer(
+ SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, SDValue Index,
+ const SDNodeFlags PtrArithFlags = SDNodeFlags()) const;
+
+ /// Get a pointer to vector element \p Idx located in memory for a vector of
+ /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out
of
+ /// bounds the returned pointer is unspecified, but will be within the vector
+ /// bounds. \p VecPtr is guaranteed to point to the beginning of a memory
+ /// location large enough for the vector.
+ SDValue getInboundsVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr,
+ EVT VecVT, SDValue Index) const;
/// Get a pointer to a sub-vector of type \p SubVecVT at index \p Idx located
/// in memory for a vector of type \p VecVT starting at a base address of
/// \p VecPtr. If \p Idx plus the size of \p SubVecVT is out of bounds the
/// returned pointer is unspecified, but the value returned will be such that
- /// the entire subvector would be within the vector bounds.
- SDValue getVectorSubVecPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
- EVT SubVecVT, SDValue Index
[llvm-branch-commits] [BOLT] Allow name matching stale profiles under infer-stale-profile (PR #165493)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/165493 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [BOLT] Allow name matching stale profiles under infer-stale-profile (PR #165493)
https://github.com/aaupov updated https://github.com/llvm/llvm-project/pull/165493 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [LoongArch] Convert ld to fld when result is only used by sitofp (PR #165523)
llvmbot wrote:
@llvm/pr-subscribers-backend-loongarch
Author: Zhaoxin Yang (ylzsx)
Changes
If the result of an integer load is only used by an integer-to-float
conversion, use a fp load instead. This eliminates an integer-to-float-move
(movgr2fr) instruction.
---
Full diff: https://github.com/llvm/llvm-project/pull/165523.diff
5 Files Affected:
- (modified) llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td (+5)
- (modified) llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td (+3)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+45)
- (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.h (+1)
- (modified) llvm/test/CodeGen/LoongArch/load-itofp-combine.ll (+8-23)
``diff
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 690dd73014e57..cb6b7c7342ec6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -26,6 +26,7 @@ def SDT_LoongArchMOVFR2GR_S_LA64
def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
+def SDT_LoongArchITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
// ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
// comparisons to prevent recursive lowering.
@@ -39,6 +40,7 @@ def loongarch_movfr2gr_s_la64
def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;
+def loongarch_sitof : SDNode<"LoongArchISD::SITOF", SDT_LoongArchITOF>;
//===--===//
// Instructions
@@ -346,6 +348,9 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg
FPR32:$fa))),
// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
(FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+
+// ffint.s.w
+def : Pat<(loongarch_sitof FPR32:$fj), (FFINT_S_W FPR32:$fj)>;
} // Predicates = [HasBasicF]
let Predicates = [HasBasicF, IsLA64] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index daefbaa52d42a..e973c80af807c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -307,6 +307,9 @@ def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg
FPR64:$fa))),
// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
(FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+
+// ffint.d.l
+def : Pat<(loongarch_sitof FPR64:$fj), (FFINT_D_L FPR64:$fj)>;
} // Predicates = [HasBasicD]
/// Floating point constants
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6dc8eb6..3695c5a42790f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -451,6 +451,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const
TargetMachine &TM,
}
// Set DAG combine for LA32 and LA64.
+ if (Subtarget.hasBasicF()) {
+setTargetDAGCombine(ISD::SINT_TO_FP);
+ }
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
@@ -6725,6 +6728,45 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG
&DAG,
return SDValue();
}
+static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+TargetLowering::DAGCombinerInfo &DCI,
+const LoongArchSubtarget &Subtarget) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::f32 && VT != MVT::f64)
+return SDValue();
+ if (VT == MVT::f32 && !Subtarget.hasBasicF())
+return SDValue();
+ if (VT == MVT::f64 && !Subtarget.hasBasicD())
+return SDValue();
+
+ // Only optimize when the source and destination types have the same width.
+ if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
+return SDValue();
+
+ SDValue Src = N->getOperand(0);
+ // If the result of an integer load is only used by an integer-to-float
+ // conversion, use a fp load instead. This eliminates an
integer-to-float-move
+ // (movgr2fr) instruction.
+ if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
+ // Do not change the width of a volatile load. This condition check is
+ // inspired by AArch64.
+ !cast(Src)->isVolatile()) {
+LoadSDNode *LN0 = cast(Src);
+SDValue Load = DAG.getLoad(VT, DL, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPo
[llvm-branch-commits] [llvm] [LoongArch] Convert ld to fld when result is only used by sitofp (PR #165523)
https://github.com/ylzsx created
https://github.com/llvm/llvm-project/pull/165523
If the result of an integer load is only used by an integer-to-float
conversion, use a fp load instead. This eliminates an integer-to-float-move
(movgr2fr) instruction.
>From 25fc7d1d06a7b6b6a9d19ed82586094f58f8c527 Mon Sep 17 00:00:00 2001
From: yangzhaoxin
Date: Wed, 29 Oct 2025 15:19:05 +0800
Subject: [PATCH] [LoongArch] Convert ld to fld when result is only used by
sitofp
If the result of an integer load is only used by an integer-to-float
conversion, use a fp load instead. This eliminates an
integer-to-float-move (movgr2fr) instruction.
---
.../LoongArch/LoongArchFloat32InstrInfo.td| 5 +++
.../LoongArch/LoongArchFloat64InstrInfo.td| 3 ++
.../LoongArch/LoongArchISelLowering.cpp | 45 +++
.../Target/LoongArch/LoongArchISelLowering.h | 1 +
.../CodeGen/LoongArch/load-itofp-combine.ll | 31 -
5 files changed, 62 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
index 690dd73014e57..cb6b7c7342ec6 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td
@@ -26,6 +26,7 @@ def SDT_LoongArchMOVFR2GR_S_LA64
def SDT_LoongArchFTINT : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRECIPE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
def SDT_LoongArchFRSQRTE : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisFP<1>]>;
+def SDT_LoongArchITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
// ISD::BRCOND is custom-lowered to LoongArchISD::BRCOND for floating-point
// comparisons to prevent recursive lowering.
@@ -39,6 +40,7 @@ def loongarch_movfr2gr_s_la64
def loongarch_ftint : SDNode<"LoongArchISD::FTINT", SDT_LoongArchFTINT>;
def loongarch_frecipe : SDNode<"LoongArchISD::FRECIPE", SDT_LoongArchFRECIPE>;
def loongarch_frsqrte : SDNode<"LoongArchISD::FRSQRTE", SDT_LoongArchFRSQRTE>;
+def loongarch_sitof : SDNode<"LoongArchISD::SITOF", SDT_LoongArchITOF>;
//===--===//
// Instructions
@@ -346,6 +348,9 @@ def : Pat<(fneg (fma FPR32:$fj, FPR32:$fk, (fneg
FPR32:$fa))),
// fnmsub.s: -fj * fk + fa (the nsz flag on the FMA)
def : Pat<(fma_nsz (fneg FPR32:$fj), FPR32:$fk, FPR32:$fa),
(FNMSUB_S FPR32:$fj, FPR32:$fk, FPR32:$fa)>;
+
+// ffint.s.w
+def : Pat<(loongarch_sitof FPR32:$fj), (FFINT_S_W FPR32:$fj)>;
} // Predicates = [HasBasicF]
let Predicates = [HasBasicF, IsLA64] in {
diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
index daefbaa52d42a..e973c80af807c 100644
--- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td
@@ -307,6 +307,9 @@ def : Pat<(fneg (fma FPR64:$fj, FPR64:$fk, (fneg
FPR64:$fa))),
// fnmsub.d: -fj * fk + fa (the nsz flag on the FMA)
def : Pat<(fma_nsz (fneg FPR64:$fj), FPR64:$fk, FPR64:$fa),
(FNMSUB_D FPR64:$fj, FPR64:$fk, FPR64:$fa)>;
+
+// ffint.d.l
+def : Pat<(loongarch_sitof FPR64:$fj), (FFINT_D_L FPR64:$fj)>;
} // Predicates = [HasBasicD]
/// Floating point constants
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 80c96c6dc8eb6..3695c5a42790f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -451,6 +451,9 @@ LoongArchTargetLowering::LoongArchTargetLowering(const
TargetMachine &TM,
}
// Set DAG combine for LA32 and LA64.
+ if (Subtarget.hasBasicF()) {
+setTargetDAGCombine(ISD::SINT_TO_FP);
+ }
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
@@ -6725,6 +6728,45 @@ performEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG
&DAG,
return SDValue();
}
+static SDValue performSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+TargetLowering::DAGCombinerInfo &DCI,
+const LoongArchSubtarget &Subtarget) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ if (VT != MVT::f32 && VT != MVT::f64)
+return SDValue();
+ if (VT == MVT::f32 && !Subtarget.hasBasicF())
+return SDValue();
+ if (VT == MVT::f64 && !Subtarget.hasBasicD())
+return SDValue();
+
+ // Only optimize when the source and destination types have the same width.
+ if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
+return SDValue();
+
+ SDValue Src = N->getOperand(0);
+ // If the result of an integer load is only used by an integer-to-float
+ // conversion, use a fp load instead. This eliminates an
integer-to-float-move
+ // (movgr2fr) instruction.
+ if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse() &&
+ // Do not change the width of
[llvm-branch-commits] [llvm] [BOLT] Match functions with pseudo probes (PR #100446)
https://github.com/aaupov edited https://github.com/llvm/llvm-project/pull/100446 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Move call probe information to CallSiteInfo (PR #165490)
https://github.com/aaupov edited https://github.com/llvm/llvm-project/pull/165490 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [AtomicExpand] Add bitcasts when expanding load atomic vector (PR #148900)
@@ -1220,6 +1220,11 @@ def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, def : Pat<(v2i64 (scalar_to_vector (i64 (atomic_load_64 addr:$src, (VMOV64toPQIZrm addr:$src)>, Requires<[HasAVX512]>; +def : Pat<(v2i64 (atomic_load_128_v2i64 addr:$src)), + (VMOVAPDrm addr:$src)>; // load atomic <2 x i64> +def : Pat<(v4i32 (atomic_load_128_v4i32 addr:$src)), + (VMOVAPDrm addr:$src)>; // load atomic <4 x i32> RKSimon wrote: These need to be tagged with Requires<[UseAVX]>, and the SSE2/AVX512 variants need to be added as well (MOVAPDrm / VMOVAPDZ128rm) https://github.com/llvm/llvm-project/pull/148900 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT][PAC] Warn about synchronous unwind tables (PR #165227)
@@ -133,11 +140,17 @@ Error
PointerAuthCFIAnalyzer::runOnFunctions(BinaryContext &BC) {
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
SkipPredicate, "PointerAuthCFIAnalyzer");
+
+ float IgnoredPercent = (100.0 * FunctionsIgnored) / Total;
BC.outs() << "BOLT-INFO: PointerAuthCFIAnalyzer ran on " << Total
<< " functions. Ignored " << FunctionsIgnored << " functions "
-<< format("(%.2lf%%)", (100.0 * FunctionsIgnored) / Total)
+<< format("(%.2lf%%)", IgnoredPercent)
<< " because of CFI inconsistencies\n";
+ if (IgnoredPercent >= 10.0)
+BC.outs() << "BOLT-WARNING: PointerAuthCFIAnalyzer only supports "
+ "asynchronous unwind tables.\n";
bgergely0 wrote:
I'm not a fan of adding exact flag names for two reasons:
- it's only valid for C/C++, whereas the ABI is "general" and other languages
(Rust) also generate such unwind tables, but the flags are named differently
- `-fasync` is the default for clang, so if we have a sync unwind table, it's
because the `-fno-async` flag was passed. If the user then passes `-fasync` as
well, the chosen option depends on the order of the two flags
WDYT?
https://github.com/llvm/llvm-project/pull/165227
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [BOLT] Match functions with pseudo probes (PR #100446)
https://github.com/aaupov edited https://github.com/llvm/llvm-project/pull/100446 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/21.x: [clang-shlib] Fix linking libclang-cpp on Haiku (#156401) (PR #165638)
llvmbot wrote: @DavidSpickett What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/165638 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/21.x: [clang-shlib] Fix linking libclang-cpp on Haiku (#156401) (PR #165638)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/165638
Backport 21bcd00e54416b0950da19fe8adb0628a19bf66f
Requested by: @brad0
>From a2cb10a0e31a8070a4b43db8a24e5b48ea96e147 Mon Sep 17 00:00:00 2001
From: Brad Smith
Date: Wed, 29 Oct 2025 20:55:15 -0400
Subject: [PATCH] [clang-shlib] Fix linking libclang-cpp on Haiku (#156401)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Haiku requires linking in libnetwork.
Co-authored-by: Jérôme Duval
(cherry picked from commit 21bcd00e54416b0950da19fe8adb0628a19bf66f)
---
clang/tools/clang-shlib/CMakeLists.txt | 4
1 file changed, 4 insertions(+)
diff --git a/clang/tools/clang-shlib/CMakeLists.txt
b/clang/tools/clang-shlib/CMakeLists.txt
index 945076e1ad810..a4d0aa5779a7e 100644
--- a/clang/tools/clang-shlib/CMakeLists.txt
+++ b/clang/tools/clang-shlib/CMakeLists.txt
@@ -41,6 +41,10 @@ if (CLANG_LINK_CLANG_DYLIB)
set(INSTALL_WITH_TOOLCHAIN INSTALL_WITH_TOOLCHAIN)
endif()
+if (HAIKU)
+ list(APPEND _DEPS network)
+endif()
+
add_clang_library(clang-cpp
SHARED
${INSTALL_WITH_TOOLCHAIN}
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/21.x: [clang-shlib] Fix linking libclang-cpp on Haiku (#156401) (PR #165638)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/165638 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] release/21.x: [clang-shlib] Fix linking libclang-cpp on Haiku (#156401) (PR #165638)
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: None (llvmbot)
Changes
Backport 21bcd00e54416b0950da19fe8adb0628a19bf66f
Requested by: @brad0
---
Full diff: https://github.com/llvm/llvm-project/pull/165638.diff
1 Files Affected:
- (modified) clang/tools/clang-shlib/CMakeLists.txt (+4)
```diff
diff --git a/clang/tools/clang-shlib/CMakeLists.txt
b/clang/tools/clang-shlib/CMakeLists.txt
index 945076e1ad810..a4d0aa5779a7e 100644
--- a/clang/tools/clang-shlib/CMakeLists.txt
+++ b/clang/tools/clang-shlib/CMakeLists.txt
@@ -41,6 +41,10 @@ if (CLANG_LINK_CLANG_DYLIB)
set(INSTALL_WITH_TOOLCHAIN INSTALL_WITH_TOOLCHAIN)
endif()
+if (HAIKU)
+ list(APPEND _DEPS network)
+endif()
+
add_clang_library(clang-cpp
SHARED
${INSTALL_WITH_TOOLCHAIN}
```
https://github.com/llvm/llvm-project/pull/165638
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy][COFF] Update .symidx values after stripping (#153322) (PR #165608)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/165608 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy][COFF] Update .symidx values after stripping (#153322) (PR #165608)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/165608
Backport 74d52f9639ca7588c622c0790ca18fa5bff66837
Requested by: @mstorsjo
>From 0921531330add39df5d83c4802eb76adf2aac744 Mon Sep 17 00:00:00 2001
From: Evgenii Kudriashov
Date: Wed, 3 Sep 2025 23:54:21 +0200
Subject: [PATCH] [llvm-objcopy][COFF] Update .symidx values after stripping
(#153322)
After deleting debug sections, symbol indices are shifted but sections
consisting of .symidx directives are completely ignored. Update symbol
indices as well.
(cherry picked from commit 74d52f9639ca7588c622c0790ca18fa5bff66837)
---
llvm/lib/ObjCopy/COFF/COFFObject.cpp | 2 +
llvm/lib/ObjCopy/COFF/COFFObject.h| 2 +
llvm/lib/ObjCopy/COFF/COFFWriter.cpp | 75 +++
llvm/lib/ObjCopy/COFF/COFFWriter.h| 1 +
.../COFF/strip-invalid-symidx-section.test| 188 ++
.../COFF/strip-update-symidx-section.test | 173
6 files changed, 441 insertions(+)
create mode 100644
llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section.test
create mode 100644
llvm/test/tools/llvm-objcopy/COFF/strip-update-symidx-section.test
diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.cpp
b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
index 5fa13391c908f..91cf7e32a7396 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObject.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
@@ -18,6 +18,8 @@ using namespace object;
void Object::addSymbols(ArrayRef NewSymbols) {
for (Symbol S : NewSymbols) {
S.UniqueId = NextSymbolUniqueId++;
+S.OriginalRawIndex = NextSymbolOriginalIndex;
+NextSymbolOriginalIndex += 1 + S.Sym.NumberOfAuxSymbols;
Symbols.emplace_back(S);
}
updateSymbols();
diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.h
b/llvm/lib/ObjCopy/COFF/COFFObject.h
index cdd1f17fc6055..6b70add1bb1b7 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObject.h
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.h
@@ -89,6 +89,7 @@ struct Symbol {
std::optional WeakTargetSymbolId;
size_t UniqueId;
size_t RawIndex;
+ size_t OriginalRawIndex;
bool Referenced;
};
@@ -140,6 +141,7 @@ struct Object {
DenseMap SymbolMap;
size_t NextSymbolUniqueId = 0;
+ size_t NextSymbolOriginalIndex = 0;
std::vector Sections;
DenseMap SectionMap;
diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
index 350c4aec572c9..fed67d67f13a7 100644
--- a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
@@ -12,6 +12,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include
@@ -92,6 +94,77 @@ Error COFFWriter::finalizeSymbolContents() {
return Error::success();
}
+Error COFFWriter::finalizeSymIdxContents() {
+ // CFGuards shouldn't be present in PE.
+ if (Obj.IsPE)
+return Error::success();
+
+ // Currently handle only sections consisting only of .symidx.
+ // TODO: other sections such as .impcall and .hybmp$x require more complex
+ // handling as they have more complex layout.
+ auto IsSymIdxSection = [](StringRef Name) {
+return Name == ".gljmp$y" || Name == ".giats$y" || Name == ".gfids$y" ||
+ Name == ".gehcont$y";
+ };
+
+ DenseMap SymIdMap;
+ SmallDenseMap SecIdMap;
+ for (Symbol &Sym : Obj.getMutableSymbols()) {
+SymIdMap[Sym.OriginalRawIndex] = Sym.RawIndex;
+
+// We collect only definition symbols of the sections to update the
+// checksums.
+if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC &&
+Sym.Sym.NumberOfAuxSymbols == 1 && Sym.Sym.Value == 0 &&
+IsSymIdxSection(Sym.Name))
+ SecIdMap[Sym.TargetSectionId] =
+ reinterpret_cast(
+ Sym.AuxData[0].Opaque);
+ }
+
+ for (Section &Sec : Obj.getMutableSections()) {
+if (!IsSymIdxSection(Sec.Name))
+ continue;
+
+ArrayRef RawIds = Sec.getContents();
+// Nothing to do and also the checksum will be -1 instead of 0 if we
+// recalculate it on empty input.
+if (RawIds.size() == 0)
+ continue;
+
+auto SecDefIt = SecIdMap.find(Sec.UniqueId);
+if (SecDefIt == SecIdMap.end())
+ return createStringError(object_error::invalid_symbol_index,
+ "section '%s' does not have the corresponding "
+ "symbol or the symbol has unexpected format",
+ Sec.Name.str().c_str());
+
+// Create updated content.
+ArrayRef Ids(
+reinterpret_cast(RawIds.data()),
+RawIds.size() / 4);
+std::vector NewIds;
+for (support::ulittle32_t Id : Ids) {
+ auto SymIdIt = SymIdMap.find(Id);
+ if (SymIdIt == SymIdMap.end())
+return createStringError(object_error::invalid_symbol_index,
+ "section '%s' contains a .symidx (%
[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy][COFF] Update .symidx values after stripping (#153322) (PR #165608)
llvmbot wrote: @jh7370 What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/165608 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] release/21.x: [llvm-objcopy][COFF] Update .symidx values after stripping (#153322) (PR #165608)
llvmbot wrote:
@llvm/pr-subscribers-llvm-binary-utilities
Author: None (llvmbot)
Changes
Backport 74d52f9639ca7588c622c0790ca18fa5bff66837
Requested by: @mstorsjo
---
Full diff: https://github.com/llvm/llvm-project/pull/165608.diff
6 Files Affected:
- (modified) llvm/lib/ObjCopy/COFF/COFFObject.cpp (+2)
- (modified) llvm/lib/ObjCopy/COFF/COFFObject.h (+2)
- (modified) llvm/lib/ObjCopy/COFF/COFFWriter.cpp (+75)
- (modified) llvm/lib/ObjCopy/COFF/COFFWriter.h (+1)
- (added) llvm/test/tools/llvm-objcopy/COFF/strip-invalid-symidx-section.test
(+188)
- (added) llvm/test/tools/llvm-objcopy/COFF/strip-update-symidx-section.test
(+173)
``diff
diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.cpp
b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
index 5fa13391c908f..91cf7e32a7396 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObject.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.cpp
@@ -18,6 +18,8 @@ using namespace object;
void Object::addSymbols(ArrayRef NewSymbols) {
for (Symbol S : NewSymbols) {
S.UniqueId = NextSymbolUniqueId++;
+S.OriginalRawIndex = NextSymbolOriginalIndex;
+NextSymbolOriginalIndex += 1 + S.Sym.NumberOfAuxSymbols;
Symbols.emplace_back(S);
}
updateSymbols();
diff --git a/llvm/lib/ObjCopy/COFF/COFFObject.h
b/llvm/lib/ObjCopy/COFF/COFFObject.h
index cdd1f17fc6055..6b70add1bb1b7 100644
--- a/llvm/lib/ObjCopy/COFF/COFFObject.h
+++ b/llvm/lib/ObjCopy/COFF/COFFObject.h
@@ -89,6 +89,7 @@ struct Symbol {
std::optional WeakTargetSymbolId;
size_t UniqueId;
size_t RawIndex;
+ size_t OriginalRawIndex;
bool Referenced;
};
@@ -140,6 +141,7 @@ struct Object {
DenseMap SymbolMap;
size_t NextSymbolUniqueId = 0;
+ size_t NextSymbolOriginalIndex = 0;
std::vector Sections;
DenseMap SectionMap;
diff --git a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
index 350c4aec572c9..fed67d67f13a7 100644
--- a/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
+++ b/llvm/lib/ObjCopy/COFF/COFFWriter.cpp
@@ -12,6 +12,8 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Support/CRC.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/ErrorHandling.h"
#include
@@ -92,6 +94,77 @@ Error COFFWriter::finalizeSymbolContents() {
return Error::success();
}
+Error COFFWriter::finalizeSymIdxContents() {
+ // CFGuards shouldn't be present in PE.
+ if (Obj.IsPE)
+return Error::success();
+
+ // Currently handle only sections consisting only of .symidx.
+ // TODO: other sections such as .impcall and .hybmp$x require more complex
+ // handling as they have more complex layout.
+ auto IsSymIdxSection = [](StringRef Name) {
+return Name == ".gljmp$y" || Name == ".giats$y" || Name == ".gfids$y" ||
+ Name == ".gehcont$y";
+ };
+
+ DenseMap SymIdMap;
+ SmallDenseMap SecIdMap;
+ for (Symbol &Sym : Obj.getMutableSymbols()) {
+SymIdMap[Sym.OriginalRawIndex] = Sym.RawIndex;
+
+// We collect only definition symbols of the sections to update the
+// checksums.
+if (Sym.Sym.StorageClass == IMAGE_SYM_CLASS_STATIC &&
+Sym.Sym.NumberOfAuxSymbols == 1 && Sym.Sym.Value == 0 &&
+IsSymIdxSection(Sym.Name))
+ SecIdMap[Sym.TargetSectionId] =
+ reinterpret_cast(
+ Sym.AuxData[0].Opaque);
+ }
+
+ for (Section &Sec : Obj.getMutableSections()) {
+if (!IsSymIdxSection(Sec.Name))
+ continue;
+
+ArrayRef RawIds = Sec.getContents();
+// Nothing to do and also the checksum will be -1 instead of 0 if we
+// recalculate it on empty input.
+if (RawIds.size() == 0)
+ continue;
+
+auto SecDefIt = SecIdMap.find(Sec.UniqueId);
+if (SecDefIt == SecIdMap.end())
+ return createStringError(object_error::invalid_symbol_index,
+ "section '%s' does not have the corresponding "
+ "symbol or the symbol has unexpected format",
+ Sec.Name.str().c_str());
+
+// Create updated content.
+ArrayRef Ids(
+reinterpret_cast(RawIds.data()),
+RawIds.size() / 4);
+std::vector NewIds;
+for (support::ulittle32_t Id : Ids) {
+ auto SymIdIt = SymIdMap.find(Id);
+ if (SymIdIt == SymIdMap.end())
+return createStringError(object_error::invalid_symbol_index,
+ "section '%s' contains a .symidx (%d) that is
"
+ "incorrect or was stripped",
+ Sec.Name.str().c_str(), Id.value());
+ NewIds.push_back(support::ulittle32_t(SymIdIt->getSecond()));
+}
+ArrayRef NewRawIds(reinterpret_cast(NewIds.data()),
+RawIds.size());
+// Update the checksum.
+JamCRC JC(/*Init=*/0);
+JC.update(NewRawIds);
+SecDefIt->getSecond()->CheckSum = JC.getCRC();
+// Set new content.
+Sec.setOwnedContents(NewRawIds
[llvm-branch-commits] ELF: Rename RandomizePaddingSection to PaddingSection. (PR #155540)
MaskRay wrote: This should be changed to rebase on top of main and it can be pushed before landing previous patches. https://github.com/llvm/llvm-project/pull/155540 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] ELF: Rename RandomizePaddingSection to PaddingSection. (PR #155540)
https://github.com/MaskRay approved this pull request. https://github.com/llvm/llvm-project/pull/155540 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [mlir] 2736b4d - Revert "[mlir][affine] Add fold logic when the affine.yield has IV as operand…"
Author: lonely eagle
Date: 2025-10-30T02:15:05+08:00
New Revision: 2736b4d7963204aa182e0c8b344d50b73dc9fd5f
URL:
https://github.com/llvm/llvm-project/commit/2736b4d7963204aa182e0c8b344d50b73dc9fd5f
DIFF:
https://github.com/llvm/llvm-project/commit/2736b4d7963204aa182e0c8b344d50b73dc9fd5f.diff
LOG: Revert "[mlir][affine] Add fold logic when the affine.yield has IV as
operand…"
This reverts commit e24e7ff7e3a5ff24c5b4d41131cbd6fbac86565e.
Added:
Modified:
mlir/lib/Dialect/Affine/IR/AffineOps.cpp
mlir/test/Dialect/Affine/canonicalize.mlir
Removed:
diff --git a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
index 002f1f60bb58e..0c3592124cdec 100644
--- a/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
+++ b/mlir/lib/Dialect/Affine/IR/AffineOps.cpp
@@ -2610,21 +2610,6 @@ static std::optional
getTrivialConstantTripCount(AffineForOp forOp) {
return ub - lb <= 0 ? 0 : (ub - lb + step - 1) / step;
}
-/// Calculate the constant value of the loop's induction variable for its last
-/// trip.
-static std::optional
-getConstantInductionVarForLastTrip(AffineForOp forOp) {
- std::optional tripCount = getTrivialConstantTripCount(forOp);
- if (!tripCount.has_value())
-return std::nullopt;
- if (tripCount.value() == 0)
-return std::nullopt;
- int64_t lb = forOp.getConstantLowerBound();
- int64_t step = forOp.getStepAsInt();
- int64_t lastTripIv = lb + (tripCount.value() - 1) * step;
- return lastTripIv;
-}
-
/// Fold the empty loop.
static SmallVector AffineForEmptyLoopFolder(AffineForOp forOp) {
if (!llvm::hasSingleElement(*forOp.getBody()))
@@ -2637,7 +2622,7 @@ static SmallVector
AffineForEmptyLoopFolder(AffineForOp forOp) {
// results.
return forOp.getInits();
}
- SmallVector replacements;
+ SmallVector replacements;
auto yieldOp = cast(forOp.getBody()->getTerminator());
auto iterArgs = forOp.getRegionIterArgs();
bool hasValDefinedOutsideLoop = false;
@@ -2645,14 +2630,10 @@ static SmallVector
AffineForEmptyLoopFolder(AffineForOp forOp) {
for (unsigned i = 0, e = yieldOp->getNumOperands(); i < e; ++i) {
Value val = yieldOp.getOperand(i);
BlockArgument *iterArgIt = llvm::find(iterArgs, val);
-if (val == forOp.getInductionVar()) {
- if (auto lastTripIv = getConstantInductionVarForLastTrip(forOp)) {
-replacements.push_back(IntegerAttr::get(
-IndexType::get(forOp.getContext()), lastTripIv.value()));
-continue;
- }
+// TODO: It should be possible to perform a replacement by computing the
+// last value of the IV based on the bounds and the step.
+if (val == forOp.getInductionVar())
return {};
-}
if (iterArgIt == iterArgs.end()) {
// `val` is defined outside of the loop.
assert(forOp.isDefinedOutsideOfLoop(val) &&
@@ -2675,7 +2656,7 @@ static SmallVector
AffineForEmptyLoopFolder(AffineForOp forOp) {
// out of order.
if (tripCount.has_value() && tripCount.value() >= 2 && iterArgsNotInOrder)
return {};
- return replacements;
+ return llvm::to_vector_of(replacements);
}
/// Canonicalize the bounds of the given loop.
diff --git a/mlir/test/Dialect/Affine/canonicalize.mlir
b/mlir/test/Dialect/Affine/canonicalize.mlir
index 997f23b4bd669..1169cd1c29d74 100644
--- a/mlir/test/Dialect/Affine/canonicalize.mlir
+++ b/mlir/test/Dialect/Affine/canonicalize.mlir
@@ -609,19 +609,6 @@ func.func @fold_zero_iter_loops(%in : index) -> index {
// -
-// CHECK-LABEL: func @fold_empty_loop_iv
-// CHECK-SAME: %[[INIT:.*]]: index
-func.func @fold_empty_loop_iv(%init: index) -> (index, index) {
- %res:2 = affine.for %i = 0 to 10 step 1 iter_args(%arg0 = %init, %arg1 =
%init) -> (index, index) {
-affine.yield %i, %arg1 : index, index
- }
- // CHECK: %[[C9:.*]] = arith.constant 9 : index
- // CHECK: return %[[C9]], %[[INIT]] : index, index
- return %res#0, %res#1 : index, index
-}
-
-// -
-
// CHECK-DAG: #[[$SET:.*]] = affine_set<(d0, d1)[s0] : (d0 >= 0, -d0 + 1022 >=
0, d1 >= 0, -d1 + s0 - 2 >= 0)>
// CHECK-LABEL: func @canonicalize_affine_if
___
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/21.x: [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285) (PR #165605)
https://github.com/llvmbot created
https://github.com/llvm/llvm-project/pull/165605
Backport 6ab8e8fa03
Requested by: @nga888
>From db7b9214c3f3867a5eaa9a41cc1f69ebf804a61f Mon Sep 17 00:00:00 2001
From: Andrew Ng
Date: Wed, 29 Oct 2025 10:35:20 +0000
Subject: [PATCH] [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285)
Fix manifest `trustInfo` to use the `urn:schemas-microsoft-com:asm.v3`
namespace.
Fixes https://github.com/llvm/llvm-project/issues/120394.
(cherry picked from commit 6ab8e8fa031e0a22c0244c1aa8f54581ed9bffd1)
---
lld/COFF/DriverUtils.cpp | 2 +-
lld/test/COFF/Inputs/manifest-uac.test | 11 +
lld/test/COFF/manifest-uac.test| 33 +
lld/test/COFF/manifest.test| 65 ++
lld/test/COFF/manifestinput.test | 35 +++---
5 files changed, 96 insertions(+), 50 deletions(-)
create mode 100644 lld/test/COFF/Inputs/manifest-uac.test
create mode 100644 lld/test/COFF/manifest-uac.test
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index d8b41c7f45400..5ef41c4c0a086 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -387,7 +387,7 @@ std::string LinkerDriver::createDefaultXml() {
<< "\n";
if (ctx.config.manifestUAC) {
-os << " \n"
+os << " \n"
<< "\n"
<< " \n"
<< "
+
+
+
+
+
+
+
diff --git a/lld/test/COFF/manifest-uac.test b/lld/test/COFF/manifest-uac.test
new file mode 100644
index 0..d3a17c7282716
--- /dev/null
+++ b/lld/test/COFF/manifest-uac.test
@@ -0,0 +1,33 @@
+# REQUIRES: libxml2
+
+# RUN: yaml2obj %p/Inputs/ret42.yaml -o %t.obj
+# RUN: lld-link /out:%t.exe /entry:main \
+# RUN: /manifest:embed \
+# RUN: /manifestinput:%p/Inputs/manifest-uac.test %t.obj
+# RUN: llvm-readobj --coff-resources %t.exe | FileCheck %s
+
+CHECK: Data (
+CHECK-NEXT: : 3C3F786D 6C207665 7273696F 6E3D2231 |.|
+CHECK-NEXT: 0070: 0A20203C 74727573 74496E66 6F20786D |. .. .. <|
+CHECK-NEXT: 0120: 2F726571 75657374 65645072 6976696C |/requestedPrivil|
+CHECK-NEXT: 0130: 65676573 3E0A2020 20203C2F 73656375 |eges>.. .|
+CHECK-NEXT: 0160: 0A |.|
+CHECK-NEXT: )
diff --git a/lld/test/COFF/manifest.test b/lld/test/COFF/manifest.test
index 4910600bd3a17..09de96e9bccfa 100644
--- a/lld/test/COFF/manifest.test
+++ b/lld/test/COFF/manifest.test
@@ -10,7 +10,7 @@
MANIFEST:
MANIFEST:
-MANIFEST:
+MANIFEST:
MANIFEST:
MANIFEST:
MANIFEST:
@@ -26,7 +26,7 @@ MANIFEST:
UAC:
UAC:
-UAC:
+UAC:
UAC:
UAC:
UAC:
@@ -43,7 +43,7 @@ UAC:
DEPENDENCY:
DEPENDENCY:
-DEPENDENCY:
+DEPENDENCY:
DEPENDENCY:
DEPENDENCY:
DEPENDENCY:
@@ -90,7 +90,7 @@ NOUACNODEP:
SEVERALDEPS:
SEVERALDEPS:
-SEVERALDEPS:
+SEVERALDEPS:
SEVERALDEPS:
SEVERALDEPS:
SEVERALDEPS:
@@ -139,31 +139,34 @@ EMBED: 0040: 6D61732D 6D696372 6F736F66 742D636F
|mas-microsoft-co|
EMBED: 0050: 6D3A6173 6D2E7631 220A2020 20202020 |m:asm.v1". |
EMBED: 0060: 20202020 6D616E69 66657374 56657273 |manifestVers|
EMBED: 0070: 696F6E3D 22312E30 223E0A20 203C7472 |ion="1.0">. .. . . |
-EMBED: 0100: 203C2F72 65717565 73746564 50726976 | .. . .. |
-EMBED: 0160: 20202020 3C617373 656D626C 79496465 |.. . .<|
-EMBED: 01C0: 64657065 6E64656E 74417373 656D626C |dependentAssembl|
-EMBED: 01D0: 793E0A20 20202020 203C6173 73656D62 |y>. .. ..|
+EMBED: 0080: 75737449 6E666F20 786D6C6E 733D2275 |ustInfo xmlns="u|
+EMBED: 0090: 726E3A73 6368656D 61732D6D 6963726F |rn:schemas-micro|
+EMBED: 00A0: 736F6674 2D636F6D 3A61736D 2E763322 |soft-com:asm.v3"|
+EMBED: 00B0: 3E0A2020 20203C73 65637572 6974793E |>.|
+EMBED: 00C0: 0A202020 2020203C 72657175 65737465 |. . |
+EMBED: 00E0: 20202020 20203C72 65717565 73746564 | . |
+EMBED: 0140: 0A202020 203C2F73 65637572 6974793E |.|
+EMBED: 0150: 0A20203C 2F747275 7374496E 666F3E0A |. .|
+EMBED: 0160: 20203C64 6570656E 64656E63 793E0A20 | . |
+EMBED: 0170: 2020203C 64657065 6E64656E 74417373 | . .|
+EMBED: 01B0: 3C2F6465 70656E64 656E7441 7373656D |. . ..|
+EMBED: 0200: 20203C61 7373656D 626C7949 64656E74 | .. ..|
EMBED: )
diff --git a/lld/test/COFF/manifestinput.test b/lld/test/COFF/manifestinput.test
index 04af80a13312d..cbf27b1ea96b5 100644
--- a/lld/test/COFF/manifestinput.test
+++ b/lld/test/COFF/manifestinput.test
@@ -5,22 +5,21 @@
# RUN: /manifest:embed \
# RUN: /manifestuac:"level='requireAdministrator'" \
# RUN: /manifestinput:%p/Inputs/manifestinput.test %t.obj
-# RUN: llvm-readobj --coff-resources --file-headers %t.exe | FileCheck %s \
-# RUN: -check-prefix TEST_EMBED
+# RUN: llvm-readobj --coff-resources --file-headers %t.e
[llvm-branch-commits] [lld] release/21.x: [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285) (PR #165605)
llvmbot wrote:
@llvm/pr-subscribers-lld-coff
Author: None (llvmbot)
Changes
Backport 6ab8e8fa03
Requested by: @nga888
---
Full diff: https://github.com/llvm/llvm-project/pull/165605.diff
5 Files Affected:
- (modified) lld/COFF/DriverUtils.cpp (+1-1)
- (added) lld/test/COFF/Inputs/manifest-uac.test (+11)
- (added) lld/test/COFF/manifest-uac.test (+33)
- (modified) lld/test/COFF/manifest.test (+34-31)
- (modified) lld/test/COFF/manifestinput.test (+17-18)
```diff
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index d8b41c7f45400..5ef41c4c0a086 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -387,7 +387,7 @@ std::string LinkerDriver::createDefaultXml() {
<< "\n";
if (ctx.config.manifestUAC) {
-os << " \n"
+os << " \n"
<< "\n"
<< " \n"
<< "
+
+
+
+
+
+
+
diff --git a/lld/test/COFF/manifest-uac.test b/lld/test/COFF/manifest-uac.test
new file mode 100644
index 0..d3a17c7282716
--- /dev/null
+++ b/lld/test/COFF/manifest-uac.test
@@ -0,0 +1,33 @@
+# REQUIRES: libxml2
+
+# RUN: yaml2obj %p/Inputs/ret42.yaml -o %t.obj
+# RUN: lld-link /out:%t.exe /entry:main \
+# RUN: /manifest:embed \
+# RUN: /manifestinput:%p/Inputs/manifest-uac.test %t.obj
+# RUN: llvm-readobj --coff-resources %t.exe | FileCheck %s
+
+CHECK: Data (
+CHECK-NEXT: : 3C3F786D 6C207665 7273696F 6E3D2231 |.|
+CHECK-NEXT: 0070: 0A20203C 74727573 74496E66 6F20786D |. .. .. <|
+CHECK-NEXT: 0120: 2F726571 75657374 65645072 6976696C |/requestedPrivil|
+CHECK-NEXT: 0130: 65676573 3E0A2020 20203C2F 73656375 |eges>.. .|
+CHECK-NEXT: 0160: 0A |.|
+CHECK-NEXT: )
diff --git a/lld/test/COFF/manifest.test b/lld/test/COFF/manifest.test
index 4910600bd3a17..09de96e9bccfa 100644
--- a/lld/test/COFF/manifest.test
+++ b/lld/test/COFF/manifest.test
@@ -10,7 +10,7 @@
MANIFEST:
MANIFEST:
-MANIFEST:
+MANIFEST:
MANIFEST:
MANIFEST:
MANIFEST:
@@ -26,7 +26,7 @@ MANIFEST:
UAC:
UAC:
-UAC:
+UAC:
UAC:
UAC:
UAC:
@@ -43,7 +43,7 @@ UAC:
DEPENDENCY:
DEPENDENCY:
-DEPENDENCY:
+DEPENDENCY:
DEPENDENCY:
DEPENDENCY:
DEPENDENCY:
@@ -90,7 +90,7 @@ NOUACNODEP:
SEVERALDEPS:
SEVERALDEPS:
-SEVERALDEPS:
+SEVERALDEPS:
SEVERALDEPS:
SEVERALDEPS:
SEVERALDEPS:
@@ -139,31 +139,34 @@ EMBED: 0040: 6D61732D 6D696372 6F736F66 742D636F
|mas-microsoft-co|
EMBED: 0050: 6D3A6173 6D2E7631 220A2020 20202020 |m:asm.v1". |
EMBED: 0060: 20202020 6D616E69 66657374 56657273 |manifestVers|
EMBED: 0070: 696F6E3D 22312E30 223E0A20 203C7472 |ion="1.0">. .. . . |
-EMBED: 0100: 203C2F72 65717565 73746564 50726976 | .. . .. |
-EMBED: 0160: 20202020 3C617373 656D626C 79496465 |.. . .<|
-EMBED: 01C0: 64657065 6E64656E 74417373 656D626C |dependentAssembl|
-EMBED: 01D0: 793E0A20 20202020 203C6173 73656D62 |y>. .. ..|
+EMBED: 0080: 75737449 6E666F20 786D6C6E 733D2275 |ustInfo xmlns="u|
+EMBED: 0090: 726E3A73 6368656D 61732D6D 6963726F |rn:schemas-micro|
+EMBED: 00A0: 736F6674 2D636F6D 3A61736D 2E763322 |soft-com:asm.v3"|
+EMBED: 00B0: 3E0A2020 20203C73 65637572 6974793E |>.|
+EMBED: 00C0: 0A202020 2020203C 72657175 65737465 |. . |
+EMBED: 00E0: 20202020 20203C72 65717565 73746564 | . |
+EMBED: 0140: 0A202020 203C2F73 65637572 6974793E |.|
+EMBED: 0150: 0A20203C 2F747275 7374496E 666F3E0A |. .|
+EMBED: 0160: 20203C64 6570656E 64656E63 793E0A20 | . |
+EMBED: 0170: 2020203C 64657065 6E64656E 74417373 | . .|
+EMBED: 01B0: 3C2F6465 70656E64 656E7441 7373656D |. . ..|
+EMBED: 0200: 20203C61 7373656D 626C7949 64656E74 | .. ..|
EMBED: )
diff --git a/lld/test/COFF/manifestinput.test b/lld/test/COFF/manifestinput.test
index 04af80a13312d..cbf27b1ea96b5 100644
--- a/lld/test/COFF/manifestinput.test
+++ b/lld/test/COFF/manifestinput.test
@@ -5,22 +5,21 @@
# RUN: /manifest:embed \
# RUN: /manifestuac:"level='requireAdministrator'" \
# RUN: /manifestinput:%p/Inputs/manifestinput.test %t.obj
-# RUN: llvm-readobj --coff-resources --file-headers %t.exe | FileCheck %s \
-# RUN: -check-prefix TEST_EMBED
+# RUN: llvm-readobj --coff-resources --file-headers %t.exe | FileCheck %s
-TEST_EMBED: ResourceTableRVA: 0x2000
-TEST_EMBED-NEXT: ResourceTableSize: 0x2A0
-TEST_EMBED-DAG: Resources [
-TEST_EMBED-NEXT: Total Number of Resources: 1
-TEST_EMBED-DAG:Number of String Entries: 0
-TEST_EMBED-NEXT: Number of ID Entries: 1
-TEST_EMBED-NEXT: Type: MANIFEST (ID 24) [
-TEST_EMBED-NEXT: Table Offset: 0x18
-TEST_EMBED-NEXT: Number of String Entries: 0
-TEST_EMBED-NEXT: Number of ID Entries: 1
-TEST_EMBED
[llvm-branch-commits] [lld] release/21.x: [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285) (PR #165605)
llvmbot wrote:
@llvm/pr-subscribers-platform-windows
Author: None (llvmbot)
Changes
Backport 6ab8e8fa03
Requested by: @nga888
---
Full diff: https://github.com/llvm/llvm-project/pull/165605.diff
5 Files Affected:
- (modified) lld/COFF/DriverUtils.cpp (+1-1)
- (added) lld/test/COFF/Inputs/manifest-uac.test (+11)
- (added) lld/test/COFF/manifest-uac.test (+33)
- (modified) lld/test/COFF/manifest.test (+34-31)
- (modified) lld/test/COFF/manifestinput.test (+17-18)
```diff
diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp
index d8b41c7f45400..5ef41c4c0a086 100644
--- a/lld/COFF/DriverUtils.cpp
+++ b/lld/COFF/DriverUtils.cpp
@@ -387,7 +387,7 @@ std::string LinkerDriver::createDefaultXml() {
<< "\n";
if (ctx.config.manifestUAC) {
-os << " \n"
+os << " \n"
<< "\n"
<< " \n"
<< "
+
+
+
+
+
+
+
diff --git a/lld/test/COFF/manifest-uac.test b/lld/test/COFF/manifest-uac.test
new file mode 100644
index 0..d3a17c7282716
--- /dev/null
+++ b/lld/test/COFF/manifest-uac.test
@@ -0,0 +1,33 @@
+# REQUIRES: libxml2
+
+# RUN: yaml2obj %p/Inputs/ret42.yaml -o %t.obj
+# RUN: lld-link /out:%t.exe /entry:main \
+# RUN: /manifest:embed \
+# RUN: /manifestinput:%p/Inputs/manifest-uac.test %t.obj
+# RUN: llvm-readobj --coff-resources %t.exe | FileCheck %s
+
+CHECK: Data (
+CHECK-NEXT: : 3C3F786D 6C207665 7273696F 6E3D2231 |.|
+CHECK-NEXT: 0070: 0A20203C 74727573 74496E66 6F20786D |. .. .. <|
+CHECK-NEXT: 0120: 2F726571 75657374 65645072 6976696C |/requestedPrivil|
+CHECK-NEXT: 0130: 65676573 3E0A2020 20203C2F 73656375 |eges>.. .|
+CHECK-NEXT: 0160: 0A |.|
+CHECK-NEXT: )
diff --git a/lld/test/COFF/manifest.test b/lld/test/COFF/manifest.test
index 4910600bd3a17..09de96e9bccfa 100644
--- a/lld/test/COFF/manifest.test
+++ b/lld/test/COFF/manifest.test
@@ -10,7 +10,7 @@
MANIFEST:
MANIFEST:
-MANIFEST:
+MANIFEST:
MANIFEST:
MANIFEST:
MANIFEST:
@@ -26,7 +26,7 @@ MANIFEST:
UAC:
UAC:
-UAC:
+UAC:
UAC:
UAC:
UAC:
@@ -43,7 +43,7 @@ UAC:
DEPENDENCY:
DEPENDENCY:
-DEPENDENCY:
+DEPENDENCY:
DEPENDENCY:
DEPENDENCY:
DEPENDENCY:
@@ -90,7 +90,7 @@ NOUACNODEP:
SEVERALDEPS:
SEVERALDEPS:
-SEVERALDEPS:
+SEVERALDEPS:
SEVERALDEPS:
SEVERALDEPS:
SEVERALDEPS:
@@ -139,31 +139,34 @@ EMBED: 0040: 6D61732D 6D696372 6F736F66 742D636F
|mas-microsoft-co|
EMBED: 0050: 6D3A6173 6D2E7631 220A2020 20202020 |m:asm.v1". |
EMBED: 0060: 20202020 6D616E69 66657374 56657273 |manifestVers|
EMBED: 0070: 696F6E3D 22312E30 223E0A20 203C7472 |ion="1.0">. .. . . |
-EMBED: 0100: 203C2F72 65717565 73746564 50726976 | .. . .. |
-EMBED: 0160: 20202020 3C617373 656D626C 79496465 |.. . .<|
-EMBED: 01C0: 64657065 6E64656E 74417373 656D626C |dependentAssembl|
-EMBED: 01D0: 793E0A20 20202020 203C6173 73656D62 |y>. .. ..|
+EMBED: 0080: 75737449 6E666F20 786D6C6E 733D2275 |ustInfo xmlns="u|
+EMBED: 0090: 726E3A73 6368656D 61732D6D 6963726F |rn:schemas-micro|
+EMBED: 00A0: 736F6674 2D636F6D 3A61736D 2E763322 |soft-com:asm.v3"|
+EMBED: 00B0: 3E0A2020 20203C73 65637572 6974793E |>.|
+EMBED: 00C0: 0A202020 2020203C 72657175 65737465 |. . |
+EMBED: 00E0: 20202020 20203C72 65717565 73746564 | . |
+EMBED: 0140: 0A202020 203C2F73 65637572 6974793E |.|
+EMBED: 0150: 0A20203C 2F747275 7374496E 666F3E0A |. .|
+EMBED: 0160: 20203C64 6570656E 64656E63 793E0A20 | . |
+EMBED: 0170: 2020203C 64657065 6E64656E 74417373 | . .|
+EMBED: 01B0: 3C2F6465 70656E64 656E7441 7373656D |. . ..|
+EMBED: 0200: 20203C61 7373656D 626C7949 64656E74 | .. ..|
EMBED: )
diff --git a/lld/test/COFF/manifestinput.test b/lld/test/COFF/manifestinput.test
index 04af80a13312d..cbf27b1ea96b5 100644
--- a/lld/test/COFF/manifestinput.test
+++ b/lld/test/COFF/manifestinput.test
@@ -5,22 +5,21 @@
# RUN: /manifest:embed \
# RUN: /manifestuac:"level='requireAdministrator'" \
# RUN: /manifestinput:%p/Inputs/manifestinput.test %t.obj
-# RUN: llvm-readobj --coff-resources --file-headers %t.exe | FileCheck %s \
-# RUN: -check-prefix TEST_EMBED
+# RUN: llvm-readobj --coff-resources --file-headers %t.exe | FileCheck %s
-TEST_EMBED: ResourceTableRVA: 0x2000
-TEST_EMBED-NEXT: ResourceTableSize: 0x2A0
-TEST_EMBED-DAG: Resources [
-TEST_EMBED-NEXT: Total Number of Resources: 1
-TEST_EMBED-DAG:Number of String Entries: 0
-TEST_EMBED-NEXT: Number of ID Entries: 1
-TEST_EMBED-NEXT: Type: MANIFEST (ID 24) [
-TEST_EMBED-NEXT: Table Offset: 0x18
-TEST_EMBED-NEXT: Number of String Entries: 0
-TEST_EMBED-NEXT: Number of ID Entries: 1
-TE
[llvm-branch-commits] [lld] release/21.x: [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285) (PR #165605)
https://github.com/llvmbot milestoned https://github.com/llvm/llvm-project/pull/165605 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [lld] release/21.x: [LLD][COFF] Fix manifest UAC trustInfo namespace (#165285) (PR #165605)
llvmbot wrote: @MaskRay What do you think about merging this PR to the release branch? https://github.com/llvm/llvm-project/pull/165605 ___ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
