[llvm] [mlir] [lld] [clang] [NFC][IRCE] Add unit test to show room for improvement (PR #71506)
https://github.com/aleks-tmb updated https://github.com/llvm/llvm-project/pull/71506 >From 1c946c72f19bb553ad7af2e9b4edb1796eee Mon Sep 17 00:00:00 2001 From: Aleksander Popov Date: Tue, 7 Nov 2023 05:16:45 +0100 Subject: [PATCH] [NFC][IRCE] Add unit test to show room for improvement Add tests for compound loop bounds where IRCE is possible if (K > 0 && M > 0) for (i = 0; i < min(K, M); i++) {...} if (K > 0 && M > 0) for (i = min(K, M); i >= 0; i--) {...} --- .../Transforms/IRCE/compound-loop-bound.ll| 132 ++ 1 file changed, 132 insertions(+) create mode 100644 llvm/test/Transforms/IRCE/compound-loop-bound.ll diff --git a/llvm/test/Transforms/IRCE/compound-loop-bound.ll b/llvm/test/Transforms/IRCE/compound-loop-bound.ll new file mode 100644 index 000..0930d19e22154fc --- /dev/null +++ b/llvm/test/Transforms/IRCE/compound-loop-bound.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -passes=irce < %s -S | FileCheck %s + +; if (K > 0 && M > 0) +; for (i = 0; i < min(K, M); i++) {...} +; +; TODO: Loop bounds are safe according to loop guards. IRCE is allowed. 
+define void @incrementing_loop(ptr %arr, ptr %len_ptr, i32 %K, i32 %M) { +; CHECK-LABEL: define void @incrementing_loop( +; CHECK-SAME: ptr [[ARR:%.*]], ptr [[LEN_PTR:%.*]], i32 [[K:%.*]], i32 [[M:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[LEN:%.*]] = load i32, ptr [[LEN_PTR]], align 4, !range [[RNG0:![0-9]+]] +; CHECK-NEXT:[[CHECK0:%.*]] = icmp sgt i32 [[K]], 0 +; CHECK-NEXT:[[CHECK1:%.*]] = icmp sgt i32 [[M]], 0 +; CHECK-NEXT:[[AND:%.*]] = and i1 [[CHECK0]], [[CHECK1]] +; CHECK-NEXT:br i1 [[AND]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: preheader: +; CHECK-NEXT:[[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[K]], i32 [[M]]) +; CHECK-NEXT:br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT:[[IDX:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ] +; CHECK-NEXT:[[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT:[[GUARD:%.*]] = icmp slt i32 [[IDX]], [[LEN]] +; CHECK-NEXT:br i1 [[GUARD]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]] +; CHECK: in.bounds: +; CHECK-NEXT:[[ADDR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX]] +; CHECK-NEXT:store i32 0, ptr [[ADDR]], align 4 +; CHECK-NEXT:[[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[SMIN]] +; CHECK-NEXT:br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: out.of.bounds: +; CHECK-NEXT:ret void +; CHECK: exit.loopexit: +; CHECK-NEXT:br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT:ret void +; +entry: + %len = load i32, ptr %len_ptr, !range !0 + %check0 = icmp sgt i32 %K, 0 + %check1 = icmp sgt i32 %M, 0 + %and = and i1 %check0, %check1 + br i1 %and, label %preheader, label %exit + +preheader: + %smin = call i32 @llvm.smin.i32(i32 %K, i32 %M) + br label %loop + +loop: + %idx = phi i32 [ 0, %preheader ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %guard = icmp slt i32 %idx, %len + br i1 %guard, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, ptr %arr, i32 %idx + store i32 0, ptr %addr + %next = icmp slt i32 %idx.next, 
%smin + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; if (K > 0 && M > 0) +; for (i = min(K, M); i >= 0; i--) {...} +; +; TODO: Loop bounds are safe according to loop guards. IRCE is allowed. +define void @decrementing_loop(ptr %arr, ptr %len_ptr, i32 %K, i32 %M) { +; CHECK-LABEL: define void @decrementing_loop( +; CHECK-SAME: ptr [[ARR:%.*]], ptr [[LEN_PTR:%.*]], i32 [[K:%.*]], i32 [[M:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT:[[LEN:%.*]] = load i32, ptr [[LEN_PTR]], align 4, !range [[RNG0]] +; CHECK-NEXT:[[CHECK0:%.*]] = icmp sgt i32 [[K]], 0 +; CHECK-NEXT:[[CHECK1:%.*]] = icmp sgt i32 [[M]], 0 +; CHECK-NEXT:[[AND:%.*]] = and i1 [[CHECK0]], [[CHECK1]] +; CHECK-NEXT:br i1 [[AND]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] +; CHECK: preheader: +; CHECK-NEXT:[[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[K]], i32 [[M]]) +; CHECK-NEXT:br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT:[[IDX:%.*]] = phi i32 [ [[SMIN]], [[PREHEADER]] ], [ [[IDX_DEC:%.*]], [[IN_BOUNDS:%.*]] ] +; CHECK-NEXT:[[IDX_DEC]] = sub i32 [[IDX]], 1 +; CHECK-NEXT:[[GUARD:%.*]] = icmp slt i32 [[IDX]], [[LEN]] +; CHECK-NEXT:br i1 [[GUARD]], label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS:%.*]] +; CHECK: in.bounds: +; CHECK-NEXT:[[ADDR:%.*]] = getelementptr i32, ptr [[ARR]], i32 [[IDX]] +; CHECK-NEXT:store i32 0, ptr [[ADDR]], align 4 +; CHECK-NEXT:[[NEXT:%.*]] = icmp sgt i32 [[IDX_DEC]], -1 +; CHECK-NEXT:br i1 [[NEXT]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]] +; CHECK: out.of.bounds: +; CHECK-NEXT:ret void +; CHECK: ex
[clang] [LoopPeeling] Fix weights updating of peeled off branches (PR #70094)
aleks-tmb wrote: @MatzeB Hi, could you please take a look. The described issue manifested after your patch: ``` SHA: b30c9c937802a78ef986cb4219eba51148f76e6c Author: Matthias Braun Date: 2023-09-11 14:23:29 -0700 -0700 LoopUnrollRuntime: Add weights to all branches ``` Actually your patch added branch weights scaling. So imagine a loop with scaled latch weights ratio from 1:2 to 127:254 one. If we decide to peel 3 iterations of such a loop, the initial 1:2 ratio will be updated as: ``` 0th peeled latch: 1:2 1st peeled latch: 1:1 2nd peeled latch: 1:1 ``` And with initial 127:254: ``` 0th peeled latch: 127:254 1st peeled latch: 127:127 2nd peeled latch: 127:1 ``` So the hotness of the main loop header changed from neutral 1:1 to 127:1, making it almost unreachable. In my patch I tried to make it: https://github.com/llvm/llvm-project/pull/70094 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [LoopPeeling] Fix weights updating of peeled off branches (PR #70094)
aleks-tmb wrote: @MatzeB Hi, could you please take a look. The described issue manifested after your patch: ``` SHA: b30c9c937802a78ef986cb4219eba51148f76e6c Author: Matthias Braun Date: 2023-09-11 14:23:29 -0700 -0700 LoopUnrollRuntime: Add weights to all branches ``` Actually your patch added branch weights scaling. So imagine a loop with scaled latch weights ratio from 1:2 to 127:254 one. If we decide to peel 3 iterations of such a loop, the initial 1:2 ratio will be updated as: ``` 0th peeled latch: 1:2 1st peeled latch: 1:1 2nd peeled latch: 1:1 ``` And with initial 127:254: ``` 0th peeled latch: 127:254 1st peeled latch: 127:127 2nd peeled latch: 127:1 ``` So the hotness of the main loop header changed from neutral 1:1 to 127:1, making it almost unreachable. In my patch I tried to make it: https://github.com/llvm/llvm-project/pull/70094 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[libcxx] [libc] [flang] [clang-tools-extra] [clang] [compiler-rt] [llvm] [LoopPeeling] Fix weights updating of peeled off branches (PR #70094)
https://github.com/aleks-tmb updated https://github.com/llvm/llvm-project/pull/70094 >From 4265cc4a05d939a29e57a116e696fa1eb032c249 Mon Sep 17 00:00:00 2001 From: Aleksandr Popov Date: Tue, 24 Oct 2023 16:47:50 + Subject: [PATCH] [LoopPeeling] Fix weights updating of peeled off branches In https://reviews.llvm.org/D64235 a new algorithm has been introduced for updating the branch weights of latch blocks and their copies. It increases the probability of going to the exit block for each next peel iteration, calculating weights by (F - I * E, E), where: - F is a weight of the edge from latch to header. - E is a weight of the edge from latch to exit. - I is a number of peeling iteration. E.g: Let's say the latch branch weights are (100,300) and the estimated trip count is 4. If we peel off all 4 iterations the weights of the copied branches will be: 0: (100,300) 1: (100,200) 2: (100,100) 3: (100,1) https://godbolt.org/z/93KnoEsT6 So we make the original loop almost unreachable from the 3rd peeled copy according to the profile data. But that's only true if the profiling data is accurate. Underestimated trip count can lead to a performance issues with the register allocator, which may decide to spill intervals inside the loop assuming it's unreachable. Since we don't know how accurate the profiling data is, it seems better to set neutral 1/1 weights on the last peeled latch branch. 
After this change, the weights in the example above will look like this: 0: (100,300) 1: (100,200) 2: (100,100) 3: (100,100) --- llvm/lib/Transforms/Utils/LoopPeel.cpp | 10 +++--- llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll | 5 ++--- llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll | 5 ++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 31f065b691f864c..30c525dd82bbd97 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -636,9 +636,13 @@ static void updateBranchWeights(Instruction *Term, WeightInfo &Info) { MDB.createBranchWeights(Info.Weights)); for (auto [Idx, SubWeight] : enumerate(Info.SubWeights)) if (SubWeight != 0) - Info.Weights[Idx] = Info.Weights[Idx] > SubWeight - ? Info.Weights[Idx] - SubWeight - : 1; + // Don't set the probability of taking the edge from latch to loop header + // to less than 1, as this could significantly reduce the loop's hotness, + // which would be incorrect in the case of underestimating the trip count. + Info.Weights[Idx] = + Info.Weights[Idx] > SubWeight + ? std::max(Info.Weights[Idx] - SubWeight, SubWeight) + : SubWeight; } /// Initialize the weights for all exiting blocks. 
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll index a0dc216133fb7d5..d91cb5bab382782 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll @@ -21,7 +21,7 @@ ; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !18 ; CHECK: [[NEXT2]]: ; CHECK: br i1 %c, label %{{.*}}, label %side_exit.loopexit, !prof !15 -; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !19 +; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !18 define i32 @basic(ptr %p, i32 %k, i1 %c) #0 !prof !15 { entry: @@ -85,6 +85,5 @@ attributes #1 = { nounwind optsize } ; This is a weights of latch and its copies. ;CHECK: !16 = !{!"branch_weights", i32 3001, i32 1001} ;CHECK: !17 = !{!"branch_weights", i32 2000, i32 1001} -;CHECK: !18 = !{!"branch_weights", i32 999, i32 1001} -;CHECK: !19 = !{!"branch_weights", i32 1, i32 1001} +;CHECK: !18 = !{!"branch_weights", i32 1001, i32 1001} diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll index cadb6739dbc3fbb..15dce234baee91d 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll @@ -24,7 +24,7 @@ ; CHECK: [[NEXT1]]: ; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !17 ; CHECK: [[NEXT2]]: -; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !18 +; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !17 define void @basic(ptr %p, i32 %k) #0 !prof !15 { entry: @@ -105,6 +105,5 @@ attributes #1 = { nounwind optsize } ;CHECK: !15 = !{!"branch_weights", i32 3001, i32 1001} ;CHECK: !16 = !{!"branch_weights", i32 2000, i32 1001} -;CHECK: !17 = !{!"branch_weights", i32 999, i32 1001} -;CHECK: !18 = !{!"branch_weights", i32 1, i32 1001} +;CHECK: !17 = !{!"branch_weights", i32 1001, i32 
1001} ___ cfe-commits mailing list cfe
[llvm] [clang-tools-extra] [LoopConstrainer] Apply loop guards to check that loop bounds are safe (PR #71531)
https://github.com/aleks-tmb updated https://github.com/llvm/llvm-project/pull/71531 >From c734c948599ace737d48b32a817bd6410bd2b13e Mon Sep 17 00:00:00 2001 From: Aleksander Popov Date: Tue, 7 Nov 2023 05:31:13 +0100 Subject: [PATCH] [LoopConstrainer] Apply loop gurads to check that loop bounds are safe Loop guards that apply to loop SCEV bounds allow IRCE for cases with compound loop bounds such as: if (K > 0 && M > 0) for (i = 0; i < min(K, M); i++) {...} if (K > 0 && M > 0) for (i = min(K, M); i >= 0; i--) {...} Otherwise SCEV couldn't prove that loops have safe bounds in these cases. --- llvm/lib/Transforms/Utils/LoopConstrainer.cpp | 22 +++-- .../Transforms/IRCE/compound-loop-bound.ll| 85 +-- 2 files changed, 90 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp index ea6d952cfa7d4f..da6f37df5b314a 100644 --- a/llvm/lib/Transforms/Utils/LoopConstrainer.cpp +++ b/llvm/lib/Transforms/Utils/LoopConstrainer.cpp @@ -42,8 +42,11 @@ static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV, ICmpInst::Predicate BoundPred = IsSigned ? 
CmpInst::ICMP_SGT : CmpInst::ICMP_UGT; + auto StartLG = SE.applyLoopGuards(Start, L); + auto BoundLG = SE.applyLoopGuards(BoundSCEV, L); + if (LatchBrExitIdx == 1) -return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); +return SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, BoundLG); assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be either 0 or 1"); @@ -54,10 +57,10 @@ static bool isSafeDecreasingBound(const SCEV *Start, const SCEV *BoundSCEV, const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Min), StepPlusOne); const SCEV *MinusOne = - SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType())); + SE.getMinusSCEV(BoundLG, SE.getOne(BoundLG->getType())); - return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, MinusOne) && - SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit); + return SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, MinusOne) && + SE.isLoopEntryGuardedByCond(L, BoundPred, BoundLG, Limit); } /// Given a loop with an increasing induction variable, is it possible to @@ -86,8 +89,11 @@ static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV, ICmpInst::Predicate BoundPred = IsSigned ? 
CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; + auto StartLG = SE.applyLoopGuards(Start, L); + auto BoundLG = SE.applyLoopGuards(BoundSCEV, L); + if (LatchBrExitIdx == 1) -return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); +return SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, BoundLG); assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1"); @@ -97,9 +103,9 @@ static bool isSafeIncreasingBound(const SCEV *Start, const SCEV *BoundSCEV, : APInt::getMaxValue(BitWidth); const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne); - return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start, - SE.getAddExpr(BoundSCEV, Step)) && - SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit)); + return (SE.isLoopEntryGuardedByCond(L, BoundPred, StartLG, + SE.getAddExpr(BoundLG, Step)) && + SE.isLoopEntryGuardedByCond(L, BoundPred, BoundLG, Limit)); } /// Returns estimate for max latch taken count of the loop of the narrowest diff --git a/llvm/test/Transforms/IRCE/compound-loop-bound.ll b/llvm/test/Transforms/IRCE/compound-loop-bound.ll index 0930d19e22154f..e50d8c6127f401 100644 --- a/llvm/test/Transforms/IRCE/compound-loop-bound.ll +++ b/llvm/test/Transforms/IRCE/compound-loop-bound.ll @@ -16,23 +16,56 @@ define void @incrementing_loop(ptr %arr, ptr %len_ptr, i32 %K, i32 %M) { ; CHECK-NEXT:br i1 [[AND]], label [[PREHEADER:%.*]], label [[EXIT:%.*]] ; CHECK: preheader: ; CHECK-NEXT:[[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[K]], i32 [[M]]) +; CHECK-NEXT:[[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[LEN]], i32 [[M]]) +; CHECK-NEXT:[[SMIN2:%.*]] = call i32 @llvm.smin.i32(i32 [[SMIN1]], i32 [[K]]) +; CHECK-NEXT:[[EXIT_MAINLOOP_AT:%.*]] = call i32 @llvm.smax.i32(i32 [[SMIN2]], i32 0) +; CHECK-NEXT:[[TMP0:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +; CHECK-NEXT:br i1 [[TMP0]], label [[LOOP_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: loop.preheader: ; CHECK-NEXT:br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT:[[IDX:%.*]] = phi 
i32 [ 0, [[PREHEADER]] ], [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ] -; CHECK-NEXT:[[IDX_NEXT]] = add i32 [[IDX]], 1 +; CHECK-NEXT:[[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER]] ] +; CHECK-NEXT:[[IDX_NEXT]] = add nsw i32 [[IDX]], 1 ; CHECK-NEXT:[[GUARD:%.*]] = icmp slt i32 [[IDX]], [
[clang-tools-extra] [LoopPeeling] Fix weights updating of peeled off branches (PR #70094)
https://github.com/aleks-tmb updated https://github.com/llvm/llvm-project/pull/70094 >From 4265cc4a05d939a29e57a116e696fa1eb032c249 Mon Sep 17 00:00:00 2001 From: Aleksandr Popov Date: Tue, 24 Oct 2023 16:47:50 + Subject: [PATCH] [LoopPeeling] Fix weights updating of peeled off branches In https://reviews.llvm.org/D64235 a new algorithm has been introduced for updating the branch weights of latch blocks and their copies. It increases the probability of going to the exit block for each next peel iteration, calculating weights by (F - I * E, E), where: - F is a weight of the edge from latch to header. - E is a weight of the edge from latch to exit. - I is a number of peeling iteration. E.g: Let's say the latch branch weights are (100,300) and the estimated trip count is 4. If we peel off all 4 iterations the weights of the copied branches will be: 0: (100,300) 1: (100,200) 2: (100,100) 3: (100,1) https://godbolt.org/z/93KnoEsT6 So we make the original loop almost unreachable from the 3rd peeled copy according to the profile data. But that's only true if the profiling data is accurate. Underestimated trip count can lead to a performance issues with the register allocator, which may decide to spill intervals inside the loop assuming it's unreachable. Since we don't know how accurate the profiling data is, it seems better to set neutral 1/1 weights on the last peeled latch branch. 
After this change, the weights in the example above will look like this: 0: (100,300) 1: (100,200) 2: (100,100) 3: (100,100) --- llvm/lib/Transforms/Utils/LoopPeel.cpp | 10 +++--- llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll | 5 ++--- llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll | 5 ++--- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp index 31f065b691f864c..30c525dd82bbd97 100644 --- a/llvm/lib/Transforms/Utils/LoopPeel.cpp +++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp @@ -636,9 +636,13 @@ static void updateBranchWeights(Instruction *Term, WeightInfo &Info) { MDB.createBranchWeights(Info.Weights)); for (auto [Idx, SubWeight] : enumerate(Info.SubWeights)) if (SubWeight != 0) - Info.Weights[Idx] = Info.Weights[Idx] > SubWeight - ? Info.Weights[Idx] - SubWeight - : 1; + // Don't set the probability of taking the edge from latch to loop header + // to less than 1, as this could significantly reduce the loop's hotness, + // which would be incorrect in the case of underestimating the trip count. + Info.Weights[Idx] = + Info.Weights[Idx] > SubWeight + ? std::max(Info.Weights[Idx] - SubWeight, SubWeight) + : SubWeight; } /// Initialize the weights for all exiting blocks. 
diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll index a0dc216133fb7d5..d91cb5bab382782 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo-deopt.ll @@ -21,7 +21,7 @@ ; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !18 ; CHECK: [[NEXT2]]: ; CHECK: br i1 %c, label %{{.*}}, label %side_exit.loopexit, !prof !15 -; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !19 +; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !18 define i32 @basic(ptr %p, i32 %k, i1 %c) #0 !prof !15 { entry: @@ -85,6 +85,5 @@ attributes #1 = { nounwind optsize } ; This is a weights of latch and its copies. ;CHECK: !16 = !{!"branch_weights", i32 3001, i32 1001} ;CHECK: !17 = !{!"branch_weights", i32 2000, i32 1001} -;CHECK: !18 = !{!"branch_weights", i32 999, i32 1001} -;CHECK: !19 = !{!"branch_weights", i32 1, i32 1001} +;CHECK: !18 = !{!"branch_weights", i32 1001, i32 1001} diff --git a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll index cadb6739dbc3fbb..15dce234baee91d 100644 --- a/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll +++ b/llvm/test/Transforms/LoopUnroll/peel-loop-pgo.ll @@ -24,7 +24,7 @@ ; CHECK: [[NEXT1]]: ; CHECK: br i1 %{{.*}}, label %[[NEXT2:.*]], label %for.cond.for.end_crit_edge, !prof !17 ; CHECK: [[NEXT2]]: -; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !18 +; CHECK: br i1 %{{.*}}, label %for.body, label %{{.*}}, !prof !17 define void @basic(ptr %p, i32 %k) #0 !prof !15 { entry: @@ -105,6 +105,5 @@ attributes #1 = { nounwind optsize } ;CHECK: !15 = !{!"branch_weights", i32 3001, i32 1001} ;CHECK: !16 = !{!"branch_weights", i32 2000, i32 1001} -;CHECK: !17 = !{!"branch_weights", i32 999, i32 1001} -;CHECK: !18 = !{!"branch_weights", i32 1, i32 1001} +;CHECK: !17 = !{!"branch_weights", i32 1001, i32 
1001} ___ cfe-commits mailing list cfe