https://github.com/VigneshwarJ updated https://github.com/llvm/llvm-project/pull/157559
>From 63d81f0c3e09d27cd048657d81f12a63bcb89796 Mon Sep 17 00:00:00 2001 From: vigneshwar jayakumar <vigneshwar.jayaku...@amd.com> Date: Mon, 8 Sep 2025 16:24:50 -0500 Subject: [PATCH 1/2] [IndVarSimplify] Sink unused l-invariant loads in preheader. Unused loop invariant loads were not sunk from preheader to exit block which increased live range. This commit sinks unused invariant loads from preheader to exit block when the load's memory location is not modified within the loop body. --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 50 ++++++++++++--- .../IndVarSimplify/sink-from-preheader.ll | 61 +++++++++++++++++++ .../Transforms/LoopSimplify/ashr-crash.ll | 2 +- 3 files changed, 102 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index c32731185afd0..61138e379607c 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -130,6 +130,7 @@ class IndVarSimplify { const DataLayout &DL; TargetLibraryInfo *TLI; const TargetTransformInfo *TTI; + AliasAnalysis *AA; std::unique_ptr<MemorySSAUpdater> MSSAU; SmallVector<WeakTrackingVH, 16> DeadInsts; @@ -162,8 +163,9 @@ class IndVarSimplify { public: IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, const DataLayout &DL, TargetLibraryInfo *TLI, - TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars) - : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), + TargetTransformInfo *TTI, AliasAnalysis *AA, MemorySSA *MSSA, + bool WidenIndVars) + : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI), AA(AA), WidenIndVars(WidenIndVars) { if (MSSA) MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); @@ -1089,6 +1091,16 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { if (!Preheader) return false; bool MadeAnyChanges = false; + + // Collect all store instructions that may modify memory in the loop. + SmallPtrSet<Instruction *, 8> Stores; + for (BasicBlock *BB : L->blocks()) { + for (Instruction &I : *BB) { + if (I.mayWriteToMemory()) + Stores.insert(&I); + } + } + for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) { // Skip BB Terminator. @@ -1100,14 +1112,32 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) { break; // Don't move instructions which might have side effects, since the side - // effects need to complete before instructions inside the loop. Also don't - // move instructions which might read memory, since the loop may modify - // memory. Note that it's okay if the instruction might have undefined - // behavior: LoopSimplify guarantees that the preheader dominates the exit - // block. - if (I.mayHaveSideEffects() || I.mayReadFromMemory()) + // effects need to complete before instructions inside the loop. Note that + // it's okay if the instruction might have undefined behavior: LoopSimplify + // guarantees that the preheader dominates the exit block. + if (I.mayHaveSideEffects()) continue; + // Don't sink read instruction which's memory might be modified in the loop. + if (I.mayReadFromMemory()) { + if (LoadInst *Load = dyn_cast<LoadInst>(&I)) { + MemoryLocation Loc = MemoryLocation::get(Load); + bool isModified = false; + + // Check if any store instruction in the loop modifies the loaded memory + // location. + for (Instruction *S : Stores) { + if (isModSet(AA->getModRefInfo(S, Loc))) { + isModified = true; + break; + } + } + if (isModified) + continue; + } else { + continue; + } + } // Skip debug or pseudo instructions. if (I.isDebugOrPseudoInst()) continue; @@ -2042,8 +2072,8 @@ PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM, Function *F = L.getHeader()->getParent(); const DataLayout &DL = F->getDataLayout(); - IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA, - WidenIndVars && AllowIVWidening); + IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, &AR.AA, + AR.MSSA, WidenIndVars && AllowIVWidening); if (!IVS.run(&L)) return PreservedAnalyses::all(); diff --git a/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll b/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll index 89583f9131518..8d714c775bde5 100644 --- a/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll +++ b/llvm/test/Transforms/IndVarSimplify/sink-from-preheader.ll @@ -30,3 +30,64 @@ loop: exit: ret i32 %add } + +define i32 @test_with_unused_load(i32 %a, ptr %b, i32 %N) { +; CHECK-LABEL: @test_with_unused_load( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[LOAD]] +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + %load = load i32, ptr %b + %add = add i32 %a, %load + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %iv.next = add i32 %iv, 1 + %cmp = icmp slt i32 %iv.next, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %add +} + +; load's memory location is modified inside the loop, don't sink load. +define i32 @test_with_unused_load_modified_store(i32 %a, ptr %b, i32 %N) { +; CHECK-LABEL: @test_with_unused_load_modified_store( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[B:%.*]], align 4 +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: store i32 [[IV_NEXT]], ptr [[B]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] +; CHECK: exit: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[A:%.*]], [[LOAD]] +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + %load = load i32, ptr %b + %add = add i32 %a, %load + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %iv.next = add i32 %iv, 1 + store i32 %iv.next, ptr %b + %cmp = icmp slt i32 %iv.next, %N + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %add +} diff --git a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll index 85198eb19aad6..ae4f9adf8e16c 100644 --- a/llvm/test/Transforms/LoopSimplify/ashr-crash.ll +++ b/llvm/test/Transforms/LoopSimplify/ashr-crash.ll @@ -29,11 +29,11 @@ define void @foo() { ; CHECK-LABEL: define void @foo() { ; CHECK-NEXT: entry: ; CHECK-NEXT: store i32 0, ptr @d, align 4 -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @c, align 4 ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; CHECK: for.cond1.preheader: ; CHECK-NEXT: br label [[FOR_BODY3:%.*]] ; CHECK: for.body3: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @c, align 4 ; CHECK-NEXT: store i32 1, ptr @a, align 4 ; CHECK-NEXT: store i32 1, ptr @d, align 4 ; CHECK-NEXT: [[CMP4_LE_LE_INV:%.*]] = icmp sgt i32 [[TMP0]], 0 >From 61ad654c0b2c45e3235d065ab5c4162e79b23be2 Mon Sep 17 00:00:00 2001 From: vigneshwar jayakumar <vigneshwar.jayaku...@amd.com> Date: Mon, 8 Sep 2025 18:44:18 -0500 Subject: [PATCH 2/2] fix tests --- clang/test/OpenMP/bug56913.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/OpenMP/bug56913.c b/clang/test/OpenMP/bug56913.c index fad9e17ac4dd8..8409f2933922b 100644 --- a/clang/test/OpenMP/bug56913.c +++ b/clang/test/OpenMP/bug56913.c @@ -19,12 +19,12 @@ void loop(int n) { // CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N]], 0 // CHECK-NEXT: br i1 [[CMP]], label [[SIMD_IF_THEN:%.*]], label [[SIMD_IF_END:%.*]] // CHECK: simd.if.then: -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[TBAA2:![0-9]+]] // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[J]]) #[[ATTR2:[0-9]+]] -// CHECK-NEXT: store ptr [[J]], ptr @u, align 8, !tbaa [[TBAA6:![0-9]+]], !llvm.access.group [[ACC_GRP8:![0-9]+]] +// CHECK-NEXT: store ptr [[J]], ptr @u, align 8, !tbaa [[TBAA2:![0-9]+]], !llvm.access.group [[ACC_GRP7:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr @j, align 4, !tbaa [[TBAA8:![0-9]+]] // CHECK-NEXT: [[INC_LE:%.*]] = add i32 [[TMP0]], [[N]] -// CHECK-NEXT: store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[TBAA2]] -// CHECK-NEXT: store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[TBAA2]] +// CHECK-NEXT: store i32 [[INC_LE]], ptr [[J]], align 4, !tbaa [[TBAA8]] +// CHECK-NEXT: store i32 [[INC_LE]], ptr @j, align 4, !tbaa [[TBAA8]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[J]]) #[[ATTR2]] // CHECK-NEXT: br label [[SIMD_IF_END]] // CHECK: simd.if.end: _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits