[llvm-branch-commits] [llvm] 1739628 - Fix SLPVectorize assumption that all users are in the same function
Author: Alexis Engelke Date: 2024-08-11T05:55:56Z New Revision: 1739628f12950e3ddbd80418750b93cdc11b48e8 URL: https://github.com/llvm/llvm-project/commit/1739628f12950e3ddbd80418750b93cdc11b48e8 DIFF: https://github.com/llvm/llvm-project/commit/1739628f12950e3ddbd80418750b93cdc11b48e8.diff LOG: Fix SLPVectorize assumption that all users are in the same function Added: llvm/test/Transforms/SLPVectorizer/const-in-different-functions.ll Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 186b382addd710..91e180f9eea13c 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5998,7 +5998,9 @@ BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const { // Collect stores per pointer object. for (User *U : V->users()) { auto *SI = dyn_cast(U); - if (SI == nullptr || !SI->isSimple() || + // Test whether we can handle the store. If V is a constant, its users + // might be in diff erent functions. + if (SI == nullptr || !SI->isSimple() || SI->getFunction() != F || !isValidElementType(SI->getValueOperand()->getType())) continue; // Skip entry if already diff --git a/llvm/test/Transforms/SLPVectorizer/const-in- diff erent-functions.ll b/llvm/test/Transforms/SLPVectorizer/const-in- diff erent-functions.ll new file mode 100644 index 00..29a8f15733c450 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/const-in- diff erent-functions.ll @@ -0,0 +1,46 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -mtriple=x86_64 -passes=slp-vectorizer < %s | FileCheck %s + +; Test that SLP vectorize doesn't crash if a stored constant is used in multiple +; functions. 
+ +define void @_Z1hPfl() { +; CHECK-LABEL: define void @_Z1hPfl() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT:[[TMP0:%.*]] = getelementptr i8, ptr null, i64 28 +; CHECK-NEXT:store <2 x float> , ptr [[TMP0]], align 4 +; CHECK-NEXT:ret void +; +entry: + %0 = getelementptr i8, ptr null, i64 28 + store float 0.00e+00, ptr %0, align 4 + %1 = getelementptr i8, ptr null, i64 32 + store float 1.00e+00, ptr %1, align 16 + ret void +} + +define void @_Z1mv(i64 %arrayidx4.i.2.idx) { +; CHECK-LABEL: define void @_Z1mv( +; CHECK-SAME: i64 [[ARRAYIDX4_I_2_IDX:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT:ret void +; CHECK: [[FOR_COND1_PREHEADER_LR_PH_I:.*:]] +; CHECK-NEXT:br label %[[FOR_COND1_PREHEADER_I:.*]] +; CHECK: [[FOR_COND1_PREHEADER_I]]: +; CHECK-NEXT:store float 1.00e+00, ptr null, align 4 +; CHECK-NEXT:[[ARRAYIDX4_I_2:%.*]] = getelementptr i8, ptr null, i64 [[ARRAYIDX4_I_2_IDX]] +; CHECK-NEXT:store float 0.00e+00, ptr [[ARRAYIDX4_I_2]], align 4 +; CHECK-NEXT:br label %[[FOR_COND1_PREHEADER_I]] +; +entry: + ret void + +for.cond1.preheader.lr.ph.i: ; No predecessors! + br label %for.cond1.preheader.i + +for.cond1.preheader.i:; preds = %for.cond1.preheader.i, %for.cond1.preheader.lr.ph.i + store float 1.00e+00, ptr null, align 4 + %arrayidx4.i.2 = getelementptr i8, ptr null, i64 %arrayidx4.i.2.idx + store float 0.00e+00, ptr %arrayidx4.i.2, align 4 + br label %for.cond1.preheader.i +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 084d025 - Address comments
Author: Alexis Engelke Date: 2024-08-11T07:39:53Z New Revision: 084d02577eb68dd2b6260b9b1d12a61631e8d799 URL: https://github.com/llvm/llvm-project/commit/084d02577eb68dd2b6260b9b1d12a61631e8d799 DIFF: https://github.com/llvm/llvm-project/commit/084d02577eb68dd2b6260b9b1d12a61631e8d799.diff LOG: Address comments Added: llvm/test/Transforms/SLPVectorizer/X86/const-in-different-functions.ll Modified: Removed: llvm/test/Transforms/SLPVectorizer/const-in-different-functions.ll diff --git a/llvm/test/Transforms/SLPVectorizer/const-in- diff erent-functions.ll b/llvm/test/Transforms/SLPVectorizer/X86/const-in- diff erent-functions.ll similarity index 75% rename from llvm/test/Transforms/SLPVectorizer/const-in- diff erent-functions.ll rename to llvm/test/Transforms/SLPVectorizer/X86/const-in- diff erent-functions.ll index 29a8f15733c450..2e473f4f2c213c 100644 --- a/llvm/test/Transforms/SLPVectorizer/const-in- diff erent-functions.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/const-in- diff erent-functions.ll @@ -4,17 +4,19 @@ ; Test that SLP vectorize doesn't crash if a stored constant is used in multiple ; functions. 
+@p = external global [64 x float] + define void @_Z1hPfl() { ; CHECK-LABEL: define void @_Z1hPfl() { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT:[[TMP0:%.*]] = getelementptr i8, ptr null, i64 28 +; CHECK-NEXT:[[TMP0:%.*]] = getelementptr i8, ptr @p, i64 28 ; CHECK-NEXT:store <2 x float> , ptr [[TMP0]], align 4 ; CHECK-NEXT:ret void ; entry: - %0 = getelementptr i8, ptr null, i64 28 + %0 = getelementptr i8, ptr @p, i64 28 store float 0.00e+00, ptr %0, align 4 - %1 = getelementptr i8, ptr null, i64 32 + %1 = getelementptr i8, ptr @p, i64 32 store float 1.00e+00, ptr %1, align 16 ret void } @@ -27,8 +29,8 @@ define void @_Z1mv(i64 %arrayidx4.i.2.idx) { ; CHECK: [[FOR_COND1_PREHEADER_LR_PH_I:.*:]] ; CHECK-NEXT:br label %[[FOR_COND1_PREHEADER_I:.*]] ; CHECK: [[FOR_COND1_PREHEADER_I]]: -; CHECK-NEXT:store float 1.00e+00, ptr null, align 4 -; CHECK-NEXT:[[ARRAYIDX4_I_2:%.*]] = getelementptr i8, ptr null, i64 [[ARRAYIDX4_I_2_IDX]] +; CHECK-NEXT:store float 1.00e+00, ptr @p, align 4 +; CHECK-NEXT:[[ARRAYIDX4_I_2:%.*]] = getelementptr i8, ptr @p, i64 [[ARRAYIDX4_I_2_IDX]] ; CHECK-NEXT:store float 0.00e+00, ptr [[ARRAYIDX4_I_2]], align 4 ; CHECK-NEXT:br label %[[FOR_COND1_PREHEADER_I]] ; @@ -39,8 +41,8 @@ for.cond1.preheader.lr.ph.i: ; No predecessors! br label %for.cond1.preheader.i for.cond1.preheader.i:; preds = %for.cond1.preheader.i, %for.cond1.preheader.lr.ph.i - store float 1.00e+00, ptr null, align 4 - %arrayidx4.i.2 = getelementptr i8, ptr null, i64 %arrayidx4.i.2.idx + store float 1.00e+00, ptr @p, align 4 + %arrayidx4.i.2 = getelementptr i8, ptr @p, i64 %arrayidx4.i.2.idx store float 0.00e+00, ptr %arrayidx4.i.2, align 4 br label %for.cond1.preheader.i } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] d0b1a58 - Address comments
Author: Alexis Engelke Date: 2024-08-13T07:50:05Z New Revision: d0b1a582fd33e8c3605c027883c6deb35757f560 URL: https://github.com/llvm/llvm-project/commit/d0b1a582fd33e8c3605c027883c6deb35757f560 DIFF: https://github.com/llvm/llvm-project/commit/d0b1a582fd33e8c3605c027883c6deb35757f560.diff LOG: Address comments Added: Modified: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp Removed: diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 91e180f9eea13c..edacb2fb33540f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5991,6 +5991,9 @@ BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const { DenseMap<Value *, SmallVector<StoreInst *>> PtrToStoresMap; for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size())) { Value *V = TE->Scalars[Lane]; +// Don't iterate over the users of constant data. +if (isa<ConstantData>(V)) + continue; // To save compilation time we don't visit if we have too many users. if (V->hasNUsesOrMore(UsesLimit)) break; @@ -5998,8 +6001,8 @@ BoUpSLP::collectUserStores(const BoUpSLP::TreeEntry *TE) const { // Collect stores per pointer object. for (User *U : V->users()) { auto *SI = dyn_cast<StoreInst>(U); - // Test whether we can handle the store. If V is a constant, its users - // might be in different functions. + // Test whether we can handle the store. V might be a global, which could + // be used in a different function. if (SI == nullptr || !SI->isSimple() || SI->getFunction() != F || !isValidElementType(SI->getValueOperand()->getType())) continue; ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [InstCombine] Don't look at ConstantData users (PR #103302)
https://github.com/aengelke created https://github.com/llvm/llvm-project/pull/103302 When looking at a PHI operand for combining, only look at instructions and arguments. The loop later iterates over Arg's users, which is not useful if Arg is a constant -- its users are not meaningful and might be in different functions, which causes problems for the dominates() query. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [InstCombine] Don't look at ConstantData users (PR #103302)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/103302 >From 6a2ac00a8424a4402475e2b7972bfb01330c3bf8 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Tue, 13 Aug 2024 16:10:38 + Subject: [PATCH] Only run instcombine in test case Created using spr 1.3.5-bogner --- .../Transforms/InstCombine/phi-int-users.ll | 416 -- 1 file changed, 379 insertions(+), 37 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/phi-int-users.ll b/llvm/test/Transforms/InstCombine/phi-int-users.ll index ce81c5d7e3626..8a6bf44b884a2 100644 --- a/llvm/test/Transforms/InstCombine/phi-int-users.ll +++ b/llvm/test/Transforms/InstCombine/phi-int-users.ll @@ -1,14 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=arm64 -passes='inline,function(sroa,jump-threading,instcombine)' -S < %s | FileCheck %s +; RUN: opt -mtriple=arm64 -S < %s -passes=instcombine | FileCheck %s ; Verify that instcombine doesn't look at users of Constant in different ; functions for dominates() queries. 
-%struct.widget = type { %struct.baz, i8, [7 x i8] } -%struct.baz = type { %struct.snork } -%struct.snork = type { [8 x i8] } - -define void @spam(ptr %arg) { +define void @spam(ptr %arg) personality ptr null { ; CHECK-LABEL: define void @spam( ; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { ; CHECK-NEXT: [[BB:.*:]] @@ -49,11 +45,55 @@ define void @spam(ptr %arg) { ; CHECK-NEXT:ret void ; bb: - call void @barney(ptr %arg) + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %bb2.i, label %bb3.i + +bb2.i:; preds = %bb + store i64 1, ptr %arg, align 8 + br label %barney.exit + +bb3.i:; preds = %bb + %load.i.i = load volatile i32, ptr null, align 4 + %icmp.i.i = icmp eq i32 %load.i.i, 0 + br i1 %icmp.i.i, label %bb2.i.i, label %bb3.i.i + +bb2.i.i: ; preds = %bb3.i + br label %bb1.i + +bb1.i:; preds = %spam.exit.i, %bb2.i.i + %load.i.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i + +bb3.i.i.i:; preds = %bb1.i + call void @zot.4() + br label %spam.exit.i + +spam.exit.i: ; preds = %bb3.i.i.i, %bb1.i + %alloca.sroa.0.1.i = phi i64 [ 0, %bb3.i.i.i ], [ 1, %bb1.i ] + %0 = inttoptr i64 %alloca.sroa.0.1.i to ptr + store i32 0, ptr %0, align 4 + br label %bb1.i + +eggs.exit:; No predecessors! 
+ br label %barney.exit + +bb3.i.i: ; preds = %bb3.i + %load.i.i1 = load volatile i1, ptr null, align 1 + br i1 %load.i.i1, label %quux.exit, label %bb3.i.i2 + +bb3.i.i2: ; preds = %bb3.i.i + call void @snork() + unreachable + +quux.exit:; preds = %bb3.i.i + store ptr null, ptr null, align 8 + br label %barney.exit + +barney.exit: ; preds = %quux.exit, %eggs.exit, %bb2.i ret void } -define ptr @zot(ptr %arg) { +define ptr @zot(ptr %arg) personality ptr null { ; CHECK-LABEL: define ptr @zot( ; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { ; CHECK-NEXT: [[BB:.*:]] @@ -63,7 +103,9 @@ define ptr @zot(ptr %arg) { ; CHECK-NEXT:ret ptr null ; bb: - %call = call ptr @ham.8(ptr %arg) + %load.i.i.i.i = load ptr, ptr %arg, align 8 + store ptr null, ptr %arg, align 8 + store i32 0, ptr %load.i.i.i.i, align 4 ret ptr null } @@ -86,7 +128,7 @@ define ptr @wombat.1(ptr %arg) { ; CHECK-NEXT:ret ptr null ; bb: - %call = call ptr @foo.9(ptr %arg) + store i64 1, ptr %arg, align 8 ret ptr null } @@ -103,7 +145,15 @@ define void @quux() personality ptr null { ; CHECK-NEXT:ret void ; bb: - call void @wobble() + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %wibble.exit, label %bb3.i + +bb3.i:; preds = %bb + call void @snork() + unreachable + +wibble.exit: ; preds = %bb + store ptr null, ptr null, align 8 ret void } @@ -120,7 +170,15 @@ define void @wobble() personality ptr null { ; CHECK-NEXT:ret void ; bb: - call void @quux.3() + %load.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i, label %wobble.2.exit, label %bb3.i.i + +bb3.i.i: ; preds = %bb + call void @snork() + unreachable + +wobble.2.exit:; preds = %bb + store ptr null, ptr null, align 8 ret void } @@ -141,12 +199,20 @@ define void @eggs() personality ptr null { ; CHECK-NEXT:br label %[[BB1]] ; bb: - %alloca = alloca %struct.widget, align 8 br label %bb1 -bb1:
[llvm-branch-commits] [llvm] [InstCombine] Don't look at ConstantData users (PR #103302)
@@ -0,0 +1,576 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -mtriple=arm64 -passes='inline,function(sroa,jump-threading,instcombine)' -S < %s | FileCheck %s aengelke wrote: True, changed https://github.com/llvm/llvm-project/pull/103302 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [InstCombine] Don't look at ConstantData users (PR #103302)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/103302 >From 6a2ac00a8424a4402475e2b7972bfb01330c3bf8 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Tue, 13 Aug 2024 16:10:38 + Subject: [PATCH 1/2] Only run instcombine in test case Created using spr 1.3.5-bogner --- .../Transforms/InstCombine/phi-int-users.ll | 416 -- 1 file changed, 379 insertions(+), 37 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/phi-int-users.ll b/llvm/test/Transforms/InstCombine/phi-int-users.ll index ce81c5d7e3626..8a6bf44b884a2 100644 --- a/llvm/test/Transforms/InstCombine/phi-int-users.ll +++ b/llvm/test/Transforms/InstCombine/phi-int-users.ll @@ -1,14 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=arm64 -passes='inline,function(sroa,jump-threading,instcombine)' -S < %s | FileCheck %s +; RUN: opt -mtriple=arm64 -S < %s -passes=instcombine | FileCheck %s ; Verify that instcombine doesn't look at users of Constant in different ; functions for dominates() queries. 
-%struct.widget = type { %struct.baz, i8, [7 x i8] } -%struct.baz = type { %struct.snork } -%struct.snork = type { [8 x i8] } - -define void @spam(ptr %arg) { +define void @spam(ptr %arg) personality ptr null { ; CHECK-LABEL: define void @spam( ; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { ; CHECK-NEXT: [[BB:.*:]] @@ -49,11 +45,55 @@ define void @spam(ptr %arg) { ; CHECK-NEXT:ret void ; bb: - call void @barney(ptr %arg) + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %bb2.i, label %bb3.i + +bb2.i:; preds = %bb + store i64 1, ptr %arg, align 8 + br label %barney.exit + +bb3.i:; preds = %bb + %load.i.i = load volatile i32, ptr null, align 4 + %icmp.i.i = icmp eq i32 %load.i.i, 0 + br i1 %icmp.i.i, label %bb2.i.i, label %bb3.i.i + +bb2.i.i: ; preds = %bb3.i + br label %bb1.i + +bb1.i:; preds = %spam.exit.i, %bb2.i.i + %load.i.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i + +bb3.i.i.i:; preds = %bb1.i + call void @zot.4() + br label %spam.exit.i + +spam.exit.i: ; preds = %bb3.i.i.i, %bb1.i + %alloca.sroa.0.1.i = phi i64 [ 0, %bb3.i.i.i ], [ 1, %bb1.i ] + %0 = inttoptr i64 %alloca.sroa.0.1.i to ptr + store i32 0, ptr %0, align 4 + br label %bb1.i + +eggs.exit:; No predecessors! 
+ br label %barney.exit + +bb3.i.i: ; preds = %bb3.i + %load.i.i1 = load volatile i1, ptr null, align 1 + br i1 %load.i.i1, label %quux.exit, label %bb3.i.i2 + +bb3.i.i2: ; preds = %bb3.i.i + call void @snork() + unreachable + +quux.exit:; preds = %bb3.i.i + store ptr null, ptr null, align 8 + br label %barney.exit + +barney.exit: ; preds = %quux.exit, %eggs.exit, %bb2.i ret void } -define ptr @zot(ptr %arg) { +define ptr @zot(ptr %arg) personality ptr null { ; CHECK-LABEL: define ptr @zot( ; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { ; CHECK-NEXT: [[BB:.*:]] @@ -63,7 +103,9 @@ define ptr @zot(ptr %arg) { ; CHECK-NEXT:ret ptr null ; bb: - %call = call ptr @ham.8(ptr %arg) + %load.i.i.i.i = load ptr, ptr %arg, align 8 + store ptr null, ptr %arg, align 8 + store i32 0, ptr %load.i.i.i.i, align 4 ret ptr null } @@ -86,7 +128,7 @@ define ptr @wombat.1(ptr %arg) { ; CHECK-NEXT:ret ptr null ; bb: - %call = call ptr @foo.9(ptr %arg) + store i64 1, ptr %arg, align 8 ret ptr null } @@ -103,7 +145,15 @@ define void @quux() personality ptr null { ; CHECK-NEXT:ret void ; bb: - call void @wobble() + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %wibble.exit, label %bb3.i + +bb3.i:; preds = %bb + call void @snork() + unreachable + +wibble.exit: ; preds = %bb + store ptr null, ptr null, align 8 ret void } @@ -120,7 +170,15 @@ define void @wobble() personality ptr null { ; CHECK-NEXT:ret void ; bb: - call void @quux.3() + %load.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i, label %wobble.2.exit, label %bb3.i.i + +bb3.i.i: ; preds = %bb + call void @snork() + unreachable + +wobble.2.exit:; preds = %bb + store ptr null, ptr null, align 8 ret void } @@ -141,12 +199,20 @@ define void @eggs() personality ptr null { ; CHECK-NEXT:br label %[[BB1]] ; bb: - %alloca = alloca %struct.widget, align 8 br label %bb1 -bb
[llvm-branch-commits] [llvm] [InstCombine] Don't look at ConstantData users (PR #103302)
aengelke wrote: Done. Fun fact, llvm-reduce crashed on the input due to this bug. https://github.com/llvm/llvm-project/pull/103302 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [InstCombine] Don't look at ConstantData users (PR #103302)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/103302 >From 6a2ac00a8424a4402475e2b7972bfb01330c3bf8 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Tue, 13 Aug 2024 16:10:38 + Subject: [PATCH 1/2] Only run instcombine in test case Created using spr 1.3.5-bogner --- .../Transforms/InstCombine/phi-int-users.ll | 416 -- 1 file changed, 379 insertions(+), 37 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/phi-int-users.ll b/llvm/test/Transforms/InstCombine/phi-int-users.ll index ce81c5d7e3626..8a6bf44b884a2 100644 --- a/llvm/test/Transforms/InstCombine/phi-int-users.ll +++ b/llvm/test/Transforms/InstCombine/phi-int-users.ll @@ -1,14 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=arm64 -passes='inline,function(sroa,jump-threading,instcombine)' -S < %s | FileCheck %s +; RUN: opt -mtriple=arm64 -S < %s -passes=instcombine | FileCheck %s ; Verify that instcombine doesn't look at users of Constant in different ; functions for dominates() queries. 
-%struct.widget = type { %struct.baz, i8, [7 x i8] } -%struct.baz = type { %struct.snork } -%struct.snork = type { [8 x i8] } - -define void @spam(ptr %arg) { +define void @spam(ptr %arg) personality ptr null { ; CHECK-LABEL: define void @spam( ; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { ; CHECK-NEXT: [[BB:.*:]] @@ -49,11 +45,55 @@ define void @spam(ptr %arg) { ; CHECK-NEXT:ret void ; bb: - call void @barney(ptr %arg) + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %bb2.i, label %bb3.i + +bb2.i:; preds = %bb + store i64 1, ptr %arg, align 8 + br label %barney.exit + +bb3.i:; preds = %bb + %load.i.i = load volatile i32, ptr null, align 4 + %icmp.i.i = icmp eq i32 %load.i.i, 0 + br i1 %icmp.i.i, label %bb2.i.i, label %bb3.i.i + +bb2.i.i: ; preds = %bb3.i + br label %bb1.i + +bb1.i:; preds = %spam.exit.i, %bb2.i.i + %load.i.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i + +bb3.i.i.i:; preds = %bb1.i + call void @zot.4() + br label %spam.exit.i + +spam.exit.i: ; preds = %bb3.i.i.i, %bb1.i + %alloca.sroa.0.1.i = phi i64 [ 0, %bb3.i.i.i ], [ 1, %bb1.i ] + %0 = inttoptr i64 %alloca.sroa.0.1.i to ptr + store i32 0, ptr %0, align 4 + br label %bb1.i + +eggs.exit:; No predecessors! 
+ br label %barney.exit + +bb3.i.i: ; preds = %bb3.i + %load.i.i1 = load volatile i1, ptr null, align 1 + br i1 %load.i.i1, label %quux.exit, label %bb3.i.i2 + +bb3.i.i2: ; preds = %bb3.i.i + call void @snork() + unreachable + +quux.exit:; preds = %bb3.i.i + store ptr null, ptr null, align 8 + br label %barney.exit + +barney.exit: ; preds = %quux.exit, %eggs.exit, %bb2.i ret void } -define ptr @zot(ptr %arg) { +define ptr @zot(ptr %arg) personality ptr null { ; CHECK-LABEL: define ptr @zot( ; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { ; CHECK-NEXT: [[BB:.*:]] @@ -63,7 +103,9 @@ define ptr @zot(ptr %arg) { ; CHECK-NEXT:ret ptr null ; bb: - %call = call ptr @ham.8(ptr %arg) + %load.i.i.i.i = load ptr, ptr %arg, align 8 + store ptr null, ptr %arg, align 8 + store i32 0, ptr %load.i.i.i.i, align 4 ret ptr null } @@ -86,7 +128,7 @@ define ptr @wombat.1(ptr %arg) { ; CHECK-NEXT:ret ptr null ; bb: - %call = call ptr @foo.9(ptr %arg) + store i64 1, ptr %arg, align 8 ret ptr null } @@ -103,7 +145,15 @@ define void @quux() personality ptr null { ; CHECK-NEXT:ret void ; bb: - call void @wobble() + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %wibble.exit, label %bb3.i + +bb3.i:; preds = %bb + call void @snork() + unreachable + +wibble.exit: ; preds = %bb + store ptr null, ptr null, align 8 ret void } @@ -120,7 +170,15 @@ define void @wobble() personality ptr null { ; CHECK-NEXT:ret void ; bb: - call void @quux.3() + %load.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i, label %wobble.2.exit, label %bb3.i.i + +bb3.i.i: ; preds = %bb + call void @snork() + unreachable + +wobble.2.exit:; preds = %bb + store ptr null, ptr null, align 8 ret void } @@ -141,12 +199,20 @@ define void @eggs() personality ptr null { ; CHECK-NEXT:br label %[[BB1]] ; bb: - %alloca = alloca %struct.widget, align 8 br label %bb1 -bb
[llvm-branch-commits] [llvm] [InstCombine] Don't look at ConstantData users (PR #103302)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/103302 >From 6a2ac00a8424a4402475e2b7972bfb01330c3bf8 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Tue, 13 Aug 2024 16:10:38 + Subject: [PATCH 1/2] Only run instcombine in test case Created using spr 1.3.5-bogner --- .../Transforms/InstCombine/phi-int-users.ll | 416 -- 1 file changed, 379 insertions(+), 37 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/phi-int-users.ll b/llvm/test/Transforms/InstCombine/phi-int-users.ll index ce81c5d7e3626..8a6bf44b884a2 100644 --- a/llvm/test/Transforms/InstCombine/phi-int-users.ll +++ b/llvm/test/Transforms/InstCombine/phi-int-users.ll @@ -1,14 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -mtriple=arm64 -passes='inline,function(sroa,jump-threading,instcombine)' -S < %s | FileCheck %s +; RUN: opt -mtriple=arm64 -S < %s -passes=instcombine | FileCheck %s ; Verify that instcombine doesn't look at users of Constant in different ; functions for dominates() queries. 
-%struct.widget = type { %struct.baz, i8, [7 x i8] } -%struct.baz = type { %struct.snork } -%struct.snork = type { [8 x i8] } - -define void @spam(ptr %arg) { +define void @spam(ptr %arg) personality ptr null { ; CHECK-LABEL: define void @spam( ; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { ; CHECK-NEXT: [[BB:.*:]] @@ -49,11 +45,55 @@ define void @spam(ptr %arg) { ; CHECK-NEXT:ret void ; bb: - call void @barney(ptr %arg) + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %bb2.i, label %bb3.i + +bb2.i:; preds = %bb + store i64 1, ptr %arg, align 8 + br label %barney.exit + +bb3.i:; preds = %bb + %load.i.i = load volatile i32, ptr null, align 4 + %icmp.i.i = icmp eq i32 %load.i.i, 0 + br i1 %icmp.i.i, label %bb2.i.i, label %bb3.i.i + +bb2.i.i: ; preds = %bb3.i + br label %bb1.i + +bb1.i:; preds = %spam.exit.i, %bb2.i.i + %load.i.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i.i, label %spam.exit.i, label %bb3.i.i.i + +bb3.i.i.i:; preds = %bb1.i + call void @zot.4() + br label %spam.exit.i + +spam.exit.i: ; preds = %bb3.i.i.i, %bb1.i + %alloca.sroa.0.1.i = phi i64 [ 0, %bb3.i.i.i ], [ 1, %bb1.i ] + %0 = inttoptr i64 %alloca.sroa.0.1.i to ptr + store i32 0, ptr %0, align 4 + br label %bb1.i + +eggs.exit:; No predecessors! 
+ br label %barney.exit + +bb3.i.i: ; preds = %bb3.i + %load.i.i1 = load volatile i1, ptr null, align 1 + br i1 %load.i.i1, label %quux.exit, label %bb3.i.i2 + +bb3.i.i2: ; preds = %bb3.i.i + call void @snork() + unreachable + +quux.exit:; preds = %bb3.i.i + store ptr null, ptr null, align 8 + br label %barney.exit + +barney.exit: ; preds = %quux.exit, %eggs.exit, %bb2.i ret void } -define ptr @zot(ptr %arg) { +define ptr @zot(ptr %arg) personality ptr null { ; CHECK-LABEL: define ptr @zot( ; CHECK-SAME: ptr [[ARG:%.*]]) personality ptr null { ; CHECK-NEXT: [[BB:.*:]] @@ -63,7 +103,9 @@ define ptr @zot(ptr %arg) { ; CHECK-NEXT:ret ptr null ; bb: - %call = call ptr @ham.8(ptr %arg) + %load.i.i.i.i = load ptr, ptr %arg, align 8 + store ptr null, ptr %arg, align 8 + store i32 0, ptr %load.i.i.i.i, align 4 ret ptr null } @@ -86,7 +128,7 @@ define ptr @wombat.1(ptr %arg) { ; CHECK-NEXT:ret ptr null ; bb: - %call = call ptr @foo.9(ptr %arg) + store i64 1, ptr %arg, align 8 ret ptr null } @@ -103,7 +145,15 @@ define void @quux() personality ptr null { ; CHECK-NEXT:ret void ; bb: - call void @wobble() + %load.i = load volatile i1, ptr null, align 1 + br i1 %load.i, label %wibble.exit, label %bb3.i + +bb3.i:; preds = %bb + call void @snork() + unreachable + +wibble.exit: ; preds = %bb + store ptr null, ptr null, align 8 ret void } @@ -120,7 +170,15 @@ define void @wobble() personality ptr null { ; CHECK-NEXT:ret void ; bb: - call void @quux.3() + %load.i.i = load volatile i1, ptr null, align 1 + br i1 %load.i.i, label %wobble.2.exit, label %bb3.i.i + +bb3.i.i: ; preds = %bb + call void @snork() + unreachable + +wobble.2.exit:; preds = %bb + store ptr null, ptr null, align 8 ret void } @@ -141,12 +199,20 @@ define void @eggs() personality ptr null { ; CHECK-NEXT:br label %[[BB1]] ; bb: - %alloca = alloca %struct.widget, align 8 br label %bb1 -bb
[llvm-branch-commits] [Support] Use block numbers for LoopInfo BBMap (PR #103400)
https://github.com/aengelke created https://github.com/llvm/llvm-project/pull/103400 Replace the DenseMap from blocks to their innermost loop with a vector indexed by block numbers, when possible. This requires updating the loop info when blocks are renumbered. This update is currently implemented by iterating over all loops and their blocks, as there is no mapping from the previous block number to the block (as opposed to the dominator tree). This makes the update O(n^2) in the worst case: a block in a loop with nesting level x will be considered x times to determine the innermost loop. In practice, it should be acceptable, though (but probably not in the long run, O(n^2) algorithms are generally bad). NB: I'm generally not happy with the way loops are stored. As I think that there's room for improvement, I don't want to touch the representation at this point. I'm also considering removing the number-updating facility in favor of recomputing the analysis: natural loop analysis isn't that expensive, and it might give more freedom for data structure design to have a fixed numbering without needing to worry about numbering changes. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [Support] Use block numbers for LoopInfo BBMap (PR #103400)
aengelke wrote: [c-t-t](http://llvm-compile-time-tracker.com/compare.php?from=2db9cb5fec35a7516b0e1d123d161ace78e14be6&to=c34780d18ac411ca2363eeff1cecd08aeb1d154a&stat=instructions:u) -0.13% stage2-O3 https://github.com/llvm/llvm-project/pull/103400 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CodeGen] Limit number of analyzed predecessors (PR #142584)
aengelke wrote: NB: I don't claim to fully understand what this code does, but it seems to be safe to return a default value. [Example generator](https://github.com/tpde2/tpde/blob/f6e87d2e97f49f403c12a27e7cf513a44f0f5dbc/tpde-llvm/test/filetest/many-preds.test) to demonstrate the behavior, e.g. with 10k predecessors. https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/aengelke created https://github.com/llvm/llvm-project/pull/142584 MachineBlockPlacement has quadratic runtime in the number of predecessors: in some situations, for an edge, all predecessors of the successor are considered. Limit the number of considered predecessors to bound compile time for large functions. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CodeGen][NFC] Fix quadratic c-t for large jump tables (PR #144108)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/144108 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/142584 >From 4cbc231699c11444cff73ff28b88dc0f3835c752 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Wed, 4 Jun 2025 09:21:02 + Subject: [PATCH 1/2] Move one check to beginning of function Created using spr 1.3.5-bogner --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index e96f3f8193b09..2dbabfe345d5e 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -1483,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( if (SuccChain.UnscheduledPredecessors == 0) return false; + // Compile-time optimization: runtime is quadratic in the number of + // predecessors. For such uncommon cases, exit early. + if (Succ->pred_size() > PredecessorLimit) +return false; + // There are two basic scenarios here: // - // Case 1: triangular shape CFG (if-then): @@ -1603,11 +1608,6 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb; bool BadCFGConflict = false; - // Compile-time optimization: runtime is quadratic in the number of - // predecessors. For such uncommon cases, exit early. 
- if (Succ->pred_size() > PredecessorLimit) -return false; - for (MachineBasicBlock *Pred : Succ->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (Pred == Succ || PredChain == &SuccChain || >From e90cfcb5740fc7297e05a876172ad8c25f596a33 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Fri, 13 Jun 2025 15:43:00 + Subject: [PATCH 2/2] Test new command line flag Created using spr 1.3.5-bogner --- llvm/test/CodeGen/RISCV/branch.ll | 49 +++ 1 file changed, 49 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/branch.ll b/llvm/test/CodeGen/RISCV/branch.ll index 578080cd3a240..ed86ca8ca4dd1 100644 --- a/llvm/test/CodeGen/RISCV/branch.ll +++ b/llvm/test/CodeGen/RISCV/branch.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -block-placement-predecessor-limit=10 < %s \ +; RUN: | FileCheck -check-prefix=RV32I-MBPLIMIT %s define void @foo(i32 %a, ptr %b, i1 %c) nounwind { ; RV32I-LABEL: foo: @@ -48,6 +50,53 @@ define void @foo(i32 %a, ptr %b, i1 %c) nounwind { ; RV32I-NEXT:lw zero, 0(a1) ; RV32I-NEXT: .LBB0_14: # %end ; RV32I-NEXT:ret +; +; RV32I-MBPLIMIT-LABEL: foo: +; RV32I-MBPLIMIT: # %bb.0: +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_2 +; RV32I-MBPLIMIT-NEXT: .LBB0_1: # %end +; RV32I-MBPLIMIT-NEXT:ret +; RV32I-MBPLIMIT-NEXT: .LBB0_2: # %test2 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.3: # %test3 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:blt a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.4: # %test4 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bge a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.5: # %test5 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bltu a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.6: # %test6 +; 
RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgeu a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.7: # %test7 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:blt a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.8: # %test8 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bge a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.9: # %test9 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bltu a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.10: # %test10 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgeu a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.11: # %test11 +; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1) +; RV32I-MBPLIMIT-NEXT:andi a2, a2, 1 +; RV32I-MBPLIMIT-NEXT:bnez a2, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.12: # %test12 +; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgez a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.13: # %test13 +; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1) +; RV32I-MBPLIMIT-NEXT:blez a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.14: # %test14 +; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1) +; RV32I-MBPLIMIT-NEXT:ret %val1 = load volatile i32, ptr %b %tst1 = icmp eq i32 %val1, %a br i1 %tst1, label %end, label %test2 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/c
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/142584 >From 4cbc231699c11444cff73ff28b88dc0f3835c752 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Wed, 4 Jun 2025 09:21:02 + Subject: [PATCH 1/2] Move one check to beginning of function Created using spr 1.3.5-bogner --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index e96f3f8193b09..2dbabfe345d5e 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -1483,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( if (SuccChain.UnscheduledPredecessors == 0) return false; + // Compile-time optimization: runtime is quadratic in the number of + // predecessors. For such uncommon cases, exit early. + if (Succ->pred_size() > PredecessorLimit) +return false; + // There are two basic scenarios here: // - // Case 1: triangular shape CFG (if-then): @@ -1603,11 +1608,6 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb; bool BadCFGConflict = false; - // Compile-time optimization: runtime is quadratic in the number of - // predecessors. For such uncommon cases, exit early. 
- if (Succ->pred_size() > PredecessorLimit) -return false; - for (MachineBasicBlock *Pred : Succ->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (Pred == Succ || PredChain == &SuccChain || >From e90cfcb5740fc7297e05a876172ad8c25f596a33 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Fri, 13 Jun 2025 15:43:00 + Subject: [PATCH 2/2] Test new command line flag Created using spr 1.3.5-bogner --- llvm/test/CodeGen/RISCV/branch.ll | 49 +++ 1 file changed, 49 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/branch.ll b/llvm/test/CodeGen/RISCV/branch.ll index 578080cd3a240..ed86ca8ca4dd1 100644 --- a/llvm/test/CodeGen/RISCV/branch.ll +++ b/llvm/test/CodeGen/RISCV/branch.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -block-placement-predecessor-limit=10 < %s \ +; RUN: | FileCheck -check-prefix=RV32I-MBPLIMIT %s define void @foo(i32 %a, ptr %b, i1 %c) nounwind { ; RV32I-LABEL: foo: @@ -48,6 +50,53 @@ define void @foo(i32 %a, ptr %b, i1 %c) nounwind { ; RV32I-NEXT:lw zero, 0(a1) ; RV32I-NEXT: .LBB0_14: # %end ; RV32I-NEXT:ret +; +; RV32I-MBPLIMIT-LABEL: foo: +; RV32I-MBPLIMIT: # %bb.0: +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_2 +; RV32I-MBPLIMIT-NEXT: .LBB0_1: # %end +; RV32I-MBPLIMIT-NEXT:ret +; RV32I-MBPLIMIT-NEXT: .LBB0_2: # %test2 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.3: # %test3 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:blt a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.4: # %test4 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bge a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.5: # %test5 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bltu a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.6: # %test6 +; 
RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgeu a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.7: # %test7 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:blt a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.8: # %test8 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bge a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.9: # %test9 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bltu a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.10: # %test10 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgeu a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.11: # %test11 +; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1) +; RV32I-MBPLIMIT-NEXT:andi a2, a2, 1 +; RV32I-MBPLIMIT-NEXT:bnez a2, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.12: # %test12 +; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgez a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.13: # %test13 +; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1) +; RV32I-MBPLIMIT-NEXT:blez a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.14: # %test14 +; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1) +; RV32I-MBPLIMIT-NEXT:ret %val1 = load volatile i32, ptr %b %tst1 = icmp eq i32 %val1, %a br i1 %tst1, label %end, label %test2 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/c
[llvm-branch-commits] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/aengelke created https://github.com/llvm/llvm-project/pull/145009 Similar to the existing implementations for X86 and PPC, support symbolizing branch targets for AArch64. Do not omit the address for ADRP as the target is typically not at an intended location. ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -0,0 +1,67 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 + +## Expect to find the branch labels and global variable name. +# CHECK: <_start>: +# CHECK-NEXT: ldr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] +# CHECK-NEXT: adr x2, +# CHECK-NEXT: cmp x1, x2 +# CHECK-NEXT: b.eq +# CHECK-NEXT: b +# CHECK-NEXT: : +# CHECK-NEXT: cbz x2, +# CHECK-NEXT: ret + +## Machine code generated with: aengelke wrote: obj2yaml produces loads of unnecessary content (program headers, dynamic sections (dynsym/dynstr/hash/dynamic)); its output is twice as long as this test currently is. I can do that, but I don't think it's worth the effort. https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/145009 >From db5463b1af5c1c425866979dcf85ee5919c8a75d Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 08:50:34 + Subject: [PATCH 1/3] address comments + add reloctable test Created using spr 1.3.5-bogner --- ...=> elf-executable-symbolize-operands.yaml} | 31 +++- .../elf-relocatable-symbolize-operands.s | 77 +++ 2 files changed, 105 insertions(+), 3 deletions(-) rename llvm/test/tools/llvm-objdump/AArch64/{elf-disassemble-symbololize-operands.yaml => elf-executable-symbolize-operands.yaml} (64%) create mode 100644 llvm/test/tools/llvm-objdump/AArch64/elf-relocatable-symbolize-operands.s diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml similarity index 64% rename from llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml rename to llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml index 3f3c6f33e620f..d318ea01b4c30 100644 --- a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml +++ b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml @@ -1,14 +1,14 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 ## Expect to find the branch labels and global variable name. 
# CHECK: <_start>: # CHECK-NEXT: ldr x0, # CHECK-NEXT: : -# CHECK-NEXT: adrp x1, 0x{{[68]}}000 +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] # CHECK-NEXT: adr x2, # CHECK-NEXT: cmp x1, x2 # CHECK-NEXT: b.eq @@ -17,6 +17,31 @@ # CHECK-NEXT: cbz x2, # CHECK-NEXT: ret +## Machine code generated with: +# llvm-mc --arch=aarch64 --filetype=obj -o tmp.o <: +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: ret + +.p2align 4 +.global fn1 +fn1: +b 0f +tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop +bl fn2 +bl .Lfn2 +adr x0, 2b +adr x1, fn2 +adr x2, .Lfn2 +ldr w0, 2b +ldr w0, fn2 +ret + +.p2align 4 +.global fn2 +fn2: +.Lfn2: # local label for non-interposable call +bl .Lfn3 +# In future, we might identify the pairs and symbolize the operands properly +adrp x3, fn2 +add x3, x3, :lo12:fn2 +adrp x3, fn2 +ldr x0, [x3, :lo12:fn2] +ret + +.p2align 4 +.Lfn3: # private function +ret >From 1abf014077dd0e7f5592651a51484a544cad1e49 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 09:24:47 + Subject: [PATCH 2/3] move tests to avoid 
failure if AArch64 is not configured Created using spr 1.3.5-bogner --- .../AArch64/symbolize-operands-executable.yaml} | 0 .../AArch64/symbolize-operands-reloctable.s} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/tools/llvm-objdump/{AArch64/elf-executable-symbolize-operands.yaml => ELF/AArch64/symbolize-operands-executable.yaml} (100%) rename llvm/test/tools/llvm-objdump/{AArch64/elf-relocatable-symbolize-operands.s => ELF/AArch64/symbolize-operands-reloctable.s} (100%) diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml similarity index 100% rename from llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml rename to llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml diff --git a/llv
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -0,0 +1,42 @@ +# RUN: yaml2obj %s -o %t +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ +# RUN: FileCheck %s --match-full-lines +# RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ +# RUN: FileCheck %s --match-full-lines + +## Expect to find the branch labels and global variable name. +# CHECK: <_start>: +# CHECK-NEXT: ldr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adrp x1, 0x{{[68]}}000 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: cmp x1, x2 +# CHECK-NEXT: b.eq +# CHECK-NEXT: b +# CHECK-NEXT: : +# CHECK-NEXT: cbz x2, +# CHECK-NEXT: ret + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data:ELFDATA2LSB + Type:ET_EXEC + Machine: EM_AARCH64 +Sections: + - Name:.text +Type:SHT_PROGBITS +Address: 0x4000 +Flags: [SHF_ALLOC, SHF_EXECINSTR] +Content: '6080005801d0228000103f0002eb4054fc1762b4c0035fd6' aengelke wrote: Done. I also added a test to show how this behaves on relocatable files. https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/145009 >From db5463b1af5c1c425866979dcf85ee5919c8a75d Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 08:50:34 + Subject: [PATCH 1/2] address comments + add reloctable test Created using spr 1.3.5-bogner --- ...=> elf-executable-symbolize-operands.yaml} | 31 +++- .../elf-relocatable-symbolize-operands.s | 77 +++ 2 files changed, 105 insertions(+), 3 deletions(-) rename llvm/test/tools/llvm-objdump/AArch64/{elf-disassemble-symbololize-operands.yaml => elf-executable-symbolize-operands.yaml} (64%) create mode 100644 llvm/test/tools/llvm-objdump/AArch64/elf-relocatable-symbolize-operands.s diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml similarity index 64% rename from llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml rename to llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml index 3f3c6f33e620f..d318ea01b4c30 100644 --- a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml +++ b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml @@ -1,14 +1,14 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 ## Expect to find the branch labels and global variable name. 
# CHECK: <_start>: # CHECK-NEXT: ldr x0, # CHECK-NEXT: : -# CHECK-NEXT: adrp x1, 0x{{[68]}}000 +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] # CHECK-NEXT: adr x2, # CHECK-NEXT: cmp x1, x2 # CHECK-NEXT: b.eq @@ -17,6 +17,31 @@ # CHECK-NEXT: cbz x2, # CHECK-NEXT: ret +## Machine code generated with: +# llvm-mc --arch=aarch64 --filetype=obj -o tmp.o <: +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: ret + +.p2align 4 +.global fn1 +fn1: +b 0f +tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop +bl fn2 +bl .Lfn2 +adr x0, 2b +adr x1, fn2 +adr x2, .Lfn2 +ldr w0, 2b +ldr w0, fn2 +ret + +.p2align 4 +.global fn2 +fn2: +.Lfn2: # local label for non-interposable call +bl .Lfn3 +# In future, we might identify the pairs and symbolize the operands properly +adrp x3, fn2 +add x3, x3, :lo12:fn2 +adrp x3, fn2 +ldr x0, [x3, :lo12:fn2] +ret + +.p2align 4 +.Lfn3: # private function +ret >From 1abf014077dd0e7f5592651a51484a544cad1e49 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 09:24:47 + Subject: [PATCH 2/2] move tests to avoid 
failure if AArch64 is not configured Created using spr 1.3.5-bogner --- .../AArch64/symbolize-operands-executable.yaml} | 0 .../AArch64/symbolize-operands-reloctable.s} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/tools/llvm-objdump/{AArch64/elf-executable-symbolize-operands.yaml => ELF/AArch64/symbolize-operands-executable.yaml} (100%) rename llvm/test/tools/llvm-objdump/{AArch64/elf-relocatable-symbolize-operands.s => ELF/AArch64/symbolize-operands-reloctable.s} (100%) diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml similarity index 100% rename from llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml rename to llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml diff --git a/llv
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/145009 >From db5463b1af5c1c425866979dcf85ee5919c8a75d Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 08:50:34 + Subject: [PATCH 1/4] address comments + add reloctable test Created using spr 1.3.5-bogner --- ...=> elf-executable-symbolize-operands.yaml} | 31 +++- .../elf-relocatable-symbolize-operands.s | 77 +++ 2 files changed, 105 insertions(+), 3 deletions(-) rename llvm/test/tools/llvm-objdump/AArch64/{elf-disassemble-symbololize-operands.yaml => elf-executable-symbolize-operands.yaml} (64%) create mode 100644 llvm/test/tools/llvm-objdump/AArch64/elf-relocatable-symbolize-operands.s diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml similarity index 64% rename from llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml rename to llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml index 3f3c6f33e620f..d318ea01b4c30 100644 --- a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml +++ b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml @@ -1,14 +1,14 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 ## Expect to find the branch labels and global variable name. 
# CHECK: <_start>: # CHECK-NEXT: ldr x0, # CHECK-NEXT: : -# CHECK-NEXT: adrp x1, 0x{{[68]}}000 +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] # CHECK-NEXT: adr x2, # CHECK-NEXT: cmp x1, x2 # CHECK-NEXT: b.eq @@ -17,6 +17,31 @@ # CHECK-NEXT: cbz x2, # CHECK-NEXT: ret +## Machine code generated with: +# llvm-mc --arch=aarch64 --filetype=obj -o tmp.o <: +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: ret + +.p2align 4 +.global fn1 +fn1: +b 0f +tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop +bl fn2 +bl .Lfn2 +adr x0, 2b +adr x1, fn2 +adr x2, .Lfn2 +ldr w0, 2b +ldr w0, fn2 +ret + +.p2align 4 +.global fn2 +fn2: +.Lfn2: # local label for non-interposable call +bl .Lfn3 +# In future, we might identify the pairs and symbolize the operands properly +adrp x3, fn2 +add x3, x3, :lo12:fn2 +adrp x3, fn2 +ldr x0, [x3, :lo12:fn2] +ret + +.p2align 4 +.Lfn3: # private function +ret >From 1abf014077dd0e7f5592651a51484a544cad1e49 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 09:24:47 + Subject: [PATCH 2/4] move tests to avoid 
failure if AArch64 is not configured Created using spr 1.3.5-bogner --- .../AArch64/symbolize-operands-executable.yaml} | 0 .../AArch64/symbolize-operands-reloctable.s} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/tools/llvm-objdump/{AArch64/elf-executable-symbolize-operands.yaml => ELF/AArch64/symbolize-operands-executable.yaml} (100%) rename llvm/test/tools/llvm-objdump/{AArch64/elf-relocatable-symbolize-operands.s => ELF/AArch64/symbolize-operands-reloctable.s} (100%) diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml similarity index 100% rename from llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml rename to llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml diff --git a/llv
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/145009 >From db5463b1af5c1c425866979dcf85ee5919c8a75d Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 08:50:34 + Subject: [PATCH] address comments + add reloctable test Created using spr 1.3.5-bogner --- ...=> elf-executable-symbolize-operands.yaml} | 31 +++- .../elf-relocatable-symbolize-operands.s | 77 +++ 2 files changed, 105 insertions(+), 3 deletions(-) rename llvm/test/tools/llvm-objdump/AArch64/{elf-disassemble-symbololize-operands.yaml => elf-executable-symbolize-operands.yaml} (64%) create mode 100644 llvm/test/tools/llvm-objdump/AArch64/elf-relocatable-symbolize-operands.s diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml similarity index 64% rename from llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml rename to llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml index 3f3c6f33e620f..d318ea01b4c30 100644 --- a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml +++ b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml @@ -1,14 +1,14 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 ## Expect to find the branch labels and global variable name. 
# CHECK: <_start>: # CHECK-NEXT: ldr x0, # CHECK-NEXT: : -# CHECK-NEXT: adrp x1, 0x{{[68]}}000 +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] # CHECK-NEXT: adr x2, # CHECK-NEXT: cmp x1, x2 # CHECK-NEXT: b.eq @@ -17,6 +17,31 @@ # CHECK-NEXT: cbz x2, # CHECK-NEXT: ret +## Machine code generated with: +# llvm-mc --arch=aarch64 --filetype=obj -o tmp.o <: +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: ret + +.p2align 4 +.global fn1 +fn1: +b 0f +tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop +bl fn2 +bl .Lfn2 +adr x0, 2b +adr x1, fn2 +adr x2, .Lfn2 +ldr w0, 2b +ldr w0, fn2 +ret + +.p2align 4 +.global fn2 +fn2: +.Lfn2: # local label for non-interposable call +bl .Lfn3 +# In future, we might identify the pairs and symbolize the operands properly +adrp x3, fn2 +add x3, x3, :lo12:fn2 +adrp x3, fn2 +ldr x0, [x3, :lo12:fn2] +ret + +.p2align 4 +.Lfn3: # private function +ret ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
@@ -0,0 +1,42 @@ +# RUN: yaml2obj %s -o %t aengelke wrote: Fixed; just copied the file from X86 without reading too closely... https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/145009 >From db5463b1af5c1c425866979dcf85ee5919c8a75d Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 08:50:34 + Subject: [PATCH 1/5] address comments + add reloctable test Created using spr 1.3.5-bogner --- ...=> elf-executable-symbolize-operands.yaml} | 31 +++- .../elf-relocatable-symbolize-operands.s | 77 +++ 2 files changed, 105 insertions(+), 3 deletions(-) rename llvm/test/tools/llvm-objdump/AArch64/{elf-disassemble-symbololize-operands.yaml => elf-executable-symbolize-operands.yaml} (64%) create mode 100644 llvm/test/tools/llvm-objdump/AArch64/elf-relocatable-symbolize-operands.s diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml similarity index 64% rename from llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml rename to llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml index 3f3c6f33e620f..d318ea01b4c30 100644 --- a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml +++ b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml @@ -1,14 +1,14 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 ## Expect to find the branch labels and global variable name. 
# CHECK: <_start>: # CHECK-NEXT: ldr x0, # CHECK-NEXT: : -# CHECK-NEXT: adrp x1, 0x{{[68]}}000 +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] # CHECK-NEXT: adr x2, # CHECK-NEXT: cmp x1, x2 # CHECK-NEXT: b.eq @@ -17,6 +17,31 @@ # CHECK-NEXT: cbz x2, # CHECK-NEXT: ret +## Machine code generated with: +# llvm-mc --arch=aarch64 --filetype=obj -o tmp.o <: +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: ret + +.p2align 4 +.global fn1 +fn1: +b 0f +tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop +bl fn2 +bl .Lfn2 +adr x0, 2b +adr x1, fn2 +adr x2, .Lfn2 +ldr w0, 2b +ldr w0, fn2 +ret + +.p2align 4 +.global fn2 +fn2: +.Lfn2: # local label for non-interposable call +bl .Lfn3 +# In future, we might identify the pairs and symbolize the operands properly +adrp x3, fn2 +add x3, x3, :lo12:fn2 +adrp x3, fn2 +ldr x0, [x3, :lo12:fn2] +ret + +.p2align 4 +.Lfn3: # private function +ret >From 1abf014077dd0e7f5592651a51484a544cad1e49 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 09:24:47 + Subject: [PATCH 2/5] move tests to avoid 
failure if AArch64 is not configured Created using spr 1.3.5-bogner --- .../AArch64/symbolize-operands-executable.yaml} | 0 .../AArch64/symbolize-operands-reloctable.s} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/tools/llvm-objdump/{AArch64/elf-executable-symbolize-operands.yaml => ELF/AArch64/symbolize-operands-executable.yaml} (100%) rename llvm/test/tools/llvm-objdump/{AArch64/elf-relocatable-symbolize-operands.s => ELF/AArch64/symbolize-operands-reloctable.s} (100%) diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml b/llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml similarity index 100% rename from llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml rename to llvm/test/tools/llvm-objdump/ELF/AArch64/symbolize-operands-executable.yaml diff --git a/llv
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
aengelke wrote: Reused an existing test case; this also shows the difference in the resulting block order. If preferred, I can also write a separate test case. https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/142584 >From 4cbc231699c11444cff73ff28b88dc0f3835c752 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Wed, 4 Jun 2025 09:21:02 + Subject: [PATCH 1/2] Move one check to beginning of function Created using spr 1.3.5-bogner --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index e96f3f8193b09..2dbabfe345d5e 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -1483,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( if (SuccChain.UnscheduledPredecessors == 0) return false; + // Compile-time optimization: runtime is quadratic in the number of + // predecessors. For such uncommon cases, exit early. + if (Succ->pred_size() > PredecessorLimit) +return false; + // There are two basic scenarios here: // - // Case 1: triangular shape CFG (if-then): @@ -1603,11 +1608,6 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb; bool BadCFGConflict = false; - // Compile-time optimization: runtime is quadratic in the number of - // predecessors. For such uncommon cases, exit early. 
- if (Succ->pred_size() > PredecessorLimit) -return false; - for (MachineBasicBlock *Pred : Succ->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (Pred == Succ || PredChain == &SuccChain || >From e90cfcb5740fc7297e05a876172ad8c25f596a33 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Fri, 13 Jun 2025 15:43:00 + Subject: [PATCH 2/2] Test new command line flag Created using spr 1.3.5-bogner --- llvm/test/CodeGen/RISCV/branch.ll | 49 +++ 1 file changed, 49 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/branch.ll b/llvm/test/CodeGen/RISCV/branch.ll index 578080cd3a240..ed86ca8ca4dd1 100644 --- a/llvm/test/CodeGen/RISCV/branch.ll +++ b/llvm/test/CodeGen/RISCV/branch.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV32I %s +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -block-placement-predecessor-limit=10 < %s \ +; RUN: | FileCheck -check-prefix=RV32I-MBPLIMIT %s define void @foo(i32 %a, ptr %b, i1 %c) nounwind { ; RV32I-LABEL: foo: @@ -48,6 +50,53 @@ define void @foo(i32 %a, ptr %b, i1 %c) nounwind { ; RV32I-NEXT:lw zero, 0(a1) ; RV32I-NEXT: .LBB0_14: # %end ; RV32I-NEXT:ret +; +; RV32I-MBPLIMIT-LABEL: foo: +; RV32I-MBPLIMIT: # %bb.0: +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_2 +; RV32I-MBPLIMIT-NEXT: .LBB0_1: # %end +; RV32I-MBPLIMIT-NEXT:ret +; RV32I-MBPLIMIT-NEXT: .LBB0_2: # %test2 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bne a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.3: # %test3 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:blt a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.4: # %test4 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bge a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.5: # %test5 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bltu a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.6: # %test6 +; 
RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgeu a3, a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.7: # %test7 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:blt a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.8: # %test8 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bge a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.9: # %test9 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bltu a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.10: # %test10 +; RV32I-MBPLIMIT-NEXT:lw a3, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgeu a0, a3, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.11: # %test11 +; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1) +; RV32I-MBPLIMIT-NEXT:andi a2, a2, 1 +; RV32I-MBPLIMIT-NEXT:bnez a2, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.12: # %test12 +; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1) +; RV32I-MBPLIMIT-NEXT:bgez a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.13: # %test13 +; RV32I-MBPLIMIT-NEXT:lw a0, 0(a1) +; RV32I-MBPLIMIT-NEXT:blez a0, .LBB0_1 +; RV32I-MBPLIMIT-NEXT: # %bb.14: # %test14 +; RV32I-MBPLIMIT-NEXT:lw zero, 0(a1) +; RV32I-MBPLIMIT-NEXT:ret %val1 = load volatile i32, ptr %b %tst1 = icmp eq i32 %val1, %a br i1 %tst1, label %end, label %test2 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/c
[llvm-branch-commits] [CodeGen][NFC] Fix quadratic c-t for large jump tables (PR #144108)
https://github.com/aengelke created https://github.com/llvm/llvm-project/pull/144108 Deleting a basic block removes all references from jump tables, which is O(n). When freeing a MachineFunction, all basic blocks are deleted before the jump tables, causing O(n^2) runtime. Fix this by deallocating the jump table first. Test case generator: import sys n = int(sys.argv[1]) print("define void @f(i64 %c, ptr %p) {") print(" switch i64 %c, label %d [") for i in range(n): print(f"i64 {i}, label %h{i}") print(f" ]") for i in range(n): print(f'h{i}:') print(f' store i64 {i*i}, ptr %p') print(f' ret void') print('d:') print(' ret void') print('}') ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
aengelke wrote: Done. The command line help doesn't give any indication on supported architectures. https://github.com/llvm/llvm-project/pull/145009 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [mlir] [llvm-objdump] Support --symbolize-operand on AArch64 (PR #145009)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/145009 >From 87858653bb4c9e3911479f139ca0f1b093e94280 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Fri, 20 Jun 2025 10:18:23 + Subject: [PATCH 1/6] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?= =?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Created using spr 1.3.5-bogner [skip ci] --- mlir/include/mlir/Dialect/Arith/IR/ArithOps.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td index 993f36f556e87..0518cac156eba 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td @@ -1271,7 +1271,7 @@ def Arith_ScalingExtFOp // TruncIOp //===--===// -def Arith_TruncIOp : Op, DeclareOpInterfaceMethods, >From db5463b1af5c1c425866979dcf85ee5919c8a75d Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 08:50:34 + Subject: [PATCH 2/6] address comments + add reloctable test Created using spr 1.3.5-bogner --- ...=> elf-executable-symbolize-operands.yaml} | 31 +++- .../elf-relocatable-symbolize-operands.s | 77 +++ 2 files changed, 105 insertions(+), 3 deletions(-) rename llvm/test/tools/llvm-objdump/AArch64/{elf-disassemble-symbololize-operands.yaml => elf-executable-symbolize-operands.yaml} (64%) create mode 100644 llvm/test/tools/llvm-objdump/AArch64/elf-relocatable-symbolize-operands.s diff --git a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml similarity index 64% rename from llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml rename to llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml index 3f3c6f33e620f..d318ea01b4c30 100644 --- 
a/llvm/test/tools/llvm-objdump/AArch64/elf-disassemble-symbololize-operands.yaml +++ b/llvm/test/tools/llvm-objdump/AArch64/elf-executable-symbolize-operands.yaml @@ -1,14 +1,14 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x6000 # RUN: llvm-objdump %t -d --symbolize-operands --no-show-raw-insn --no-leading-addr --adjust-vma=0x2000 | \ -# RUN: FileCheck %s --match-full-lines +# RUN: FileCheck %s --match-full-lines -DABS_ADRP_VAL=0x8000 ## Expect to find the branch labels and global variable name. # CHECK: <_start>: # CHECK-NEXT: ldr x0, # CHECK-NEXT: : -# CHECK-NEXT: adrp x1, 0x{{[68]}}000 +# CHECK-NEXT: adrp x1, [[ABS_ADRP_VAL]] # CHECK-NEXT: adr x2, # CHECK-NEXT: cmp x1, x2 # CHECK-NEXT: b.eq @@ -17,6 +17,31 @@ # CHECK-NEXT: cbz x2, # CHECK-NEXT: ret +## Machine code generated with: +# llvm-mc --arch=aarch64 --filetype=obj -o tmp.o <: +# CHECK-NEXT: b +# CHECK-NEXT: tbz x0, #0x2c, +# CHECK-NEXT: : +# CHECK-NEXT: b.eq +# CHECK-NEXT: : +# CHECK-NEXT: cbz x1, +# CHECK-NEXT: : +# CHECK-NEXT: nop +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: R_AARCH64_CALL26 fn2 +# CHECK-NEXT: bl +# CHECK-NEXT: adr x0, +# CHECK-NEXT: : +# CHECK-NEXT: adr x1, +# CHECK-NEXT: R_AARCH64_ADR_PREL_LO21 fn2 +# CHECK-NEXT: adr x2, +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: : +# CHECK-NEXT: ldr w0, +# CHECK-NEXT: R_AARCH64_LD_PREL_LO19 fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-EMPTY: +# CHECK-NEXT: : +# CHECK-NEXT: bl +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: add x3, x3, #0x0 +# CHECK-NEXT: R_AARCH64_ADD_ABS_LO12_NC fn2 +# CHECK-NEXT: adrp x3, 0x0 +# CHECK-NEXT: R_AARCH64_ADR_PREL_PG_HI21 fn2 +# CHECK-NEXT: ldr x0, [x3] +# CHECK-NEXT: R_AARCH64_LDST64_ABS_LO12_NC fn2 +# CHECK-NEXT: ret +# CHECK-NEXT: nop +# CHECK-NEXT: nop +# CHECK-NEXT: : 
+# CHECK-NEXT: ret + +.p2align 4 +.global fn1 +fn1: +b 0f +tbz x0, 44, 2f +0: b.eq 1f +1: cbz x1, 0b +2: nop +bl fn2 +bl .Lfn2 +adr x0, 2b +adr x1, fn2 +adr x2, .Lfn2 +ldr w0, 2b +ldr w0, fn2 +ret + +.p2align 4 +.global fn2 +fn2: +.Lfn2: # local label for non-interposable call +bl .Lfn3 +# In future, we might identify the pairs and symbolize the operands properly +adrp x3, fn2 +add x3, x3, :lo12:fn2 +adrp x3, fn2 +ldr x0, [x3, :lo12:fn2] +ret + +.p2align 4 +.Lfn3: # private function +ret >From 1abf014077dd0e7f5592651a51484a544cad1e49 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 23 Jun 2025 09:24:47 +00
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
https://github.com/aengelke updated https://github.com/llvm/llvm-project/pull/142584 >From 4cbc231699c11444cff73ff28b88dc0f3835c752 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Wed, 4 Jun 2025 09:21:02 + Subject: [PATCH] Move one check to beginning of function Created using spr 1.3.5-bogner --- llvm/lib/CodeGen/MachineBlockPlacement.cpp | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp index e96f3f8193b09..2dbabfe345d5e 100644 --- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -1483,6 +1483,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( if (SuccChain.UnscheduledPredecessors == 0) return false; + // Compile-time optimization: runtime is quadratic in the number of + // predecessors. For such uncommon cases, exit early. + if (Succ->pred_size() > PredecessorLimit) +return false; + // There are two basic scenarios here: // - // Case 1: triangular shape CFG (if-then): @@ -1603,11 +1608,6 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb; bool BadCFGConflict = false; - // Compile-time optimization: runtime is quadratic in the number of - // predecessors. For such uncommon cases, exit early. - if (Succ->pred_size() > PredecessorLimit) -return false; - for (MachineBasicBlock *Pred : Succ->predecessors()) { BlockChain *PredChain = BlockToChain[Pred]; if (Pred == Succ || PredChain == &SuccChain || ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [CodeGen] Limit number of analyzed predecessors (PR #142584)
@@ -1030,6 +1036,11 @@ bool MachineBlockPlacement::isTrellis( SmallPtrSet SeenPreds; for (MachineBasicBlock *Succ : ViableSuccs) { +// Compile-time optimization: runtime is quadratic in the number of +// predecessors. For such uncommon cases, exit early. +if (Succ->pred_size() > PredecessorLimit) aengelke wrote: Consider the code from the test generator below. From my understanding, `buildChain` will iterate over all basic blocks of the chain and call `selectBestSuccessor` for each of them, which will in turn call `isTrellis` for every block. `isTrellis` will look at the predecessors of all successors, in particular, it will look at all predecessors of the `merge` block, which are all the other blocks => for almost every block, the code looks at almost all other blocks. Test generator, try n=4: ```python import sys n = int(sys.argv[1]) print("declare void @exit(i32)") print("declare i1 @cond(i32)") print("define i32 @f(i32 %v, i32 %v0) {") for i in range(n): print(f'b{i}:') print(f' %v{i+1} = add i32 %v{i}, %v') print(f' %c{i} = call i1 @cond(i32 %v{i+1})') print(f' br i1 %c{i}, label %merge, label %b{i+1}') print(f'b{n}:') print(f' ret i32 %v{n}') print('merge:') print(' call void @exit(i32 1)') print(' unreachable') print('}') ``` ```console # Without this change $ python3 many-preds3.test 4 | /usr/bin/time ./llvm-build/bin/llc -filetype=obj -o /dev/null -O1 15.93user 0.17system 0:16.18elapsed 99%CPU (0avgtext+0avgdata 457748maxresident)k 0inputs+0outputs (0major+99524minor)pagefaults 0swaps # With this change $ python3 many-preds3.test 4 | /usr/bin/time ./llvm-build/bin/llc -filetype=obj -o /dev/null -O1 8.82user 0.19system 0:09.10elapsed 99%CPU (0avgtext+0avgdata 457240maxresident)k 0inputs+0outputs (0major+99425minor)pagefaults 0swaps ``` https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
@@ -1592,6 +1603,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor( BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb; bool BadCFGConflict = false; + // Compile-time optimization: runtime is quadratic in the number of aengelke wrote: Done https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] [CodeGen] Limit number of analyzed predecessors (PR #142584)
aengelke wrote: > Adding this threshold check within isTrellis() feels somewhat unnatural. If > compile time is a concern, could we simply check the size of functions (in > terms of the number of blocks, as opposed to predecessor only) early in this > pass and either skip it or switch to a faster, simpler algorithm? Is such an algorithm already implemented and readily available or would it require implementing a new algorithm? (I would prefer to keep changes local and not abruptly degrade/change the output of the entire function, so I placed the checks as close as possible to the relevant points.) > Also 1000 size seems small, may be 10000? Some measurement data, left without this patch, right with. 1000 is the region where it becomes somewhat noticeable and at 3000 blocks, we can already end up with ~2x change of compile time... a threshold of 10k would be way too high in my opinion. ``` 500: 0.06 0.06 1000: 0.11 0.09 2000: 0.26 0.18 3000: 0.49 0.27 4000: 0.70 0.37 5000: 1.14 0.47 10000: 3.86 1.09 ``` https://github.com/llvm/llvm-project/pull/142584 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] MC: Refactor FT_Align fragments when linker relaxation is enabled (PR #149465)
@@ -230,22 +230,24 @@ uint64_t MCAssembler::computeFragmentSize(const MCFragment &F) const { case MCFragment::FT_Align: { unsigned Offset = F.Offset + F.getFixedSize(); unsigned Size = offsetToAlignment(Offset, F.getAlignment()); - -// Insert extra Nops for code alignment if the target define -// shouldInsertExtraNopBytesForCodeAlign target hook. -if (F.getParent()->useCodeAlign() && F.hasAlignEmitNops() && -getBackend().shouldInsertExtraNopBytesForCodeAlign(F, Size)) - return F.getFixedSize() + Size; - -// If we are padding with nops, force the padding to be larger than the -// minimum nop size. -if (Size > 0 && F.hasAlignEmitNops()) { - while (Size % getBackend().getMinimumNopSize()) -Size += F.getAlignment().value(); +auto &Frag = const_cast(F); +// In the nops mode, RISC-V style linker relaxation might adjust the size +// and add a fixup, even if `Size` is originally 0. +bool AlignFixup = false; +if (F.hasAlignEmitNops()) { + AlignFixup = getBackend().relaxAlign(Frag, Size); + // If the backend does not handle the fragment specially, pad with nops, + // but ensure that the padding is larger than the minimum nop size. + if (!AlignFixup) +while (Size % getBackend().getMinimumNopSize()) + Size += F.getAlignment().value(); } -if (Size > F.getAlignMaxBytesToEmit()) +if (!AlignFixup && Size > F.getAlignMaxBytesToEmit()) Size = 0; -return F.getFixedSize() + Size; +Frag.VarContentEnd = F.VarContentStart + Size; +if (Frag.VarContentEnd > Frag.getParent()->ContentStorage.size()) + Frag.getParent()->ContentStorage.resize(Frag.VarContentEnd); aengelke wrote: Also add comment here that actual content is ignored and that this is only for tracking the size? https://github.com/llvm/llvm-project/pull/149465 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] MC: Refactor FT_Align fragments when linker relaxation is enabled (PR #149465)
https://github.com/aengelke edited https://github.com/llvm/llvm-project/pull/149465 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] MC: Refactor FT_Align fragments when linker relaxation is enabled (PR #149465)
@@ -433,42 +434,44 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm, const auto &EF = cast(F); OS << StringRef(EF.getContents().data(), EF.getContents().size()); OS << StringRef(EF.getVarContents().data(), EF.getVarContents().size()); -if (F.getKind() == MCFragment::FT_Align) { - ++stats::EmittedAlignFragments; - assert(F.getAlignFillLen() && - "Invalid virtual align in concrete fragment!"); - - uint64_t Count = (FragmentSize - F.getFixedSize()) / F.getAlignFillLen(); - assert((FragmentSize - F.getFixedSize()) % F.getAlignFillLen() == 0 && - "computeFragmentSize computed size is incorrect"); - - // See if we are aligning with nops, and if so do that first to try to - // fill the Count bytes. Then if that did not fill any bytes or there are - // any bytes left to fill use the Value and ValueSize to fill the rest. If - // we are aligning with nops, ask that target to emit the right data. - if (F.hasAlignEmitNops()) { -if (!Asm.getBackend().writeNopData(OS, Count, F.getSubtargetInfo())) - report_fatal_error("unable to write nop sequence of " + Twine(Count) + - " bytes"); - } else { -// Otherwise, write out in multiples of the value size. 
-for (uint64_t i = 0; i != Count; ++i) { - switch (F.getAlignFillLen()) { - default: -llvm_unreachable("Invalid size!"); - case 1: -OS << char(F.getAlignFill()); -break; - case 2: -support::endian::write(OS, F.getAlignFill(), Endian); -break; - case 4: -support::endian::write(OS, F.getAlignFill(), Endian); -break; - case 8: -support::endian::write(OS, F.getAlignFill(), Endian); -break; - } + } break; + + case MCFragment::FT_Align: { +++stats::EmittedAlignFragments; +OS << StringRef(F.getContents().data(), F.getContents().size()); +assert(F.getAlignFillLen() && + "Invalid virtual align in concrete fragment!"); + +uint64_t Count = (FragmentSize - F.getFixedSize()) / F.getAlignFillLen(); +assert((FragmentSize - F.getFixedSize()) % F.getAlignFillLen() == 0 && + "computeFragmentSize computed size is incorrect"); + +// See if we are aligning with nops, and if so do that first to try to +// fill the Count bytes. Then if that did not fill any bytes or there are +// any bytes left to fill use the Value and ValueSize to fill the rest. If +// we are aligning with nops, ask that target to emit the right data. aengelke wrote: Comment outdated? https://github.com/llvm/llvm-project/pull/149465 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] MC: Refactor FT_Align fragments when linker relaxation is enabled (PR #149465)
https://github.com/aengelke commented: I like reducing the number of hooks, but the const_cast feels too hacky. https://github.com/llvm/llvm-project/pull/149465 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits