https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/84029
Backport cad6ad2759a782c48193f83886488dacc9f330e3 e84182af919d136d74b75ded4d599b38fb47dfb0 Requested by: @nikic From 5dc0641578cd229c87322ad187489f6818783233 Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Mon, 4 Mar 2024 11:32:07 +0100 Subject: [PATCH 1/2] [Inline] Add test for #67054 (NFC) (cherry picked from commit cad6ad2759a782c48193f83886488dacc9f330e3) --- .../Inline/X86/call-abi-compatibility.ll | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll index 3a30980fe31bd7..f03270bafea999 100644 --- a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll +++ b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll @@ -93,3 +93,34 @@ define internal void @caller_not_avx4() { } declare i64 @caller_unknown_simple(i64) + +; FIXME: This call should get inlined, because the callee only contains +; inline ASM, not real calls. +define <8 x i64> @caller_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #0 { +; CHECK-LABEL: define {{[^@]+}}@caller_inline_asm +; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[CALL:%.*]] = call <8 x i64> @callee_inline_asm(ptr [[P0]], i64 [[K]], ptr [[P1]], ptr [[P2]]) +; CHECK-NEXT: ret <8 x i64> [[CALL]] +; + %call = call <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) + ret <8 x i64> %call +} + +define internal <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #1 { +; CHECK-LABEL: define {{[^@]+}}@callee_inline_asm +; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: [[SRC:%.*]] = load <8 x i64>, ptr [[P0]], align 64 +; CHECK-NEXT: [[A:%.*]] = load <8 x i64>, ptr [[P1]], align 64 +; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[P2]], align 64 +; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", 
"=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A]], <8 x i64> [[B]], <8 x i64> [[SRC]]) +; CHECK-NEXT: ret <8 x i64> [[TMP1]] +; + %src = load <8 x i64>, ptr %p0, align 64 + %a = load <8 x i64>, ptr %p1, align 64 + %b = load <8 x i64>, ptr %p2, align 64 + %3 = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 %k, <8 x i64> %a, <8 x i64> %b, <8 x i64> %src) #2 + ret <8 x i64> %3 +} + +attributes #0 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512dq,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" } +attributes #1 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" } From 9b06cea3ee403118cff0ac7640c29756137fc709 Mon Sep 17 00:00:00 2001 From: Nikita Popov <npo...@redhat.com> Date: Tue, 5 Mar 2024 14:21:33 +0100 Subject: [PATCH 2/2] [X86][Inline] Skip inline asm in inlining target feature check (#83820) When inlining across functions with different target features, we perform roughly two checks: 1. The caller features must be a superset of the callee features. 2. Calls in the callee cannot use types where the target features would change the call ABI (e.g. by changing whether something is passed in a zmm or two ymm registers). The latter check is very crude right now. The latter check currently also catches inline asm "calls". I believe that inline asm should be excluded from this check, as it is independent from the usual call ABI, and instead governed by the inline asm constraint string. Fixes https://github.com/llvm/llvm-project/issues/67054. 
(cherry picked from commit e84182af919d136d74b75ded4d599b38fb47dfb0) --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 4 ++++ .../Inline/X86/call-abi-compatibility.ll | 17 ++++++----------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index cd40b1d3b09332..be774a89eccbb4 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -6080,6 +6080,10 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, for (const Instruction &I : instructions(Callee)) { if (const auto *CB = dyn_cast<CallBase>(&I)) { + // Having more target features is fine for inline ASM. + if (CB->isInlineAsm()) + continue; + SmallVector<Type *, 8> Types; for (Value *Arg : CB->args()) Types.push_back(Arg->getType()); diff --git a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll index f03270bafea999..6f582cab2f1452 100644 --- a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll +++ b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll @@ -94,27 +94,22 @@ define internal void @caller_not_avx4() { declare i64 @caller_unknown_simple(i64) -; FIXME: This call should get inlined, because the callee only contains +; This call should get inlined, because the callee only contains ; inline ASM, not real calls. 
define <8 x i64> @caller_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #0 { ; CHECK-LABEL: define {{[^@]+}}@caller_inline_asm ; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: [[CALL:%.*]] = call <8 x i64> @callee_inline_asm(ptr [[P0]], i64 [[K]], ptr [[P1]], ptr [[P2]]) -; CHECK-NEXT: ret <8 x i64> [[CALL]] +; CHECK-NEXT: [[SRC_I:%.*]] = load <8 x i64>, ptr [[P0]], align 64 +; CHECK-NEXT: [[A_I:%.*]] = load <8 x i64>, ptr [[P1]], align 64 +; CHECK-NEXT: [[B_I:%.*]] = load <8 x i64>, ptr [[P2]], align 64 +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A_I]], <8 x i64> [[B_I]], <8 x i64> [[SRC_I]]) +; CHECK-NEXT: ret <8 x i64> [[TMP1]] ; %call = call <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) ret <8 x i64> %call } define internal <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) #1 { -; CHECK-LABEL: define {{[^@]+}}@callee_inline_asm -; CHECK-SAME: (ptr [[P0:%.*]], i64 [[K:%.*]], ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR3:[0-9]+]] { -; CHECK-NEXT: [[SRC:%.*]] = load <8 x i64>, ptr [[P0]], align 64 -; CHECK-NEXT: [[A:%.*]] = load <8 x i64>, ptr [[P1]], align 64 -; CHECK-NEXT: [[B:%.*]] = load <8 x i64>, ptr [[P2]], align 64 -; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i64> asm "vpaddb\09$($3, $2, $0 {$1}", "=v,^Yk,v,v,0,~{dirflag},~{fpsr},~{flags}"(i64 [[K]], <8 x i64> [[A]], <8 x i64> [[B]], <8 x i64> [[SRC]]) -; CHECK-NEXT: ret <8 x i64> [[TMP1]] -; %src = load <8 x i64>, ptr %p0, align 64 %a = load <8 x i64>, ptr %p1, align 64 %b = load <8 x i64>, ptr %p2, align 64 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits