https://github.com/folkertdev updated https://github.com/llvm/llvm-project/pull/204887
>From 6306970fa9266b0d386a51a477e7aefe958363d7 Mon Sep 17 00:00:00 2001 From: Folkert de Vries <[email protected]> Date: Fri, 19 Jun 2026 20:05:36 +0200 Subject: [PATCH 1/8] [X86][Windows] Return `fp128` on the stack This is in line with mingw64 gcc and follows the win64 CC (at least more) --- clang/lib/CodeGen/Targets/X86.cpp | 14 +- clang/test/CodeGen/win-fp128.c | 4 +- llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 14 + .../test/CodeGen/X86/fp128-libcalls-strict.ll | 451 +++++++++++++----- llvm/test/CodeGen/X86/fp128-libcalls.ll | 251 +++++++--- llvm/test/CodeGen/X86/i128-fp128-abi.ll | 132 +++-- 6 files changed, 624 insertions(+), 242 deletions(-) diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index dbe4d656aabc5..77c912b021604 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -3437,8 +3437,6 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, case BuiltinType::Int128: case BuiltinType::UInt128: case BuiltinType::Float128: - // 128-bit float and integer types share the same ABI. - // If it's a parameter type, the normal ABI rule is that arguments larger // than 8 bytes are passed indirectly. GCC follows it. We follow it too, // even though it isn't particularly efficient. @@ -3449,10 +3447,14 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, // Mingw64 GCC returns i128 in XMM0. Coerce to v2i64 to handle that. // Clang matches them for compatibility. - // NOTE: GCC actually returns f128 indirectly but will hopefully change. - // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054#c8. - return ABIArgInfo::getDirect(llvm::FixedVectorType::get( - llvm::Type::getInt64Ty(getVMContext()), 2)); + if (BT->getKind() == BuiltinType::Int128 || + BT->getKind() == BuiltinType::UInt128) + return ABIArgInfo::getDirect(llvm::FixedVectorType::get( + llvm::Type::getInt64Ty(getVMContext()), 2)); + + // Mingw64 GCC returns f128 via sret. 
Clang matches that for + // compatibility. + break; default: break; diff --git a/clang/test/CodeGen/win-fp128.c b/clang/test/CodeGen/win-fp128.c index 58e203d4fc8ed..dc144f899fa4f 100644 --- a/clang/test/CodeGen/win-fp128.c +++ b/clang/test/CodeGen/win-fp128.c @@ -3,10 +3,10 @@ // __float128 is unsupported on MSVC __float128 fp128_ret(void) { return 0; } -// CHECK-GNU64: define dso_local <2 x i64> @fp128_ret() +// CHECK-GNU64: define dso_local fp128 @fp128_ret() __float128 fp128_args(__float128 a, __float128 b) { return a * b; } -// CHECK-GNU64: define dso_local <2 x i64> @fp128_args(ptr noundef dead_on_return %0, ptr noundef dead_on_return %1) +// CHECK-GNU64: define dso_local fp128 @fp128_args(ptr noundef dead_on_return %0, ptr noundef dead_on_return %1) void fp128_vararg(int a, ...) { // CHECK-GNU64-LABEL: define dso_local void @fp128_vararg diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index 7c068115df481..bce581ad7a48b 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -670,6 +670,20 @@ bool X86TargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context, const Type *RetTy) const { + // Mingw64 GCC returns f128 via sret, which matches the documentation of the + // Windows x64 calling convention: + // + // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170#return-values + // + // > Otherwise, the caller must allocate memory for the return value and pass + // a pointer to it as the first argument. + // + // Return false, which will perform sret demotion. 
+ if (Subtarget.isCallingConvWin64(CallConv) && + llvm::any_of( + Outs, [](const ISD::OutputArg &Out) { return Out.VT == MVT::f128; })) + return false; + SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_X86); diff --git a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll index ad2d690fd7ed0..dfff88d30bcd4 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls-strict.ll @@ -79,15 +79,22 @@ define fp128 @add(fp128 %x, fp128 %y) nounwind strictfp { ; ; WIN-LABEL: add: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps (%r8), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __addtf3 -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: add: @@ -201,15 +208,22 @@ define fp128 @sub(fp128 %x, fp128 %y) nounwind strictfp { ; ; WIN-LABEL: sub: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps (%r8), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __subtf3 -; WIN-NEXT: addq 
$72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: sub: @@ -323,15 +337,22 @@ define fp128 @mul(fp128 %x, fp128 %y) nounwind strictfp { ; ; WIN-LABEL: mul: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps (%r8), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __multf3 -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: mul: @@ -445,15 +466,22 @@ define fp128 @div(fp128 %x, fp128 %y) nounwind strictfp { ; ; WIN-LABEL: div: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps (%r8), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __divtf3 -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: div: @@ -568,18 +596,25 @@ define fp128 @fma(fp128 %x, fp128 %y, fp128 %z) nounwind strictfp { ; ; WIN-LABEL: fma: ; WIN: # %bb.0: # 
%entry -; WIN-NEXT: subq $88, %rsp -; WIN-NEXT: movaps (%r8), %xmm0 -; WIN-NEXT: movaps (%rcx), %xmm1 -; WIN-NEXT: movaps (%rdx), %xmm2 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $96, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%r9), %xmm0 +; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: movaps (%r8), %xmm2 ; WIN-NEXT: movaps %xmm2, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r9 ; WIN-NEXT: callq fmal -; WIN-NEXT: addq $88, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $96, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: fma: @@ -694,15 +729,22 @@ define fp128 @frem(fp128 %x, fp128 %y) nounwind strictfp { ; ; WIN-LABEL: frem: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps (%r8), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq fmodl -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: frem: @@ -797,12 +839,19 @@ define fp128 @ceil(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: ceil: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps 
%xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq ceill -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: ceil: @@ -887,12 +936,19 @@ define fp128 @acos(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: acos: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq acosl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: acos: @@ -977,12 +1033,19 @@ define fp128 @cos(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: cos: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq cosl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: cos: @@ -1067,12 +1130,19 @@ define fp128 @cosh(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: cosh: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; 
WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq coshl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: cosh: @@ -1157,12 +1227,19 @@ define fp128 @exp(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: exp: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq expl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: exp: @@ -1247,12 +1324,19 @@ define fp128 @exp2(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: exp2: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq exp2l -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: exp2: @@ -1337,12 +1421,19 @@ define fp128 @floor(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: floor: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps 
(%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq floorl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: floor: @@ -1427,12 +1518,19 @@ define fp128 @log(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: log: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq logl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: log: @@ -1517,12 +1615,19 @@ define fp128 @log10(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: log10: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq log10l -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: log10: @@ -1607,12 +1712,19 @@ define fp128 @log2(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: log2: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi 
+; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq log2l -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: log2: @@ -1709,15 +1821,22 @@ define fp128 @maxnum(fp128 %x, fp128 %y) nounwind strictfp { ; ; WIN-LABEL: maxnum: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps (%r8), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq fmaxl -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: maxnum: @@ -1824,15 +1943,22 @@ define fp128 @minnum(fp128 %x, fp128 %y) nounwind strictfp { ; ; WIN-LABEL: minnum: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps (%r8), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq fminl -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; 
WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: minnum: @@ -1927,12 +2053,19 @@ define fp128 @nearbyint(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: nearbyint: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq nearbyintl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: nearbyint: @@ -2029,15 +2162,22 @@ define fp128 @pow(fp128 %x, fp128 %y) nounwind strictfp { ; ; WIN-LABEL: pow: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps (%r8), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq powl -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: pow: @@ -2143,12 +2283,19 @@ define fp128 @powi(fp128 %x, i32 %y) nounwind strictfp { ; ; WIN-LABEL: powi: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx 
+; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq __powitf2 -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: powi: @@ -2237,12 +2384,19 @@ define fp128 @rint(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: rint: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq rintl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: rint: @@ -2327,12 +2481,19 @@ define fp128 @round(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: round: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq roundl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: round: @@ -2417,12 +2578,19 @@ define fp128 @roundeven(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: roundeven: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; 
WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq roundevenl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: roundeven: @@ -2507,12 +2675,19 @@ define fp128 @asin(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: asin: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq asinl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: asin: @@ -2597,12 +2772,19 @@ define fp128 @sin(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: sin: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq sinl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: sin: @@ -2687,12 +2869,19 @@ define fp128 @sinh(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: sinh: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps 
%xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq sinhl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: sinh: @@ -2777,12 +2966,19 @@ define fp128 @sqrt(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: sqrt: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq sqrtl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: sqrt: @@ -2867,12 +3063,19 @@ define fp128 @atan(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: atan: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq atanl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: atan: @@ -2969,15 +3172,22 @@ define fp128 @atan2(fp128 %x, fp128 %y) nounwind strictfp { ; ; WIN-LABEL: atan2: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq 
%rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps (%r8), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq atan2l -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: atan2: @@ -3072,12 +3282,19 @@ define fp128 @tan(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: tan: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq tanl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: tan: @@ -3162,12 +3379,19 @@ define fp128 @tanh(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: tanh: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq tanhl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: tanh: @@ -3252,12 +3476,19 @@ define fp128 @trunc(fp128 %x) nounwind strictfp { ; ; WIN-LABEL: trunc: ; WIN: # %bb.0: # %entry 
-; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq truncl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: trunc: diff --git a/llvm/test/CodeGen/X86/fp128-libcalls.ll b/llvm/test/CodeGen/X86/fp128-libcalls.ll index 4b0449fd7502e..c594b15ef1cbe 100644 --- a/llvm/test/CodeGen/X86/fp128-libcalls.ll +++ b/llvm/test/CodeGen/X86/fp128-libcalls.ll @@ -78,16 +78,18 @@ define dso_local void @Test128Add(fp128 %d1, fp128 %d2) nounwind { ; ; WIN-LABEL: Test128Add: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp +; WIN-NEXT: subq $88, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps (%rdx), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __addtf3 +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: addq $88, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Add: @@ -207,16 +209,18 @@ define dso_local void @Test128_1Add(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128_1Add: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp +; WIN-NEXT: subq $88, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps vf128(%rip), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __addtf3 +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), 
%xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: addq $88, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128_1Add: @@ -331,16 +335,18 @@ define dso_local void @Test128Sub(fp128 %d1, fp128 %d2) nounwind { ; ; WIN-LABEL: Test128Sub: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp +; WIN-NEXT: subq $88, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps (%rdx), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __subtf3 +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: addq $88, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Sub: @@ -460,16 +466,18 @@ define dso_local void @Test128_1Sub(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128_1Sub: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp +; WIN-NEXT: subq $88, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps vf128(%rip), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __subtf3 +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: addq $88, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128_1Sub: @@ -584,16 +592,18 @@ define dso_local void @Test128Mul(fp128 %d1, fp128 %d2) nounwind { ; ; WIN-LABEL: Test128Mul: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp +; WIN-NEXT: subq $88, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps (%rdx), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: 
callq __multf3 +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: addq $88, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Mul: @@ -713,16 +723,18 @@ define dso_local void @Test128_1Mul(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128_1Mul: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp +; WIN-NEXT: subq $88, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps vf128(%rip), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __multf3 +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: addq $88, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128_1Mul: @@ -837,16 +849,18 @@ define dso_local void @Test128Div(fp128 %d1, fp128 %d2) nounwind { ; ; WIN-LABEL: Test128Div: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp +; WIN-NEXT: subq $88, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps (%rdx), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __divtf3 +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: addq $88, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Div: @@ -966,16 +980,18 @@ define dso_local void @Test128_1Div(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128_1Div: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp +; WIN-NEXT: subq $88, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps vf128(%rip), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; 
WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq __divtf3 +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: addq $88, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128_1Div: @@ -1082,16 +1098,18 @@ define dso_local void @Test128Rem(fp128 %d1, fp128 %d2) nounwind { ; ; WIN-LABEL: Test128Rem: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp +; WIN-NEXT: subq $88, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps (%rdx), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq fmodl +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: addq $88, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Rem: @@ -1201,16 +1219,18 @@ define dso_local void @Test128_1Rem(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128_1Rem: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $72, %rsp +; WIN-NEXT: subq $88, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps vf128(%rip), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq fmodl +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: addq $88, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128_1Rem: @@ -1303,13 +1323,15 @@ define dso_local void @Test128Sqrt(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128Sqrt: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp +; WIN-NEXT: subq $72, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq sqrtl +; WIN-NEXT: 
movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: addq $72, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Sqrt: @@ -1390,13 +1412,15 @@ define dso_local void @Test128Sin(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128Sin: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp +; WIN-NEXT: subq $72, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq sinl +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: addq $72, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Sin: @@ -1477,13 +1501,15 @@ define dso_local void @Test128Cos(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128Cos: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp +; WIN-NEXT: subq $72, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq cosl +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: addq $72, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Cos: @@ -1564,13 +1590,15 @@ define dso_local void @Test128Ceil(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128Ceil: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp +; WIN-NEXT: subq $72, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq ceill +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: addq $72, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Ceil: @@ -1651,13 +1679,15 @@ define dso_local void @Test128Floor(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128Floor: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, 
%rsp +; WIN-NEXT: subq $72, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq floorl +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: addq $72, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Floor: @@ -1738,13 +1768,15 @@ define dso_local void @Test128Trunc(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128Trunc: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp +; WIN-NEXT: subq $72, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq truncl +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: addq $72, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Trunc: @@ -1825,13 +1857,15 @@ define dso_local void @Test128Nearbyint(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128Nearbyint: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp +; WIN-NEXT: subq $72, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq nearbyintl +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: addq $72, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Nearbyint: @@ -1912,13 +1946,15 @@ define dso_local void @Test128Rint(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128Rint: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp +; WIN-NEXT: subq $72, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq rintl +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; 
WIN-NEXT: addq $56, %rsp +; WIN-NEXT: addq $72, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Rint: @@ -1999,13 +2035,15 @@ define dso_local void @Test128Round(fp128 %d1) nounwind { ; ; WIN-LABEL: Test128Round: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $56, %rsp +; WIN-NEXT: subq $72, %rsp ; WIN-NEXT: movaps (%rcx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq roundl +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; WIN-NEXT: movaps %xmm0, vf128(%rip) -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: addq $72, %rsp ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Round: @@ -2102,18 +2140,25 @@ define fp128 @Test128FMA(fp128 %a, fp128 %b, fp128 %c) nounwind { ; ; WIN-LABEL: Test128FMA: ; WIN: # %bb.0: # %entry -; WIN-NEXT: subq $88, %rsp -; WIN-NEXT: movaps (%r8), %xmm0 -; WIN-NEXT: movaps (%rcx), %xmm1 -; WIN-NEXT: movaps (%rdx), %xmm2 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $96, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%r9), %xmm0 +; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: movaps (%r8), %xmm2 ; WIN-NEXT: movaps %xmm2, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r9 ; WIN-NEXT: callq fmal -; WIN-NEXT: addq $88, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $96, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128FMA: @@ -2211,12 +2256,19 @@ define fp128 @Test128Acos(fp128 %a) nounwind { ; ; WIN-LABEL: Test128Acos: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; 
WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq acosl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Acos: @@ -2294,12 +2346,19 @@ define fp128 @Test128Asin(fp128 %a) nounwind { ; ; WIN-LABEL: Test128Asin: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq asinl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Asin: @@ -2377,12 +2436,19 @@ define fp128 @Test128Atan(fp128 %a) nounwind { ; ; WIN-LABEL: Test128Atan: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq atanl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Atan: @@ -2472,15 +2538,22 @@ define fp128 @Test128Atan2(fp128 %a, fp128 %b) nounwind { ; ; WIN-LABEL: Test128Atan2: ; WIN: # %bb.0: -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: movaps (%rdx), %xmm1 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $80, %rsp +; WIN-NEXT: movq %rcx, %rsi +; 
WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps (%r8), %xmm1 ; WIN-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %r8 ; WIN-NEXT: callq atan2l -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $80, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Atan2: @@ -2568,12 +2641,19 @@ define fp128 @Test128Cosh(fp128 %a) nounwind { ; ; WIN-LABEL: Test128Cosh: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq coshl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Cosh: @@ -2651,12 +2731,19 @@ define fp128 @Test128Sinh(fp128 %a) nounwind { ; ; WIN-LABEL: Test128Sinh: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq sinhl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Sinh: @@ -2734,12 +2821,19 @@ define fp128 @Test128Tan(fp128 %a) nounwind { ; ; WIN-LABEL: Test128Tan: ; WIN: # %bb.0: -; 
WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq tanl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Tan: @@ -2817,12 +2911,19 @@ define fp128 @Test128Tanh(fp128 %a) nounwind { ; ; WIN-LABEL: Test128Tanh: ; WIN: # %bb.0: -; WIN-NEXT: subq $56, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx +; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq tanhl -; WIN-NEXT: addq $56, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Tanh: @@ -2916,14 +3017,20 @@ define { fp128, fp128 } @Test128Modf(fp128 %a) nounwind { ; ; WIN-LABEL: Test128Modf: ; WIN: # %bb.0: -; WIN-NEXT: subq $72, %rsp -; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: pushq %rsi +; WIN-NEXT: subq $64, %rsp +; WIN-NEXT: movq %rcx, %rsi +; WIN-NEXT: movaps (%rdx), %xmm0 ; WIN-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; WIN-NEXT: leaq 16(%rcx), %r8 ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; WIN-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; WIN-NEXT: callq modfl -; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; WIN-NEXT: addq $72, %rsp +; WIN-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rsi) +; WIN-NEXT: movq %rsi, %rax +; WIN-NEXT: addq $64, %rsp +; WIN-NEXT: popq %rsi ; WIN-NEXT: retq ; ; WIN-X86-LABEL: Test128Modf: diff 
--git a/llvm/test/CodeGen/X86/i128-fp128-abi.ll b/llvm/test/CodeGen/X86/i128-fp128-abi.ll index 2174d5056e6ce..9f385ee2faf4e 100644 --- a/llvm/test/CodeGen/X86/i128-fp128-abi.ll +++ b/llvm/test/CodeGen/X86/i128-fp128-abi.ll @@ -190,7 +190,9 @@ define PrimTy @return(ptr %p) nounwind { ; ; CHECK-MSVC64-F128-LABEL: return: ; CHECK-MSVC64-F128: # %bb.0: -; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: movq %rcx, %rax +; CHECK-MSVC64-F128-NEXT: movaps (%rdx), %xmm0 +; CHECK-MSVC64-F128-NEXT: movaps %xmm0, (%rcx) ; CHECK-MSVC64-F128-NEXT: retq ; ; CHECK-MSVC64-I128-LABEL: return: @@ -201,7 +203,9 @@ define PrimTy @return(ptr %p) nounwind { ; ; CHECK-MINGW-F128-LABEL: return: ; CHECK-MINGW-F128: # %bb.0: -; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: movq %rcx, %rax +; CHECK-MINGW-F128-NEXT: movaps (%rdx), %xmm0 +; CHECK-MINGW-F128-NEXT: movaps %xmm0, (%rcx) ; CHECK-MINGW-F128-NEXT: retq ; ; CHECK-MINGW-I128-LABEL: return: @@ -262,7 +266,9 @@ define PrimTy @first_arg(PrimTy %x) nounwind { ; ; CHECK-MSVC64-F128-LABEL: first_arg: ; CHECK-MSVC64-F128: # %bb.0: -; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: movq %rcx, %rax +; CHECK-MSVC64-F128-NEXT: movaps (%rdx), %xmm0 +; CHECK-MSVC64-F128-NEXT: movaps %xmm0, (%rcx) ; CHECK-MSVC64-F128-NEXT: retq ; ; CHECK-MSVC64-I128-LABEL: first_arg: @@ -272,7 +278,9 @@ define PrimTy @first_arg(PrimTy %x) nounwind { ; ; CHECK-MINGW-F128-LABEL: first_arg: ; CHECK-MINGW-F128: # %bb.0: -; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: movq %rcx, %rax +; CHECK-MINGW-F128-NEXT: movaps (%rdx), %xmm0 +; CHECK-MINGW-F128-NEXT: movaps %xmm0, (%rcx) ; CHECK-MINGW-F128-NEXT: retq ; ; CHECK-MINGW-I128-LABEL: first_arg: @@ -338,8 +346,10 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw ; ; CHECK-MSVC64-F128-LABEL: leading_args: ; CHECK-MSVC64-F128: # %bb.0: -; CHECK-MSVC64-F128-NEXT: movq 40(%rsp), %rax -; 
CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MSVC64-F128-NEXT: movq %rcx, %rax +; CHECK-MSVC64-F128-NEXT: movq 48(%rsp), %rcx +; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: movaps %xmm0, (%rax) ; CHECK-MSVC64-F128-NEXT: retq ; ; CHECK-MSVC64-I128-LABEL: leading_args: @@ -350,8 +360,10 @@ define PrimTy @leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, PrimTy %x) nounw ; ; CHECK-MINGW-F128-LABEL: leading_args: ; CHECK-MINGW-F128: # %bb.0: -; CHECK-MINGW-F128-NEXT: movq 40(%rsp), %rax -; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MINGW-F128-NEXT: movq %rcx, %rax +; CHECK-MINGW-F128-NEXT: movq 48(%rsp), %rcx +; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: movaps %xmm0, (%rax) ; CHECK-MINGW-F128-NEXT: retq ; ; CHECK-MINGW-I128-LABEL: leading_args: @@ -418,8 +430,10 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr ; ; CHECK-MSVC64-F128-LABEL: many_leading_args: ; CHECK-MSVC64-F128: # %bb.0: -; CHECK-MSVC64-F128-NEXT: movq 56(%rsp), %rax -; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MSVC64-F128-NEXT: movq %rcx, %rax +; CHECK-MSVC64-F128-NEXT: movq 64(%rsp), %rcx +; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: movaps %xmm0, (%rax) ; CHECK-MSVC64-F128-NEXT: retq ; ; CHECK-MSVC64-I128-LABEL: many_leading_args: @@ -430,8 +444,10 @@ define PrimTy @many_leading_args(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, Pr ; ; CHECK-MINGW-F128-LABEL: many_leading_args: ; CHECK-MINGW-F128: # %bb.0: -; CHECK-MINGW-F128-NEXT: movq 56(%rsp), %rax -; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MINGW-F128-NEXT: movq %rcx, %rax +; CHECK-MINGW-F128-NEXT: movq 64(%rsp), %rcx +; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: movaps %xmm0, (%rax) ; CHECK-MINGW-F128-NEXT: retq ; ; CHECK-MINGW-I128-LABEL: many_leading_args: @@ -496,8 +512,10 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy ; ; 
CHECK-MSVC64-F128-LABEL: trailing_arg: ; CHECK-MSVC64-F128: # %bb.0: -; CHECK-MSVC64-F128-NEXT: movq 48(%rsp), %rax -; CHECK-MSVC64-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MSVC64-F128-NEXT: movq %rcx, %rax +; CHECK-MSVC64-F128-NEXT: movq 56(%rsp), %rcx +; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MSVC64-F128-NEXT: movaps %xmm0, (%rax) ; CHECK-MSVC64-F128-NEXT: retq ; ; CHECK-MSVC64-I128-LABEL: trailing_arg: @@ -508,8 +526,10 @@ define PrimTy @trailing_arg(i64 %_0, i64 %_1, i64 %_2, i64 %_3, i64 %_4, PrimTy ; ; CHECK-MINGW-F128-LABEL: trailing_arg: ; CHECK-MINGW-F128: # %bb.0: -; CHECK-MINGW-F128-NEXT: movq 48(%rsp), %rax -; CHECK-MINGW-F128-NEXT: movaps (%rax), %xmm0 +; CHECK-MINGW-F128-NEXT: movq %rcx, %rax +; CHECK-MINGW-F128-NEXT: movq 56(%rsp), %rcx +; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 +; CHECK-MINGW-F128-NEXT: movaps %xmm0, (%rax) ; CHECK-MINGW-F128-NEXT: retq ; ; CHECK-MINGW-I128-LABEL: trailing_arg: @@ -578,12 +598,13 @@ define void @call_first_arg(PrimTy %x) nounwind { ; ; CHECK-MSVC64-F128-LABEL: call_first_arg: ; CHECK-MSVC64-F128: # %bb.0: -; CHECK-MSVC64-F128-NEXT: subq $56, %rsp +; CHECK-MSVC64-F128-NEXT: subq $72, %rsp ; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 ; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 32(%rsp) -; CHECK-MSVC64-F128-NEXT: leaq 32(%rsp), %rcx +; CHECK-MSVC64-F128-NEXT: leaq 48(%rsp), %rcx +; CHECK-MSVC64-F128-NEXT: leaq 32(%rsp), %rdx ; CHECK-MSVC64-F128-NEXT: callq first_arg -; CHECK-MSVC64-F128-NEXT: addq $56, %rsp +; CHECK-MSVC64-F128-NEXT: addq $72, %rsp ; CHECK-MSVC64-F128-NEXT: retq ; ; CHECK-MSVC64-I128-LABEL: call_first_arg: @@ -595,12 +616,13 @@ define void @call_first_arg(PrimTy %x) nounwind { ; ; CHECK-MINGW-F128-LABEL: call_first_arg: ; CHECK-MINGW-F128: # %bb.0: -; CHECK-MINGW-F128-NEXT: subq $56, %rsp +; CHECK-MINGW-F128-NEXT: subq $72, %rsp ; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 ; CHECK-MINGW-F128-NEXT: movaps %xmm0, 32(%rsp) -; CHECK-MINGW-F128-NEXT: leaq 32(%rsp), %rcx +; 
CHECK-MINGW-F128-NEXT: leaq 48(%rsp), %rcx +; CHECK-MINGW-F128-NEXT: leaq 32(%rsp), %rdx ; CHECK-MINGW-F128-NEXT: callq first_arg -; CHECK-MINGW-F128-NEXT: addq $56, %rsp +; CHECK-MINGW-F128-NEXT: addq $72, %rsp ; CHECK-MINGW-F128-NEXT: retq ; ; CHECK-MINGW-I128-LABEL: call_first_arg: @@ -682,17 +704,18 @@ define void @call_leading_args(PrimTy %x) nounwind { ; ; CHECK-MSVC64-F128-LABEL: call_leading_args: ; CHECK-MSVC64-F128: # %bb.0: -; CHECK-MSVC64-F128-NEXT: subq $72, %rsp +; CHECK-MSVC64-F128-NEXT: subq $88, %rsp ; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 ; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 48(%rsp) ; CHECK-MSVC64-F128-NEXT: leaq 48(%rsp), %rax -; CHECK-MSVC64-F128-NEXT: movq %rax, 32(%rsp) -; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx +; CHECK-MSVC64-F128-NEXT: movq %rax, 40(%rsp) +; CHECK-MSVC64-F128-NEXT: movq $0, 32(%rsp) +; CHECK-MSVC64-F128-NEXT: leaq 64(%rsp), %rcx ; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx ; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d ; CHECK-MSVC64-F128-NEXT: xorl %r9d, %r9d ; CHECK-MSVC64-F128-NEXT: callq leading_args -; CHECK-MSVC64-F128-NEXT: addq $72, %rsp +; CHECK-MSVC64-F128-NEXT: addq $88, %rsp ; CHECK-MSVC64-F128-NEXT: retq ; ; CHECK-MSVC64-I128-LABEL: call_leading_args: @@ -710,17 +733,18 @@ define void @call_leading_args(PrimTy %x) nounwind { ; ; CHECK-MINGW-F128-LABEL: call_leading_args: ; CHECK-MINGW-F128: # %bb.0: -; CHECK-MINGW-F128-NEXT: subq $72, %rsp +; CHECK-MINGW-F128-NEXT: subq $88, %rsp ; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 ; CHECK-MINGW-F128-NEXT: movaps %xmm0, 48(%rsp) ; CHECK-MINGW-F128-NEXT: leaq 48(%rsp), %rax -; CHECK-MINGW-F128-NEXT: movq %rax, 32(%rsp) -; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx +; CHECK-MINGW-F128-NEXT: movq %rax, 40(%rsp) +; CHECK-MINGW-F128-NEXT: movq $0, 32(%rsp) +; CHECK-MINGW-F128-NEXT: leaq 64(%rsp), %rcx ; CHECK-MINGW-F128-NEXT: xorl %edx, %edx ; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d ; CHECK-MINGW-F128-NEXT: xorl %r9d, %r9d ; CHECK-MINGW-F128-NEXT: callq leading_args -; 
CHECK-MINGW-F128-NEXT: addq $72, %rsp +; CHECK-MINGW-F128-NEXT: addq $88, %rsp ; CHECK-MINGW-F128-NEXT: retq ; ; CHECK-MINGW-I128-LABEL: call_leading_args: @@ -831,21 +855,22 @@ define void @call_many_leading_args(PrimTy %x) nounwind { ; ; CHECK-MSVC64-F128-LABEL: call_many_leading_args: ; CHECK-MSVC64-F128: # %bb.0: -; CHECK-MSVC64-F128-NEXT: subq $88, %rsp +; CHECK-MSVC64-F128-NEXT: subq $120, %rsp ; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 ; CHECK-MSVC64-F128-NEXT: xorps %xmm1, %xmm1 -; CHECK-MSVC64-F128-NEXT: movaps %xmm1, 64(%rsp) -; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 48(%rsp) -; CHECK-MSVC64-F128-NEXT: leaq 48(%rsp), %rax +; CHECK-MSVC64-F128-NEXT: movaps %xmm1, 80(%rsp) +; CHECK-MSVC64-F128-NEXT: leaq 80(%rsp), %rax ; CHECK-MSVC64-F128-NEXT: movq %rax, 40(%rsp) +; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 64(%rsp) ; CHECK-MSVC64-F128-NEXT: leaq 64(%rsp), %rax -; CHECK-MSVC64-F128-NEXT: movq %rax, 32(%rsp) -; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx +; CHECK-MSVC64-F128-NEXT: movq %rax, 48(%rsp) +; CHECK-MSVC64-F128-NEXT: movq $0, 32(%rsp) +; CHECK-MSVC64-F128-NEXT: leaq 96(%rsp), %rcx ; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx ; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d ; CHECK-MSVC64-F128-NEXT: xorl %r9d, %r9d ; CHECK-MSVC64-F128-NEXT: callq many_leading_args -; CHECK-MSVC64-F128-NEXT: addq $88, %rsp +; CHECK-MSVC64-F128-NEXT: addq $120, %rsp ; CHECK-MSVC64-F128-NEXT: retq ; ; CHECK-MSVC64-I128-LABEL: call_many_leading_args: @@ -865,21 +890,22 @@ define void @call_many_leading_args(PrimTy %x) nounwind { ; ; CHECK-MINGW-F128-LABEL: call_many_leading_args: ; CHECK-MINGW-F128: # %bb.0: -; CHECK-MINGW-F128-NEXT: subq $88, %rsp +; CHECK-MINGW-F128-NEXT: subq $120, %rsp ; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 ; CHECK-MINGW-F128-NEXT: xorps %xmm1, %xmm1 -; CHECK-MINGW-F128-NEXT: movaps %xmm1, 64(%rsp) -; CHECK-MINGW-F128-NEXT: movaps %xmm0, 48(%rsp) -; CHECK-MINGW-F128-NEXT: leaq 48(%rsp), %rax +; CHECK-MINGW-F128-NEXT: movaps %xmm1, 80(%rsp) +; 
CHECK-MINGW-F128-NEXT: leaq 80(%rsp), %rax ; CHECK-MINGW-F128-NEXT: movq %rax, 40(%rsp) +; CHECK-MINGW-F128-NEXT: movaps %xmm0, 64(%rsp) ; CHECK-MINGW-F128-NEXT: leaq 64(%rsp), %rax -; CHECK-MINGW-F128-NEXT: movq %rax, 32(%rsp) -; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx +; CHECK-MINGW-F128-NEXT: movq %rax, 48(%rsp) +; CHECK-MINGW-F128-NEXT: movq $0, 32(%rsp) +; CHECK-MINGW-F128-NEXT: leaq 96(%rsp), %rcx ; CHECK-MINGW-F128-NEXT: xorl %edx, %edx ; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d ; CHECK-MINGW-F128-NEXT: xorl %r9d, %r9d ; CHECK-MINGW-F128-NEXT: callq many_leading_args -; CHECK-MINGW-F128-NEXT: addq $88, %rsp +; CHECK-MINGW-F128-NEXT: addq $120, %rsp ; CHECK-MINGW-F128-NEXT: retq ; ; CHECK-MINGW-I128-LABEL: call_many_leading_args: @@ -993,17 +1019,18 @@ define void @call_trailing_arg(PrimTy %x) nounwind { ; ; CHECK-MSVC64-F128-LABEL: call_trailing_arg: ; CHECK-MSVC64-F128: # %bb.0: -; CHECK-MSVC64-F128-NEXT: subq $72, %rsp +; CHECK-MSVC64-F128-NEXT: subq $88, %rsp ; CHECK-MSVC64-F128-NEXT: movaps (%rcx), %xmm0 ; CHECK-MSVC64-F128-NEXT: movaps %xmm0, 48(%rsp) ; CHECK-MSVC64-F128-NEXT: leaq 48(%rsp), %rax -; CHECK-MSVC64-F128-NEXT: movq %rax, 32(%rsp) -; CHECK-MSVC64-F128-NEXT: xorl %ecx, %ecx +; CHECK-MSVC64-F128-NEXT: movq %rax, 40(%rsp) +; CHECK-MSVC64-F128-NEXT: movq $0, 32(%rsp) +; CHECK-MSVC64-F128-NEXT: leaq 64(%rsp), %rcx ; CHECK-MSVC64-F128-NEXT: xorl %edx, %edx ; CHECK-MSVC64-F128-NEXT: xorl %r8d, %r8d ; CHECK-MSVC64-F128-NEXT: xorl %r9d, %r9d ; CHECK-MSVC64-F128-NEXT: callq trailing_arg -; CHECK-MSVC64-F128-NEXT: addq $72, %rsp +; CHECK-MSVC64-F128-NEXT: addq $88, %rsp ; CHECK-MSVC64-F128-NEXT: retq ; ; CHECK-MSVC64-I128-LABEL: call_trailing_arg: @@ -1021,17 +1048,18 @@ define void @call_trailing_arg(PrimTy %x) nounwind { ; ; CHECK-MINGW-F128-LABEL: call_trailing_arg: ; CHECK-MINGW-F128: # %bb.0: -; CHECK-MINGW-F128-NEXT: subq $72, %rsp +; CHECK-MINGW-F128-NEXT: subq $88, %rsp ; CHECK-MINGW-F128-NEXT: movaps (%rcx), %xmm0 ; CHECK-MINGW-F128-NEXT: movaps 
%xmm0, 48(%rsp) ; CHECK-MINGW-F128-NEXT: leaq 48(%rsp), %rax -; CHECK-MINGW-F128-NEXT: movq %rax, 32(%rsp) -; CHECK-MINGW-F128-NEXT: xorl %ecx, %ecx +; CHECK-MINGW-F128-NEXT: movq %rax, 40(%rsp) +; CHECK-MINGW-F128-NEXT: movq $0, 32(%rsp) +; CHECK-MINGW-F128-NEXT: leaq 64(%rsp), %rcx ; CHECK-MINGW-F128-NEXT: xorl %edx, %edx ; CHECK-MINGW-F128-NEXT: xorl %r8d, %r8d ; CHECK-MINGW-F128-NEXT: xorl %r9d, %r9d ; CHECK-MINGW-F128-NEXT: callq trailing_arg -; CHECK-MINGW-F128-NEXT: addq $72, %rsp +; CHECK-MINGW-F128-NEXT: addq $88, %rsp ; CHECK-MINGW-F128-NEXT: retq ; ; CHECK-MINGW-I128-LABEL: call_trailing_arg: >From 70e8db9f599fa870339d87619dba4227ac2043d0 Mon Sep 17 00:00:00 2001 From: Folkert de Vries <[email protected]> Date: Sat, 20 Jun 2026 12:46:08 +0200 Subject: [PATCH 2/8] add more nuance to GCC compat comment --- llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index bce581ad7a48b..ba177c0cc63a1 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -670,13 +670,24 @@ bool X86TargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context, const Type *RetTy) const { - // Mingw64 GCC returns f128 via sret, which matches the documentation of the - // Windows x64 calling convention: + // Mingw64 GCC returns f128 via sret, and LLVM matches it for compatibility. + // + // + // Using sret is a reasonable implementation of the Windows x64 calling + // convention: // // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170#return-values // // > Otherwise, the caller must allocate memory for the return value and pass - // a pointer to it as the first argument. + // > a pointer to it as the first argument. 
+ // + // Although it is not the only reasonable interpretation: + // + // > Nonscalar types including floats, doubles, and vector types such as + // > __m128, __m128i, __m128d are returned in XMM0. + // + // For now, we prefer compatibility with GCC. If official guidelines are ever + // published, this can be revisited. // // Return false, which will perform sret demotion. if (Subtarget.isCallingConvWin64(CallConv) && >From 6f909ce71777e3d887c553373b71c1c1b1728bc6 Mon Sep 17 00:00:00 2001 From: Folkert de Vries <[email protected]> Date: Tue, 23 Jun 2026 01:08:52 +0200 Subject: [PATCH 3/8] try to use tablegen --- llvm/lib/Target/X86/X86CallingConv.td | 34 +++++- llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 33 ++---- .../CodeGen/X86/fp128-return-calling-conv.ll | 102 ++++++++++++++++++ 3 files changed, 141 insertions(+), 28 deletions(-) create mode 100644 llvm/test/CodeGen/X86/fp128-return-calling-conv.ll diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 2e03f8996969b..5c7f48166b631 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -390,8 +390,8 @@ def RetCC_X86_Win64_C : CallingConv<[ // X86-64 vectorcall return-value convention. def RetCC_X86_64_Vectorcall : CallingConv<[ - // Vectorcall calling convention always returns FP values in XMMs. - CCIfType<[f32, f64, f128], + // See RetCC_X86_64 for details on f128. + CCIfType<[f32, f64], CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>, // Otherwise, everything is the same as Windows X86-64 C CC. @@ -469,6 +469,36 @@ def RetCC_X86_32 : CallingConv<[ // This is the root return-value convention for the X86-64 backend. def RetCC_X86_64 : CallingConv<[ + // Mingw64 GCC returns f128 via sret, and LLVM matches it for compatibility. 
+ // + // Using sret is a reasonable implementation of the Windows x64 calling + // convention: + // + // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170#return-values + // + // > Otherwise, the caller must allocate memory for the return value and pass + // > a pointer to it as the first argument. + // + // Although it is not the only reasonable interpretation: + // + // > Nonscalar types including floats, doubles, and vector types such as + // > __m128, __m128i, __m128d are returned in XMM0. + // + // For now, we prefer compatibility with GCC. If official guidelines are ever + // published, this can be revisited. + // + // The alignment of 1 is so the frame's alignment is not bumped. + CCIfType<[f128], CCIfCC<"CallingConv::Win64", CCAssignToStack<16, 1>>>, + CCIfType<[f128], CCIfSubtarget<"isTargetWin64()", + CCIfCC<"CallingConv::C", CCAssignToStack<16, 1>>>>, + CCIfType<[f128], CCIfSubtarget<"isTargetWin64()", + CCIfCC<"CallingConv::X86_VectorCall", CCAssignToStack<16, 1>>>>, + // UEFI also uses the Win64 CC. + CCIfType<[f128], CCIfSubtarget<"isTargetUEFI64()", + CCIfCC<"CallingConv::C", CCAssignToStack<16, 1>>>>, + CCIfType<[f128], CCIfSubtarget<"isTargetUEFI64()", + CCIfCC<"CallingConv::X86_VectorCall", CCAssignToStack<16, 1>>>>, + // HiPE uses RetCC_X86_64_HiPE CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>, diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index ba177c0cc63a1..556524b8af503 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -670,34 +670,15 @@ bool X86TargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context, const Type *RetTy) const { - // Mingw64 GCC returns f128 via sret, and LLVM matches it for compatibility. 
- // - // - // Using sret is a reasonable implementation of the Windows x64 calling - // convention: - // - // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170#return-values - // - // > Otherwise, the caller must allocate memory for the return value and pass - // > a pointer to it as the first argument. - // - // Although it is not the only reasonable interpretation: - // - // > Nonscalar types including floats, doubles, and vector types such as - // > __m128, __m128i, __m128d are returned in XMM0. - // - // For now, we prefer compatibility with GCC. If official guidelines are ever - // published, this can be revisited. - // - // Return false, which will perform sret demotion. - if (Subtarget.isCallingConvWin64(CallConv) && - llvm::any_of( - Outs, [](const ISD::OutputArg &Out) { return Out.VT == MVT::f128; })) - return false; - SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); - return CCInfo.CheckReturn(Outs, RetCC_X86); + if (!CCInfo.CheckReturn(Outs, RetCC_X86)) + return false; + + // Demotion to sret when the value must be returned via memory. This is the + // case for fp128 on Windows. 
+ return llvm::none_of(RVLocs, + [](const CCValAssign &VA) { return VA.isMemLoc(); }); } const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { diff --git a/llvm/test/CodeGen/X86/fp128-return-calling-conv.ll b/llvm/test/CodeGen/X86/fp128-return-calling-conv.ll new file mode 100644 index 0000000000000..6aa12e0aa1f8c --- /dev/null +++ b/llvm/test/CodeGen/X86/fp128-return-calling-conv.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu | FileCheck %s --check-prefix=WIN +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=LINUX + +; On Windows, for calling conventions that have GCC compatibibility requirements, +; follow the GCC convention of passing via the stack (an sret). Elsewhere we use +; the more efficient approach of returning via XMM0. +; +; NOTE: f128 arguments are passed indirectly on Windows, but in XMM registers elsewhere. + +define fp128 @ret_ccc(fp128 %a) { +; WIN-LABEL: ret_ccc: +; WIN: # %bb.0: +; WIN-NEXT: movq %rcx, %rax +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rcx) +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_ccc: +; LINUX: # %bb.0: +; LINUX-NEXT: retq + ret fp128 %a +} + +define win64cc fp128 @ret_win64(fp128 %a) { +; WIN-LABEL: ret_win64: +; WIN: # %bb.0: +; WIN-NEXT: movq %rcx, %rax +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rcx) +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_win64: +; LINUX: # %bb.0: +; LINUX-NEXT: movq %rcx, %rax +; LINUX-NEXT: movaps (%rdx), %xmm0 +; LINUX-NEXT: movaps %xmm0, (%rcx) +; LINUX-NEXT: retq + ret fp128 %a +} + +; vectorcallcc symbol mangling confuses the test generation script. +; The \01 ensures re-running the script does not remove the checks. 
+define x86_vectorcallcc fp128 @"\01ret_vectorcall"(fp128 %a) { +; WIN-LABEL: ret_vectorcall: +; WIN: # %bb.0: +; WIN-NEXT: movq %rcx, %rax +; WIN-NEXT: movaps %xmm1, (%rcx) +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_vectorcall: +; LINUX: # %bb.0: +; LINUX-NEXT: retq + ret fp128 %a +} + +define swiftcc fp128 @ret_swift(fp128 %a) { +; WIN-LABEL: ret_swift: +; WIN: # %bb.0: +; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_swift: +; LINUX: # %bb.0: +; LINUX-NEXT: retq + ret fp128 %a +} + +define tailcc fp128 @ret_tail(fp128 %a) { +; WIN-LABEL: ret_tail: +; WIN: # %bb.0: +; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: retq $40 +; +; LINUX-LABEL: ret_tail: +; LINUX: # %bb.0: +; LINUX-NEXT: retq $8 + ret fp128 %a +} + +define preserve_mostcc fp128 @ret_preserve_most(fp128 %a) { +; WIN-LABEL: ret_preserve_most: +; WIN: # %bb.0: +; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_preserve_most: +; LINUX: # %bb.0: +; LINUX-NEXT: retq + ret fp128 %a +} + +define x86_regcallcc fp128 @ret_regcall(fp128 %a) { +; WIN-LABEL: ret_regcall: +; WIN: # %bb.0: +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_regcall: +; LINUX: # %bb.0: +; LINUX-NEXT: retq + ret fp128 %a +} >From 404e2115796977647ba274717ce0d6dc5f9621bb Mon Sep 17 00:00:00 2001 From: Folkert de Vries <[email protected]> Date: Tue, 23 Jun 2026 01:20:38 +0200 Subject: [PATCH 4/8] add relnotes entry --- llvm/docs/ReleaseNotes.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index df4ced91e8f5e..ca870cafe1817 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -284,6 +284,11 @@ Makes programs 10x faster by doing Special New Thing. two-register push in Windows x64 V3 unwind info. The directive takes two register operands: ``.seh_push2regs %r12, %r13``. +* On Windows, `fp128` is now returned via sret instead of XMM0 for some calling + conventions to match GCC. 
The C, Win64 and vectorcall calling conventions + now use sret, other calling conventions do not need to be compatible with + GCC and still return via XMM0. + ### Changes to the OCaml bindings ### Changes to the Python bindings >From c51d0c8d95f0cc0fdff7b0a10f5b84fae94c1f9a Mon Sep 17 00:00:00 2001 From: Folkert de Vries <[email protected]> Date: Sat, 27 Jun 2026 14:03:45 +0200 Subject: [PATCH 5/8] Revert "try to use tablegen" This reverts commit 6f909ce71777e3d887c553373b71c1c1b1728bc6. --- llvm/lib/Target/X86/X86CallingConv.td | 34 +----- llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 33 ++++-- .../CodeGen/X86/fp128-return-calling-conv.ll | 102 ------------------ 3 files changed, 28 insertions(+), 141 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/fp128-return-calling-conv.ll diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 5c7f48166b631..2e03f8996969b 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -390,8 +390,8 @@ def RetCC_X86_Win64_C : CallingConv<[ // X86-64 vectorcall return-value convention. def RetCC_X86_64_Vectorcall : CallingConv<[ - // See RetCC_X86_64 for details on f128. - CCIfType<[f32, f64], + // Vectorcall calling convention always returns FP values in XMMs. + CCIfType<[f32, f64, f128], CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>, // Otherwise, everything is the same as Windows X86-64 C CC. @@ -469,36 +469,6 @@ def RetCC_X86_32 : CallingConv<[ // This is the root return-value convention for the X86-64 backend. def RetCC_X86_64 : CallingConv<[ - // Mingw64 GCC returns f128 via sret, and LLVM matches it for compatibility. - // - // Using sret is a reasonable implementation of the Windows x64 calling - // convention: - // - // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170#return-values - // - // > Otherwise, the caller must allocate memory for the return value and pass - // > a pointer to it as the first argument. 
- // - // Although it is not the only reasonable interpretation: - // - // > Nonscalar types including floats, doubles, and vector types such as - // > __m128, __m128i, __m128d are returned in XMM0. - // - // For now, we prefer compatibility with GCC. If official guidelines are ever - // published, this can be revisited. - // - // The alignment of 1 is so the frame's alignment is not bumped. - CCIfType<[f128], CCIfCC<"CallingConv::Win64", CCAssignToStack<16, 1>>>, - CCIfType<[f128], CCIfSubtarget<"isTargetWin64()", - CCIfCC<"CallingConv::C", CCAssignToStack<16, 1>>>>, - CCIfType<[f128], CCIfSubtarget<"isTargetWin64()", - CCIfCC<"CallingConv::X86_VectorCall", CCAssignToStack<16, 1>>>>, - // UEFI also uses the Win64 CC. - CCIfType<[f128], CCIfSubtarget<"isTargetUEFI64()", - CCIfCC<"CallingConv::C", CCAssignToStack<16, 1>>>>, - CCIfType<[f128], CCIfSubtarget<"isTargetUEFI64()", - CCIfCC<"CallingConv::X86_VectorCall", CCAssignToStack<16, 1>>>>, - // HiPE uses RetCC_X86_64_HiPE CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>, diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index 556524b8af503..ba177c0cc63a1 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -670,15 +670,34 @@ bool X86TargetLowering::CanLowerReturn( CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context, const Type *RetTy) const { - SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); - if (!CCInfo.CheckReturn(Outs, RetCC_X86)) + // Mingw64 GCC returns f128 via sret, and LLVM matches it for compatibility. 
+ // + // + // Using sret is a reasonable implementation of the Windows x64 calling + // convention: + // + // https://learn.microsoft.com/en-us/cpp/build/x64-calling-convention?view=msvc-170#return-values + // + // > Otherwise, the caller must allocate memory for the return value and pass + // > a pointer to it as the first argument. + // + // Although it is not the only reasonable interpretation: + // + // > Nonscalar types including floats, doubles, and vector types such as + // > __m128, __m128i, __m128d are returned in XMM0. + // + // For now, we prefer compatibility with GCC. If official guidelines are ever + // published, this can be revisited. + // + // Return false, which will perform sret demotion. + if (Subtarget.isCallingConvWin64(CallConv) && + llvm::any_of( + Outs, [](const ISD::OutputArg &Out) { return Out.VT == MVT::f128; })) return false; - // Demotion to sret when the value must be returned via memory. This is the - // case for fp128 on windows. - return llvm::none_of(RVLocs, - [](const CCValAssign &VA) { return VA.isMemLoc(); }); + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); + return CCInfo.CheckReturn(Outs, RetCC_X86); } const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { diff --git a/llvm/test/CodeGen/X86/fp128-return-calling-conv.ll b/llvm/test/CodeGen/X86/fp128-return-calling-conv.ll deleted file mode 100644 index 6aa12e0aa1f8c..0000000000000 --- a/llvm/test/CodeGen/X86/fp128-return-calling-conv.ll +++ /dev/null @@ -1,102 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu | FileCheck %s --check-prefix=WIN -; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=LINUX - -; On Windows, for calling conventions that have GCC compatibibility requirements, -; follow the GCC convention of passing via the stack (an sret). 
Elsewhere we use -; the more efficient approach of returning via XMM0. -; -; NOTE: f128 arguments are passed indirectly on Windows, but in XMM registers elsewhere. - -define fp128 @ret_ccc(fp128 %a) { -; WIN-LABEL: ret_ccc: -; WIN: # %bb.0: -; WIN-NEXT: movq %rcx, %rax -; WIN-NEXT: movaps (%rdx), %xmm0 -; WIN-NEXT: movaps %xmm0, (%rcx) -; WIN-NEXT: retq -; -; LINUX-LABEL: ret_ccc: -; LINUX: # %bb.0: -; LINUX-NEXT: retq - ret fp128 %a -} - -define win64cc fp128 @ret_win64(fp128 %a) { -; WIN-LABEL: ret_win64: -; WIN: # %bb.0: -; WIN-NEXT: movq %rcx, %rax -; WIN-NEXT: movaps (%rdx), %xmm0 -; WIN-NEXT: movaps %xmm0, (%rcx) -; WIN-NEXT: retq -; -; LINUX-LABEL: ret_win64: -; LINUX: # %bb.0: -; LINUX-NEXT: movq %rcx, %rax -; LINUX-NEXT: movaps (%rdx), %xmm0 -; LINUX-NEXT: movaps %xmm0, (%rcx) -; LINUX-NEXT: retq - ret fp128 %a -} - -; vectorcallcc symbol mangling confuses the test generation script. -; The \01 ensures re-running the script does not remove the checks. -define x86_vectorcallcc fp128 @"\01ret_vectorcall"(fp128 %a) { -; WIN-LABEL: ret_vectorcall: -; WIN: # %bb.0: -; WIN-NEXT: movq %rcx, %rax -; WIN-NEXT: movaps %xmm1, (%rcx) -; WIN-NEXT: retq -; -; LINUX-LABEL: ret_vectorcall: -; LINUX: # %bb.0: -; LINUX-NEXT: retq - ret fp128 %a -} - -define swiftcc fp128 @ret_swift(fp128 %a) { -; WIN-LABEL: ret_swift: -; WIN: # %bb.0: -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: retq -; -; LINUX-LABEL: ret_swift: -; LINUX: # %bb.0: -; LINUX-NEXT: retq - ret fp128 %a -} - -define tailcc fp128 @ret_tail(fp128 %a) { -; WIN-LABEL: ret_tail: -; WIN: # %bb.0: -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: retq $40 -; -; LINUX-LABEL: ret_tail: -; LINUX: # %bb.0: -; LINUX-NEXT: retq $8 - ret fp128 %a -} - -define preserve_mostcc fp128 @ret_preserve_most(fp128 %a) { -; WIN-LABEL: ret_preserve_most: -; WIN: # %bb.0: -; WIN-NEXT: movaps (%rcx), %xmm0 -; WIN-NEXT: retq -; -; LINUX-LABEL: ret_preserve_most: -; LINUX: # %bb.0: -; LINUX-NEXT: retq - ret fp128 %a -} - -define 
x86_regcallcc fp128 @ret_regcall(fp128 %a) { -; WIN-LABEL: ret_regcall: -; WIN: # %bb.0: -; WIN-NEXT: retq -; -; LINUX-LABEL: ret_regcall: -; LINUX: # %bb.0: -; LINUX-NEXT: retq - ret fp128 %a -} >From 1d4dea5b00f2f673e86db943c37c125e605eef31 Mon Sep 17 00:00:00 2001 From: Folkert de Vries <[email protected]> Date: Sat, 27 Jun 2026 14:15:21 +0200 Subject: [PATCH 6/8] restrict what CCs now use sret for f128 --- .../CodeGen/X86/fp128-return-calling-conv.ll | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 llvm/test/CodeGen/X86/fp128-return-calling-conv.ll diff --git a/llvm/test/CodeGen/X86/fp128-return-calling-conv.ll b/llvm/test/CodeGen/X86/fp128-return-calling-conv.ll new file mode 100644 index 0000000000000..6aa12e0aa1f8c --- /dev/null +++ b/llvm/test/CodeGen/X86/fp128-return-calling-conv.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-pc-windows-gnu | FileCheck %s --check-prefix=WIN +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=LINUX + +; On Windows, for calling conventions that have GCC compatibility requirements, +; follow the GCC convention of returning via the stack (an sret). Elsewhere we use +; the more efficient approach of returning via XMM0. +; +; NOTE: f128 arguments are passed indirectly on Windows, but in XMM registers elsewhere. 
+ +define fp128 @ret_ccc(fp128 %a) { +; WIN-LABEL: ret_ccc: +; WIN: # %bb.0: +; WIN-NEXT: movq %rcx, %rax +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rcx) +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_ccc: +; LINUX: # %bb.0: +; LINUX-NEXT: retq + ret fp128 %a +} + +define win64cc fp128 @ret_win64(fp128 %a) { +; WIN-LABEL: ret_win64: +; WIN: # %bb.0: +; WIN-NEXT: movq %rcx, %rax +; WIN-NEXT: movaps (%rdx), %xmm0 +; WIN-NEXT: movaps %xmm0, (%rcx) +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_win64: +; LINUX: # %bb.0: +; LINUX-NEXT: movq %rcx, %rax +; LINUX-NEXT: movaps (%rdx), %xmm0 +; LINUX-NEXT: movaps %xmm0, (%rcx) +; LINUX-NEXT: retq + ret fp128 %a +} + +; vectorcallcc symbol mangling confuses the test generation script. +; The \01 ensures re-running the script does not remove the checks. +define x86_vectorcallcc fp128 @"\01ret_vectorcall"(fp128 %a) { +; WIN-LABEL: ret_vectorcall: +; WIN: # %bb.0: +; WIN-NEXT: movq %rcx, %rax +; WIN-NEXT: movaps %xmm1, (%rcx) +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_vectorcall: +; LINUX: # %bb.0: +; LINUX-NEXT: retq + ret fp128 %a +} + +define swiftcc fp128 @ret_swift(fp128 %a) { +; WIN-LABEL: ret_swift: +; WIN: # %bb.0: +; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_swift: +; LINUX: # %bb.0: +; LINUX-NEXT: retq + ret fp128 %a +} + +define tailcc fp128 @ret_tail(fp128 %a) { +; WIN-LABEL: ret_tail: +; WIN: # %bb.0: +; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: retq $40 +; +; LINUX-LABEL: ret_tail: +; LINUX: # %bb.0: +; LINUX-NEXT: retq $8 + ret fp128 %a +} + +define preserve_mostcc fp128 @ret_preserve_most(fp128 %a) { +; WIN-LABEL: ret_preserve_most: +; WIN: # %bb.0: +; WIN-NEXT: movaps (%rcx), %xmm0 +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_preserve_most: +; LINUX: # %bb.0: +; LINUX-NEXT: retq + ret fp128 %a +} + +define x86_regcallcc fp128 @ret_regcall(fp128 %a) { +; WIN-LABEL: ret_regcall: +; WIN: # %bb.0: +; WIN-NEXT: retq +; +; LINUX-LABEL: ret_regcall: +; LINUX: # %bb.0: +; LINUX-NEXT: 
retq + ret fp128 %a +} >From 4cb0cacdf6423d96489a5a8289e738cfe42dbb59 Mon Sep 17 00:00:00 2001 From: Folkert de Vries <[email protected]> Date: Sat, 27 Jun 2026 14:36:30 +0200 Subject: [PATCH 7/8] WIP --- llvm/lib/Target/X86/X86CallingConv.td | 2 +- llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 2e03f8996969b..dfe4e9b922faf 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -391,7 +391,7 @@ def RetCC_X86_Win64_C : CallingConv<[ // X86-64 vectorcall return-value convention. def RetCC_X86_64_Vectorcall : CallingConv<[ // Vectorcall calling convention always returns FP values in XMMs. - CCIfType<[f32, f64, f128], + CCIfType<[f32, f64], CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>, // Otherwise, everything is the same as Windows X86-64 C CC. diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index ba177c0cc63a1..a65bcc666557f 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -690,7 +690,19 @@ bool X86TargetLowering::CanLowerReturn( // published, this can be revisited. // // Return false, which will perform sret demotion. 
- if (Subtarget.isCallingConvWin64(CallConv) && + auto IsWin64F128StackCC = [this](CallingConv::ID CC) -> bool { + switch (CC) { + case CallingConv::Win64: + return true; + case CallingConv::C: + case CallingConv::X86_VectorCall: + return Subtarget.isTargetWin64() || Subtarget.isTargetUEFI64(); + default: + return false; + } + }; + + if (IsWin64F128StackCC(CallConv) && llvm::any_of( Outs, [](const ISD::OutputArg &Out) { return Out.VT == MVT::f128; })) return false; >From 9055ee81c23b24b4efcfbce70c32f9d64dcd90f4 Mon Sep 17 00:00:00 2001 From: Folkert de Vries <[email protected]> Date: Sat, 27 Jun 2026 14:36:51 +0200 Subject: [PATCH 8/8] make clang explicitly use sret to return f128 on windows --- clang/lib/CodeGen/Targets/X86.cpp | 8 ++++++-- clang/test/CodeGen/win-fp128.c | 4 ++-- llvm/lib/Target/X86/X86ISelLoweringCall.cpp | 3 ++- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 77c912b021604..b49ee331c0152 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -3452,8 +3452,12 @@ ABIArgInfo WinX86_64ABIInfo::classify(QualType Ty, unsigned &FreeSSERegs, return ABIArgInfo::getDirect(llvm::FixedVectorType::get( llvm::Type::getInt64Ty(getVMContext()), 2)); - // Mingw64 GCC returns f128 via sret. Clang matches that for - // compatibility. + // Mingw64 GCC returns f128 via sret, and Clang matches that for + // compatibility. RegCall is excluded: it returns f128 in a vector + // register, matching the X86 backend's calling-convention lowering. 
+ if (BT->getKind() == BuiltinType::Float128 && !IsRegCall) + return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(), + /*ByVal=*/false); break; default: diff --git a/clang/test/CodeGen/win-fp128.c b/clang/test/CodeGen/win-fp128.c index dc144f899fa4f..efc24e2ea0d63 100644 --- a/clang/test/CodeGen/win-fp128.c +++ b/clang/test/CodeGen/win-fp128.c @@ -3,10 +3,10 @@ // __float128 is unsupported on MSVC __float128 fp128_ret(void) { return 0; } -// CHECK-GNU64: define dso_local fp128 @fp128_ret() +// CHECK-GNU64: define dso_local void @fp128_ret(ptr dead_on_unwind noalias writable sret(fp128) align 16 %agg.result) __float128 fp128_args(__float128 a, __float128 b) { return a * b; } -// CHECK-GNU64: define dso_local fp128 @fp128_args(ptr noundef dead_on_return %0, ptr noundef dead_on_return %1) +// CHECK-GNU64: define dso_local void @fp128_args(ptr dead_on_unwind noalias writable sret(fp128) align 16 %agg.result, ptr noundef dead_on_return %0, ptr noundef dead_on_return %1) void fp128_vararg(int a, ...) { // CHECK-GNU64-LABEL: define dso_local void @fp128_vararg diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index a65bcc666557f..cc8f541406520 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -671,7 +671,8 @@ bool X86TargetLowering::CanLowerReturn( const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context, const Type *RetTy) const { // Mingw64 GCC returns f128 via sret, and LLVM matches it for compatibility. - // + // This logic exists for libcalls, a frontend should explicitly use sret + // rather than rely on the sret demotion here. // // Using sret is a reasonable implementation of the Windows x64 calling // convention: _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
