llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-x86 Author: Trevor Gross (tgross35) <details> <summary>Changes</summary> Backport 7ed98a62fc4930b1f204541e326556af50d2249a and its two small follow-ups 68a401bd6532dd423ccf8509b72457dc35f787ca 2ba0bb66fa873259c5997271b0b971ef76b5555b --- Patch is 491.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150746.diff 51 Files Affected: - (modified) llvm/docs/ReleaseNotes.md (+2) - (modified) llvm/lib/Target/X86/X86CallingConv.cpp (+31) - (modified) llvm/lib/Target/X86/X86CallingConv.td (+5) - (modified) llvm/lib/Target/X86/X86ISelLoweringCall.cpp (+12-3) - (modified) llvm/test/CodeGen/X86/abds-neg.ll (+218-192) - (modified) llvm/test/CodeGen/X86/abds.ll (+208-182) - (modified) llvm/test/CodeGen/X86/abdu-neg.ll (+149-133) - (modified) llvm/test/CodeGen/X86/abdu.ll (+120-105) - (modified) llvm/test/CodeGen/X86/abs.ll (+32-23) - (modified) llvm/test/CodeGen/X86/add-sub-bool.ll (+15-10) - (modified) llvm/test/CodeGen/X86/arg-copy-elide.ll (+4-4) - (modified) llvm/test/CodeGen/X86/avx512fp16-cvt.ll (+29-13) - (modified) llvm/test/CodeGen/X86/bitselect.ll (+29-26) - (modified) llvm/test/CodeGen/X86/bsf.ll (+78-66) - (modified) llvm/test/CodeGen/X86/bsr.ll (+82-76) - (modified) llvm/test/CodeGen/X86/bswap-wide-int.ll (+20-10) - (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll (+18-18) - (modified) llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll (+47-47) - (modified) llvm/test/CodeGen/X86/fp128-cast-strict.ll (+52-40) - (modified) llvm/test/CodeGen/X86/fp128-cast.ll (+71-54) - (modified) llvm/test/CodeGen/X86/fp128-libcalls-strict.ll (+1260-800) - (modified) llvm/test/CodeGen/X86/fp128-libcalls.ll (+1121-652) - (modified) llvm/test/CodeGen/X86/fshl.ll (+104-81) - (modified) llvm/test/CodeGen/X86/fshr.ll (+91-79) - (modified) llvm/test/CodeGen/X86/funnel-shift.ll (+44-30) - (modified) llvm/test/CodeGen/X86/i128-add.ll (+14-9) - (modified) llvm/test/CodeGen/X86/i128-fp128-abi.ll (+270-185) - (modified) llvm/test/CodeGen/X86/i128-sdiv.ll (+36-25) - (modified) llvm/test/CodeGen/X86/i128-udiv.ll (+9-3) - (modified) llvm/test/CodeGen/X86/iabs.ll (+23-20) - (modified) llvm/test/CodeGen/X86/icmp-shift-opt.ll (+69-33) - (modified) llvm/test/CodeGen/X86/mul128.ll (+46-51) - (modified) llvm/test/CodeGen/X86/neg-abs.ll (+32-23) - (modified) llvm/test/CodeGen/X86/popcnt.ll (+275-210) - (modified) llvm/test/CodeGen/X86/pr46004.ll (+19) - (modified) llvm/test/CodeGen/X86/scalar-fp-to-i32.ll (+54-22) - (modified) llvm/test/CodeGen/X86/scalar-fp-to-i64.ll (+54-22) - (modified) llvm/test/CodeGen/X86/scmp.ll (+21-18) - (modified) llvm/test/CodeGen/X86/sdiv_fix.ll (+50-49) - (modified) llvm/test/CodeGen/X86/sdiv_fix_sat.ll (+222-218) - (modified) llvm/test/CodeGen/X86/shift-combine.ll (+12-2) - (modified) llvm/test/CodeGen/X86/shift-i128.ll (+52-20) - (modified) llvm/test/CodeGen/X86/smax.ll (+42-36) - (modified) llvm/test/CodeGen/X86/smin.ll (+43-38) - (modified) llvm/test/CodeGen/X86/ucmp.ll (+19-15) - (modified) llvm/test/CodeGen/X86/udiv_fix.ll (+15-13) - (modified) llvm/test/CodeGen/X86/udiv_fix_sat.ll (+15-13) - (modified) llvm/test/CodeGen/X86/umax.ll (+72-63) - (modified) llvm/test/CodeGen/X86/umin.ll (+43-38) - (modified) llvm/test/CodeGen/X86/umulo-128-legalisation-lowering.ll (+3-3) - (modified) llvm/test/CodeGen/X86/wide-integer-cmp.ll (+8-6) ``````````diff diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index bafd7ba38aaae..2a4734d1733bf 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -233,6 +233,8 @@ Changes to the X86 Backend -------------------------- * `fp128` will now use `*f128` libcalls on 32-bit GNU targets as well. +* On x86-32, `fp128` and `i128` are now passed with the expected 16-byte stack + alignment. Changes to the OCaml bindings ----------------------------- diff --git a/llvm/lib/Target/X86/X86CallingConv.cpp b/llvm/lib/Target/X86/X86CallingConv.cpp index 0b4c63f7a81f7..5d5a705893242 100644 --- a/llvm/lib/Target/X86/X86CallingConv.cpp +++ b/llvm/lib/Target/X86/X86CallingConv.cpp @@ -374,5 +374,36 @@ static bool CC_X86_64_I128(unsigned &ValNo, MVT &ValVT, MVT &LocVT, return true; } +/// Special handling for i128 and fp128: on x86-32, i128 and fp128 get legalized +/// as four i32s, but fp128 must be passed on the stack with 16-byte alignment. +/// Technically only fp128 has a specified ABI, but it makes sense to handle +/// i128 the same until we hear differently. +static bool CC_X86_32_I128_FP128(unsigned &ValNo, MVT &ValVT, MVT &LocVT, + CCValAssign::LocInfo &LocInfo, + ISD::ArgFlagsTy &ArgFlags, CCState &State) { + assert(ValVT == MVT::i32 && "Should have i32 parts"); + SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs(); + PendingMembers.push_back( + CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo)); + + if (!ArgFlags.isInConsecutiveRegsLast()) + return true; + + assert(PendingMembers.size() == 4 && "Should have four parts"); + + int64_t Offset = State.AllocateStack(16, Align(16)); + PendingMembers[0].convertToMem(Offset); + PendingMembers[1].convertToMem(Offset + 4); + PendingMembers[2].convertToMem(Offset + 8); + PendingMembers[3].convertToMem(Offset + 12); + + State.addLoc(PendingMembers[0]); + State.addLoc(PendingMembers[1]); + State.addLoc(PendingMembers[2]); + State.addLoc(PendingMembers[3]); + PendingMembers.clear(); + return true; +} + // Provides entry points of CC_X86 and RetCC_X86. #include "X86GenCallingConv.inc" diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 823e0caa02262..f020e0b55141c 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -859,6 +859,11 @@ def CC_X86_32_C : CallingConv<[ // The 'nest' parameter, if any, is passed in ECX. CCIfNest<CCAssignToReg<[ECX]>>, + // i128 and fp128 need to be passed on the stack with a higher alignment than + // their legal types. Handle this with a custom function. + CCIfType<[i32], + CCIfConsecutiveRegs<CCCustom<"CC_X86_32_I128_FP128">>>, + // On swifttailcc pass swiftself in ECX. CCIfCC<"CallingConv::SwiftTail", CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[ECX]>>>>, diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp index 9ad355311527b..b4639ac2577e8 100644 --- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp +++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp @@ -237,9 +237,18 @@ EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters( Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const { - // i128 split into i64 needs to be allocated to two consecutive registers, - // or spilled to the stack as a whole. - return Ty->isIntegerTy(128); + // On x86-64 i128 is split into two i64s and needs to be allocated to two + // consecutive registers, or spilled to the stack as a whole. On x86-32 i128 + // is split to four i32s and never actually passed in registers, but we use + // the consecutive register mark to match it in TableGen. + if (Ty->isIntegerTy(128)) + return true; + + // On x86-32, fp128 acts the same as i128. + if (Subtarget.is32Bit() && Ty->isFP128Ty()) + return true; + + return false; } /// Helper for getByValTypeAlignment to determine diff --git a/llvm/test/CodeGen/X86/abds-neg.ll b/llvm/test/CodeGen/X86/abds-neg.ll index f6d66ab47ce05..2911edfbfd409 100644 --- a/llvm/test/CodeGen/X86/abds-neg.ll +++ b/llvm/test/CodeGen/X86/abds-neg.ll @@ -367,44 +367,49 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; X86-LABEL: abd_ext_i128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: sbbl %edx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: sbbl %edi, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sbbl %esi, %eax -; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovll %eax, %esi -; X86-NEXT: cmovll %ebx, %edi -; X86-NEXT: cmovll %ebp, %edx -; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload -; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 40(%ebp), %ecx +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: movl 24(%ebp), %edx +; X86-NEXT: movl 28(%ebp), %esi +; X86-NEXT: subl %ecx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %edx +; X86-NEXT: sbbl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%ebp), %edx +; X86-NEXT: movl 32(%ebp), %ebx +; X86-NEXT: sbbl %edx, %ebx +; X86-NEXT: movl 52(%ebp), %esi +; X86-NEXT: movl 36(%ebp), %edi +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: subl 24(%ebp), %ecx +; X86-NEXT: sbbl 28(%ebp), %eax +; X86-NEXT: sbbl 32(%ebp), %edx +; X86-NEXT: sbbl 36(%ebp), %esi +; X86-NEXT: cmovll %edi, %esi +; X86-NEXT: cmovll %ebx, %edx +; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: xorl %edi, %edi ; X86-NEXT: negl %ecx -; X86-NEXT: movl $0, %ebp -; X86-NEXT: sbbl %edx, %ebp -; X86-NEXT: movl $0, %edx -; X86-NEXT: sbbl %edi, %edx -; X86-NEXT: sbbl %esi, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: movl %ebp, 4(%eax) -; X86-NEXT: movl %edx, 8(%eax) -; X86-NEXT: movl %ebx, 12(%eax) -; X86-NEXT: addl $4, %esp +; X86-NEXT: movl $0, %ebx +; X86-NEXT: sbbl %eax, %ebx +; X86-NEXT: movl $0, %eax +; X86-NEXT: sbbl %edx, %eax +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: movl 8(%ebp), %edx +; X86-NEXT: movl %ecx, (%edx) +; X86-NEXT: movl %ebx, 4(%edx) +; X86-NEXT: movl %eax, 8(%edx) +; X86-NEXT: movl %edi, 12(%edx) +; X86-NEXT: movl %edx, %eax +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -438,44 +443,49 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; X86-LABEL: abd_ext_i128_undef: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: subl %ecx, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: sbbl %edx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: sbbl %edi, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sbbl %esi, %eax -; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovll %eax, %esi -; X86-NEXT: cmovll %ebx, %edi -; X86-NEXT: cmovll %ebp, %edx -; X86-NEXT: cmovll (%esp), %ecx # 4-byte Folded Reload -; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 40(%ebp), %ecx +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: movl 24(%ebp), %edx +; X86-NEXT: movl 28(%ebp), %esi +; X86-NEXT: subl %ecx, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %edx +; X86-NEXT: sbbl %eax, %edx +; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 48(%ebp), %edx +; X86-NEXT: movl 32(%ebp), %ebx +; X86-NEXT: sbbl %edx, %ebx +; X86-NEXT: movl 52(%ebp), %esi +; X86-NEXT: movl 36(%ebp), %edi +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: subl 24(%ebp), %ecx +; X86-NEXT: sbbl 28(%ebp), %eax +; X86-NEXT: sbbl 32(%ebp), %edx +; X86-NEXT: sbbl 36(%ebp), %esi +; X86-NEXT: cmovll %edi, %esi +; X86-NEXT: cmovll %ebx, %edx +; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: cmovll {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: xorl %edi, %edi ; X86-NEXT: negl %ecx -; X86-NEXT: movl $0, %ebp -; X86-NEXT: sbbl %edx, %ebp -; X86-NEXT: movl $0, %edx -; X86-NEXT: sbbl %edi, %edx -; X86-NEXT: sbbl %esi, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ecx, (%eax) -; X86-NEXT: movl %ebp, 4(%eax) -; X86-NEXT: movl %edx, 8(%eax) -; X86-NEXT: movl %ebx, 12(%eax) -; X86-NEXT: addl $4, %esp +; X86-NEXT: movl $0, %ebx +; X86-NEXT: sbbl %eax, %ebx +; X86-NEXT: movl $0, %eax +; X86-NEXT: sbbl %edx, %eax +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: movl 8(%ebp), %edx +; X86-NEXT: movl %ecx, (%edx) +; X86-NEXT: movl %ebx, 4(%edx) +; X86-NEXT: movl %eax, 8(%edx) +; X86-NEXT: movl %edi, 12(%edx) +; X86-NEXT: movl %edx, %eax +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -639,55 +649,59 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; X86-LABEL: abd_minmax_i128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmpl %eax, %esi -; X86-NEXT: sbbl %ebx, %ecx -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: sbbl %ebp, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %edx, %ecx -; X86-NEXT: sbbl %edi, %ecx -; X86-NEXT: movl %edi, %ecx -; X86-NEXT: cmovll %edx, %ecx -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl %ebx, %ecx -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %eax, %edx -; X86-NEXT: cmovll %esi, %edx -; X86-NEXT: cmpl %esi, %eax -; X86-NEXT: movl %ebx, %esi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl %edi, %esi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %ebx -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %eax -; X86-NEXT: subl %eax, %edx -; X86-NEXT: sbbl %ebx, %ecx -; X86-NEXT: sbbl %esi, %ebp -; X86-NEXT: movl (%esp), %esi # 4-byte Reload -; X86-NEXT: sbbl %edi, %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edx, (%eax) -; X86-NEXT: movl %ecx, 4(%eax) -; X86-NEXT: movl %ebp, 8(%eax) -; X86-NEXT: movl %esi, 12(%eax) -; X86-NEXT: addl $4, %esp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 40(%ebp), %esi +; X86-NEXT: movl 24(%ebp), %edi +; X86-NEXT: movl 28(%ebp), %eax +; X86-NEXT: cmpl %esi, %edi +; X86-NEXT: sbbl 44(%ebp), %eax +; X86-NEXT: movl 48(%ebp), %edx +; X86-NEXT: movl 32(%ebp), %eax +; X86-NEXT: sbbl %edx, %eax +; X86-NEXT: movl 52(%ebp), %ebx +; X86-NEXT: movl 36(%ebp), %ecx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sbbl %ebx, %eax +; X86-NEXT: movl %ebx, %eax +; X86-NEXT: cmovll %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %edx, %eax +; X86-NEXT: cmovll 32(%ebp), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%ebp), %eax +; X86-NEXT: cmovll 28(%ebp), %eax +; X86-NEXT: movl %esi, %ecx +; X86-NEXT: cmovll %edi, %ecx +; X86-NEXT: cmpl %edi, %esi +; X86-NEXT: movl 44(%ebp), %edi +; X86-NEXT: sbbl 28(%ebp), %edi +; X86-NEXT: movl %edx, %edi +; X86-NEXT: sbbl 32(%ebp), %edi +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: sbbl 36(%ebp), %edi +; X86-NEXT: cmovll 36(%ebp), %ebx +; X86-NEXT: cmovll 32(%ebp), %edx +; X86-NEXT: movl 44(%ebp), %edi +; X86-NEXT: cmovll 28(%ebp), %edi +; X86-NEXT: cmovll 24(%ebp), %esi +; X86-NEXT: subl %esi, %ecx +; X86-NEXT: sbbl %edi, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X86-NEXT: sbbl %edx, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X86-NEXT: sbbl %ebx, %esi +; X86-NEXT: movl 8(%ebp), %edx +; X86-NEXT: movl %ecx, (%edx) +; X86-NEXT: movl %eax, 4(%edx) +; X86-NEXT: movl %edi, 8(%edx) +; X86-NEXT: movl %esi, 12(%edx) +; X86-NEXT: movl %edx, %eax +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -848,37 +862,41 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; X86-LABEL: abd_cmp_i128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: subl %edx, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: sbbl %esi, %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: sbbl %ecx, %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: sbbl %edi, %eax -; X86-NEXT: subl {{[0-9]+}}(%esp), %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmovgel (%esp), %edx # 4-byte Folded Reload -; X86-NEXT: cmovgel %ebx, %esi -; X86-NEXT: cmovgel %ebp, %ecx -; X86-NEXT: cmovgel %eax, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, 12(%eax) -; X86-NEXT: movl %ecx, 8(%eax) -; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl %edx, (%eax) -; X86-NEXT: addl $4, %esp +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 24(%ebp), %ecx +; X86-NEXT: movl 28(%ebp), %edx +; X86-NEXT: movl 40(%ebp), %eax +; X86-NEXT: movl 44(%ebp), %esi +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl %esi, %eax +; X86-NEXT: sbbl %edx, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 32(%ebp), %esi +; X86-NEXT: movl 48(%ebp), %edi +; X86-NEXT: sbbl %esi, %edi +; X86-NEXT: movl 36(%ebp), %ebx +; X86-NEXT: movl 52(%ebp), %eax +; X86-NEXT: sbbl %ebx, %eax +; X86-NEXT: subl 40(%ebp), %ecx +; X86-NEXT: sbbl 44(%ebp), %edx +; X86-NEXT: sbbl 48(%ebp), %esi +; X86-NEXT: sbbl 52(%ebp), %ebx +; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; X86-NEXT: cmovgel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: cmovgel %edi, %esi +; X86-NEXT: cmovgel %eax, %ebx +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %ebx, 12(%eax) +; X86-NEXT: movl %esi, 8(%eax) +; X86-NEXT: movl %edx, 4(%eax) +; X86-NEXT: movl %ecx, (%eax) +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -1118,35 +1136,39 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; X86-LABEL: abd_subnsw_i128: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: subl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: xorl %ebx, %ecx -; X86-NEXT: xorl %ebx, %edx -; X86-NEXT: xorl %ebx, %esi -; X86-NEXT: xorl %ebx, %edi -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: subl %edi, %ebp -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: sbbl %esi, %edi -; X86-NEXT: movl %ebx, %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $16, %esp +; X86-NEXT: movl 36(%ebp), %eax +; X86-NEXT: movl 32(%ebp), %ecx +; X86-NEXT: movl 28(%ebp), %edx +; X86-NEXT: movl 24(%ebp), %esi +; X86-NEXT: subl 40(%ebp), %esi +; X86-NEXT: sbbl 44(%ebp), %edx +; X86-NEXT: sbbl 48(%ebp), %ecx +; X86-NEXT: sbbl 52(%ebp), %eax +; X86-NEXT: movl %eax, %edi +; X86-NEXT: sarl $31, %edi +; X86-NEXT: xorl %edi, %eax +; X86-NEXT: xorl %edi, %ecx +; X86-NEXT: xorl %edi, %edx +; X86-NEXT: xorl %edi, %esi +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: subl %esi, %ebx +; X86-NEXT: movl %edi, %esi ; X86-NEXT: sbbl %edx, %esi -; X86-NEXT: sbbl %ecx, %ebx -; X86-NEXT: movl %ebp, (%eax) -; X86-NEXT: movl %edi, 4(%eax) -; X86-NEXT: movl %esi, 8(%eax) -; X86-NEXT: movl %ebx, 12(%eax) +; X86-NEXT: movl %edi, %edx +; X86-NEXT: sbbl %ecx, %edx +; X86-NEXT: sbbl %eax, %edi +; X86-NEXT: movl 8(%ebp), %eax +; X86-NEXT: movl %ebx, (%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: movl %edx, 8(%eax) +; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: leal -12(%ebp), %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -1175,35 +1197,39 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; X86-LABEL: abd_subnsw_i128_undef: ; X86: # %bb.0: ; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: subl {{[0-9]+}}(%esp), %edi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %ebx -; X86-NEXT: sarl $31, %ebx -; X86-NEXT: xorl %ebx, %ecx -; X86-NEXT: xorl %ebx, %edx -; X86-NEXT: xorl %ebx, %esi -; X86-NEXT: xorl %ebx, %edi -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: subl %edi, %ebp -; X86-NEXT: movl %ebx, %edi -; X86-NEXT: sbbl %esi, %edi -; X86-NEXT: movl %ebx, %esi +; X86-NEXT: ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/150746 _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits