https://github.com/kiran-isaac created https://github.com/llvm/llvm-project/pull/102896
Backend: - Caller and callee arguments no longer have to match, just to take up the same space, as they can be changed before the call - Allowed tail calls if caller and callee both (or neither) use sret, whereas before it would be disallowed if either used sret - Allowed tail calls if byval args are used - Added debug trace for IsEligibleForTailCallOptimization Frontend (clang): - Do not generate extra alloca if sret is used with musttail, as the space for the sret is allocated already Change-Id: Ic7f246a7eca43c06874922d642d7dc44bdfc98ec >From 24806b179d34b4afb21832e4a2150c13995b59e0 Mon Sep 17 00:00:00 2001 From: Kiran <kiran.st...@arm.com> Date: Thu, 8 Aug 2024 13:07:24 +0100 Subject: [PATCH] [ARM] musttail fixes Backend: - Caller and callee arguments no longer have to match, just to take up the same space, as they can be changed before the call - Allowed tail calls if caller and callee both (or neither) use sret, whereas before it would be disallowed if either used sret - Allowed tail calls if byval args are used - Added debug trace for IsEligibleForTailCallOptimization Frontend (clang): - Do not generate extra alloca if sret is used with musttail, as the space for the sret is allocated already Change-Id: Ic7f246a7eca43c06874922d642d7dc44bdfc98ec --- clang/lib/CodeGen/CGCall.cpp | 2 +- llvm/include/llvm/CodeGen/CallingConvLower.h | 2 + llvm/lib/CodeGen/CallingConvLower.cpp | 61 +++ llvm/lib/Target/ARM/ARMISelLowering.cpp | 141 ++---- .../ARM/2013-05-13-AAPCS-byval-padding.ll | 16 +- .../ARM/2013-05-13-AAPCS-byval-padding2.ll | 13 +- llvm/test/CodeGen/ARM/fp-arg-shuffle.ll | 22 + llvm/test/CodeGen/ARM/fp16-vector-argument.ll | 41 +- llvm/test/CodeGen/ARM/struct_byval.ll | 455 ++++++++++++++++-- llvm/test/CodeGen/ARM/tail-call-float.ll | 99 +++- 10 files changed, 661 insertions(+), 191 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 070001a180ab88..1144de0b6ba7b5 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ 
b/clang/lib/CodeGen/CGCall.cpp @@ -5085,7 +5085,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, RawAddress SRetAlloca = RawAddress::invalid(); llvm::Value *UnusedReturnSizePtr = nullptr; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { - if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) { + if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) { SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() + IRFunctionArgs.getSRetArgNo(), RetTy, CharUnits::fromQuantity(1)); diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 932a2a94ab1f1a..fdb5982cb2042b 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -540,6 +540,8 @@ class CCState { }); } + void dump() const; + private: /// MarkAllocated - Mark a register and all of its aliases as allocated. void MarkAllocated(MCPhysReg Reg); diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp index b7152587a9fa05..7ba3ea83115db2 100644 --- a/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/llvm/lib/CodeGen/CallingConvLower.cpp @@ -290,3 +290,64 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC, return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(), RVLocs2.end(), AreCompatible); } + +void CCState::dump() const { + dbgs() << "CCState:\n"; + for (const CCValAssign &Loc : Locs) { + if (Loc.isRegLoc()) { + dbgs() << " Reg " << TRI.getName(Loc.getLocReg()); + } else if (Loc.isMemLoc()) { + dbgs() << " Mem " << Loc.getLocMemOffset(); + } else { + assert(Loc.isPendingLoc()); + dbgs() << " Pend " << Loc.getExtraInfo(); + } + + dbgs() << " ValVT:" << Loc.getValVT(); + dbgs() << " LocVT:" << Loc.getLocVT(); + + if (Loc.needsCustom()) + dbgs() << " custom"; + + switch (Loc.getLocInfo()) { + case CCValAssign::Full: + dbgs() << " Full"; + break; + case CCValAssign::SExt: + dbgs() << " SExt"; + 
break; + case CCValAssign::ZExt: + dbgs() << " ZExt"; + break; + case CCValAssign::AExt: + dbgs() << " AExt"; + break; + case CCValAssign::SExtUpper: + dbgs() << " SExtUpper"; + break; + case CCValAssign::ZExtUpper: + dbgs() << " ZExtUpper"; + break; + case CCValAssign::AExtUpper: + dbgs() << " AExtUpper"; + break; + case CCValAssign::BCvt: + dbgs() << " BCvt"; + break; + case CCValAssign::Trunc: + dbgs() << " Trunc"; + break; + case CCValAssign::VExt: + dbgs() << " VExt"; + break; + case CCValAssign::FPExt: + dbgs() << " FPExt"; + break; + case CCValAssign::Indirect: + dbgs() << " Indirect"; + break; + } + + dbgs() << "\n"; + } +} diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 476b7b349294ab..aeba673f9a2f77 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2407,8 +2407,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = false; // For both the non-secure calls and the returns from a CMSE entry function, - // the function needs to do some extra work afte r the call, or before the - // return, respectively, thus it cannot end with atail call + // the function needs to do some extra work after the call, or before the + // return, respectively, thus it cannot end with a tail call if (isCmseNSCall || AFI->isCmseNSEntryFunction()) isTailCall = false; @@ -2959,50 +2959,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, Size = std::max<int>(Size - Excess, 0); } -/// MatchingStackOffset - Return true if the given stack call argument is -/// already available in the same position (relatively) of the caller's -/// incoming argument stack. 
-static -bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, - MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, - const TargetInstrInfo *TII) { - unsigned Bytes = Arg.getValueSizeInBits() / 8; - int FI = std::numeric_limits<int>::max(); - if (Arg.getOpcode() == ISD::CopyFromReg) { - Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); - if (!VR.isVirtual()) - return false; - MachineInstr *Def = MRI->getVRegDef(VR); - if (!Def) - return false; - if (!Flags.isByVal()) { - if (!TII->isLoadFromStackSlot(*Def, FI)) - return false; - } else { - return false; - } - } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { - if (Flags.isByVal()) - // ByVal argument is passed in as a pointer but it's now being - // dereferenced. e.g. - // define @foo(%struct.X* %A) { - // tail call @bar(%struct.X* byval %A) - // } - return false; - SDValue Ptr = Ld->getBasePtr(); - FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); - if (!FINode) - return false; - FI = FINode->getIndex(); - } else - return false; - - assert(FI != std::numeric_limits<int>::max()); - if (!MFI.isFixedObjectIndex(FI)) - return false; - return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); -} - /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. 
Note that this function also @@ -3044,8 +3000,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( for (const CCValAssign &AL : ArgLocs) if (AL.isRegLoc()) AddressRegisters.erase(AL.getLocReg()); - if (AddressRegisters.empty()) + if (AddressRegisters.empty()) { + LLVM_DEBUG(dbgs() << "false (no space for target address)\n"); return false; + } } // Look for obvious safe cases to perform tail call optimization that do not @@ -3054,18 +3012,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. - if (CallerF.hasFnAttribute("interrupt")) + if (CallerF.hasFnAttribute("interrupt")) { + LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n"); return false; + } - if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) + if (canGuaranteeTCO(CalleeCC, + getTargetMachine().Options.GuaranteedTailCallOpt)) { + LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false") + << " (guaranteed tail-call CC)\n"); return CalleeCC == CallerCC; + } - // Also avoid sibcall optimization if either caller or callee uses struct - // return semantics. + // Also avoid sibcall optimization if only one of caller or callee uses + // struct return semantics. bool isCalleeStructRet = Outs.empty() ? 
false : Outs[0].Flags.isSRet(); bool isCallerStructRet = MF.getFunction().hasStructRetAttr(); - if (isCalleeStructRet || isCallerStructRet) + if (isCalleeStructRet != isCallerStructRet) { + LLVM_DEBUG(dbgs() << "false (struct-ret)\n"); return false; + } // Externally-defined functions with weak linkage should not be // tail-called on ARM when the OS does not support dynamic @@ -3078,8 +3044,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( const GlobalValue *GV = G->getGlobal(); const Triple &TT = getTargetMachine().getTargetTriple(); if (GV->hasExternalWeakLinkage() && - (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) + (!TT.isOSWindows() || TT.isOSBinFormatELF() || + TT.isOSBinFormatMachO())) { + LLVM_DEBUG(dbgs() << "false (external weak linkage)\n"); return false; + } } // Check that the call results are passed in the same way. @@ -3088,70 +3057,44 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( getEffectiveCallingConv(CalleeCC, isVarArg), getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins, CCAssignFnForReturn(CalleeCC, isVarArg), - CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) + CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) { + LLVM_DEBUG(dbgs() << "false (incompatible results)\n"); return false; + } // The callee has to preserve all registers the caller needs to preserve. 
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (CalleeCC != CallerCC) { const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); - if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) { + LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n"); return false; + } } - // If Caller's vararg or byval argument has been split between registers and + // If Caller's vararg argument has been split between registers and // stack, do not perform tail call, since part of the argument is in caller's // local frame. const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>(); - if (AFI_Caller->getArgRegsSaveSize()) + if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) { + LLVM_DEBUG(dbgs() << "false (vararg arg reg save area)\n"); return false; + } // If the callee takes no arguments then go on to check the results of the // call. - if (!Outs.empty()) { - if (CCInfo.getStackSize()) { - // Check if the arguments are already laid out in the right way as - // the caller's fixed stack objects. - MachineFrameInfo &MFI = MF.getFrameInfo(); - const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); - i != e; - ++i, ++realArgIdx) { - CCValAssign &VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - SDValue Arg = OutVals[realArgIdx]; - ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; - if (VA.getLocInfo() == CCValAssign::Indirect) - return false; - if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) { - // f64 and vector types are split into multiple registers or - // register/stack-slot combinations. The types will not match - // the registers; give up on memory f64 refs until we figure - // out what to do about this. 
- if (!VA.isRegLoc()) - return false; - if (!ArgLocs[++i].isRegLoc()) - return false; - if (RegVT == MVT::v2f64) { - if (!ArgLocs[++i].isRegLoc()) - return false; - if (!ArgLocs[++i].isRegLoc()) - return false; - } - } else if (!VA.isRegLoc()) { - if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, - MFI, MRI, TII)) - return false; - } - } - } - - const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) - return false; + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) { + LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n"); + return false; } + // If the stack arguments for this call do not fit into our own save area then + // the call cannot be made tail. + if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize()) + return false; + + LLVM_DEBUG(dbgs() << "true\n"); return true; } diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll index d8e22f4f5312ae..e186ae3a961502 100644 --- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll +++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll @@ -12,17 +12,11 @@ define void @check227( ; arg1 --> SP+188 entry: - -;CHECK: sub sp, sp, #12 -;CHECK: push {r11, lr} -;CHECK: sub sp, sp, #4 -;CHECK: add r0, sp, #12 -;CHECK: stm r0, {r1, r2, r3} -;CHECK: ldr r0, [sp, #212] -;CHECK: bl useInt -;CHECK: add sp, sp, #4 -;CHECK: pop {r11, lr} -;CHECK: add sp, sp, #12 +; CHECK: sub sp, sp, #12 +; CHECK: stm sp, {r1, r2, r3} +; CHECK: ldr r0, [sp, #200] +; CHECK: add sp, sp, #12 +; CHECK: b useInt %0 = ptrtoint ptr %arg1 to i32 tail call void @useInt(i32 %0) diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll index 0c5d22984b99e1..efdecce9ae723a 100644 --- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll +++ 
b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll @@ -7,14 +7,11 @@ define void @foo(ptr byval(%struct4bytes) %p0, ; --> R0 ptr byval(%struct20bytes) %p1 ; --> R1,R2,R3, [SP+0 .. SP+8) ) { -;CHECK: sub sp, sp, #16 -;CHECK: push {r11, lr} -;CHECK: add r12, sp, #8 -;CHECK: stm r12, {r0, r1, r2, r3} -;CHECK: add r0, sp, #12 -;CHECK: bl useInt -;CHECK: pop {r11, lr} -;CHECK: add sp, sp, #16 +;CHECK: sub sp, sp, #16 +;CHECK: stm sp, {r0, r1, r2, r3} +;CHECK: add r0, sp, #4 +;CHECK: add sp, sp, #16 +;CHECK: b useInt %1 = ptrtoint ptr %p1 to i32 tail call void @useInt(i32 %1) diff --git a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll index 4996cc8ecbf022..2ceb7a7b97a1fe 100644 --- a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll +++ b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll @@ -3,6 +3,28 @@ ; CHECK: function1 ; CHECK-NOT: vmov define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp { +; CHECK-LABEL: function1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: vldr d16, [sp, #40] +; CHECK-NEXT: vldr d17, [sp, #32] +; CHECK-NEXT: vmov r12, lr, d16 +; CHECK-NEXT: vldr d16, [sp, #16] +; CHECK-NEXT: vmov r4, r5, d17 +; CHECK-NEXT: vldr d17, [sp, #24] +; CHECK-NEXT: str r3, [sp, #36] +; CHECK-NEXT: str r2, [sp, #32] +; CHECK-NEXT: str r1, [sp, #44] +; CHECK-NEXT: str r0, [sp, #40] +; CHECK-NEXT: vstr d17, [sp, #16] +; CHECK-NEXT: vstr d16, [sp, #24] +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r1, lr +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: pop {r4, r5, r11, lr} +; CHECK-NEXT: b function2 entry: %call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind ret double %call diff --git a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll index 6fc56967bc7aa9..65aff46658fd1d 100644 --- 
a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll +++ b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll @@ -145,26 +145,21 @@ entry: define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x half>, <8 x half>) { ; SOFT-LABEL: many_args_test: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: push {r11, lr} -; SOFT-NEXT: sub sp, sp, #32 -; SOFT-NEXT: add r12, sp, #80 +; SOFT-NEXT: add r12, sp, #40 ; SOFT-NEXT: vld1.64 {d16, d17}, [r12] -; SOFT-NEXT: add r12, sp, #48 +; SOFT-NEXT: add r12, sp, #8 ; SOFT-NEXT: vabs.f16 q8, q8 ; SOFT-NEXT: vld1.64 {d18, d19}, [r12] -; SOFT-NEXT: add r12, sp, #64 +; SOFT-NEXT: add r12, sp, #24 ; SOFT-NEXT: vadd.f16 q8, q8, q9 ; SOFT-NEXT: vld1.64 {d18, d19}, [r12] ; SOFT-NEXT: add r12, sp, #16 ; SOFT-NEXT: vmul.f16 q8, q9, q8 ; SOFT-NEXT: vst1.64 {d16, d17}, [r12] -; SOFT-NEXT: mov r12, sp -; SOFT-NEXT: vldr d16, [sp, #40] -; SOFT-NEXT: vst1.16 {d16}, [r12:64]! -; SOFT-NEXT: str r3, [r12] -; SOFT-NEXT: bl use -; SOFT-NEXT: add sp, sp, #32 -; SOFT-NEXT: pop {r11, pc} +; SOFT-NEXT: vldr d16, [sp] +; SOFT-NEXT: vstr d16, [sp] +; SOFT-NEXT: str r3, [sp, #8] +; SOFT-NEXT: b use ; ; HARD-LABEL: many_args_test: ; HARD: @ %bb.0: @ %entry @@ -177,33 +172,25 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal ; ; SOFTEB-LABEL: many_args_test: ; SOFTEB: @ %bb.0: @ %entry -; SOFTEB-NEXT: .save {r11, lr} -; SOFTEB-NEXT: push {r11, lr} -; SOFTEB-NEXT: .pad #32 -; SOFTEB-NEXT: sub sp, sp, #32 -; SOFTEB-NEXT: add r12, sp, #80 -; SOFTEB-NEXT: mov lr, sp +; SOFTEB-NEXT: add r12, sp, #40 ; SOFTEB-NEXT: vld1.64 {d16, d17}, [r12] -; SOFTEB-NEXT: add r12, sp, #48 +; SOFTEB-NEXT: add r12, sp, #8 ; SOFTEB-NEXT: vrev64.16 q8, q8 ; SOFTEB-NEXT: vabs.f16 q8, q8 ; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12] -; SOFTEB-NEXT: add r12, sp, #64 +; SOFTEB-NEXT: add r12, sp, #24 ; SOFTEB-NEXT: vrev64.16 q9, q9 ; SOFTEB-NEXT: vadd.f16 q8, q8, q9 ; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12] ; SOFTEB-NEXT: add r12, sp, #16 ; SOFTEB-NEXT: vrev64.16 
q9, q9 ; SOFTEB-NEXT: vmul.f16 q8, q9, q8 -; SOFTEB-NEXT: vldr d18, [sp, #40] -; SOFTEB-NEXT: vrev64.16 d18, d18 -; SOFTEB-NEXT: vst1.16 {d18}, [lr:64]! -; SOFTEB-NEXT: str r3, [lr] +; SOFTEB-NEXT: vldr d18, [sp] ; SOFTEB-NEXT: vrev64.16 q8, q8 ; SOFTEB-NEXT: vst1.64 {d16, d17}, [r12] -; SOFTEB-NEXT: bl use -; SOFTEB-NEXT: add sp, sp, #32 -; SOFTEB-NEXT: pop {r11, pc} +; SOFTEB-NEXT: vstr d18, [sp] +; SOFTEB-NEXT: str r3, [sp, #8] +; SOFTEB-NEXT: b use ; ; HARDEB-LABEL: many_args_test: ; HARDEB: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/ARM/struct_byval.ll b/llvm/test/CodeGen/ARM/struct_byval.ll index 73a1b5ee33bca9..5564f254c9e74d 100644 --- a/llvm/test/CodeGen/ARM/struct_byval.ll +++ b/llvm/test/CodeGen/ARM/struct_byval.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s ; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s ; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi | FileCheck %s -check-prefix=NACL @@ -10,11 +11,122 @@ %struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] } define i32 @f() nounwind ssp { +; NACL-LABEL: f: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .save {r4, lr} +; NACL-NEXT: push {r4, lr} +; NACL-NEXT: .pad #152 +; NACL-NEXT: sub sp, sp, #152 +; NACL-NEXT: movw r0, :lower16:__stack_chk_guard +; NACL-NEXT: add r3, sp, #72 +; NACL-NEXT: movt r0, :upper16:__stack_chk_guard +; NACL-NEXT: mov lr, sp +; NACL-NEXT: ldr r0, [r0] +; NACL-NEXT: str r0, [sp, #148] +; NACL-NEXT: add r0, sp, #72 +; NACL-NEXT: add r12, r0, #16 +; NACL-NEXT: ldm r3, {r0, r1, r2, r3} +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; 
NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: bl e1 +; NACL-NEXT: movw r1, :lower16:__stack_chk_guard +; NACL-NEXT: ldr r0, [sp, #148] +; NACL-NEXT: movt r1, :upper16:__stack_chk_guard +; NACL-NEXT: ldr r1, [r1] +; NACL-NEXT: cmp r1, r0 +; NACL-NEXT: moveq r0, #0 +; NACL-NEXT: addeq sp, sp, #152 +; NACL-NEXT: popeq {r4, pc} +; NACL-NEXT: .LBB0_1: @ %entry +; NACL-NEXT: bl __stack_chk_fail +; +; NOMOVT-LABEL: f: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .save {r11, lr} +; NOMOVT-NEXT: push {r11, lr} +; NOMOVT-NEXT: .pad #144 +; NOMOVT-NEXT: sub sp, sp, #144 +; NOMOVT-NEXT: ldr r0, .LCPI0_0 +; NOMOVT-NEXT: mov r1, sp +; NOMOVT-NEXT: add r3, sp, #64 +; NOMOVT-NEXT: ldr r0, [r0] +; NOMOVT-NEXT: str r0, [sp, #140] +; NOMOVT-NEXT: add r0, sp, #64 +; NOMOVT-NEXT: add r0, r0, #16 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, 
[r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldm r3, {r0, r1, r2, r3} +; NOMOVT-NEXT: bl e1 +; NOMOVT-NEXT: ldr r0, [sp, #140] +; NOMOVT-NEXT: ldr r1, .LCPI0_0 +; NOMOVT-NEXT: ldr r1, [r1] +; NOMOVT-NEXT: cmp r1, r0 +; NOMOVT-NEXT: moveq r0, #0 +; NOMOVT-NEXT: addeq sp, sp, #144 +; NOMOVT-NEXT: popeq {r11, pc} +; NOMOVT-NEXT: .LBB0_1: @ %entry +; NOMOVT-NEXT: bl __stack_chk_fail +; NOMOVT-NEXT: .p2align 2 +; NOMOVT-NEXT: @ %bb.2: +; NOMOVT-NEXT: .LCPI0_0: +; NOMOVT-NEXT: .long __stack_chk_guard entry: -; CHECK-LABEL: f: -; CHECK: ldr -; CHECK: str -; CHECK-NOT:bne %st = alloca %struct.SmallStruct, align 4 %call = call i32 @e1(ptr byval(%struct.SmallStruct) %st) ret i32 0 @@ -22,20 +134,95 @@ entry: ; Generate a loop for large struct byval define i32 @g() nounwind ssp { -entry: -; CHECK-LABEL: g: -; CHECK: ldr -; CHECK: sub -; CHECK: str -; CHECK: bne ; NACL-LABEL: g: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .save {r4, r5, r11, lr} +; NACL-NEXT: push {r4, r5, r11, lr} +; NACL-NEXT: .pad #2224 +; NACL-NEXT: sub sp, sp, #2224 +; NACL-NEXT: movw r0, :lower16:__stack_chk_guard +; NACL-NEXT: movt r0, :upper16:__stack_chk_guard +; NACL-NEXT: ldr r0, [r0] +; NACL-NEXT: str r0, [sp, #2220] +; NACL-NEXT: sub sp, sp, #2192 +; NACL-NEXT: add lr, sp, #2048 +; NACL-NEXT: ldr r1, [sp, #2208] +; NACL-NEXT: add r0, lr, #156 +; NACL-NEXT: ldr r2, [sp, #2212] +; NACL-NEXT: add r12, r0, #16 +; NACL-NEXT: ldr r0, [sp, #2204] +; NACL-NEXT: ldr r3, [sp, #2216] +; NACL-NEXT: movw lr, #2192 +; NACL-NEXT: mov r4, sp +; NACL-NEXT: .LBB1_1: @ %entry +; NACL-NEXT: @ =>This Inner Loop Header: Depth=1 +; NACL-NEXT: ldr r5, [r12], #4 +; NACL-NEXT: subs lr, lr, #4 +; NACL-NEXT: str r5, [r4], 
#4 +; NACL-NEXT: bne .LBB1_1 +; NACL-NEXT: @ %bb.2: @ %entry +; NACL-NEXT: bl e2 +; NACL-NEXT: add sp, sp, #2192 +; NACL-NEXT: movw r1, :lower16:__stack_chk_guard +; NACL-NEXT: ldr r0, [sp, #2220] +; NACL-NEXT: movt r1, :upper16:__stack_chk_guard +; NACL-NEXT: ldr r1, [r1] +; NACL-NEXT: cmp r1, r0 +; NACL-NEXT: moveq r0, #0 +; NACL-NEXT: addeq sp, sp, #2224 +; NACL-NEXT: popeq {r4, r5, r11, pc} +; NACL-NEXT: .LBB1_3: @ %entry +; NACL-NEXT: bl __stack_chk_fail +; +; NOMOVT-LABEL: g: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .save {r11, lr} +; NOMOVT-NEXT: push {r11, lr} +; NOMOVT-NEXT: .pad #168 +; NOMOVT-NEXT: sub sp, sp, #168 +; NOMOVT-NEXT: .pad #2048 +; NOMOVT-NEXT: sub sp, sp, #2048 +; NOMOVT-NEXT: ldr r0, .LCPI1_1 +; NOMOVT-NEXT: ldr r0, [r0] +; NOMOVT-NEXT: str r0, [sp, #2212] +; NOMOVT-NEXT: sub sp, sp, #2192 +; NOMOVT-NEXT: add lr, sp, #2048 +; NOMOVT-NEXT: ldr r1, .LCPI1_0 +; NOMOVT-NEXT: add r0, lr, #148 +; NOMOVT-NEXT: mov r2, sp +; NOMOVT-NEXT: add r0, r0, #16 +; NOMOVT-NEXT: .LBB1_1: @ %entry +; NOMOVT-NEXT: @ =>This Inner Loop Header: Depth=1 +; NOMOVT-NEXT: ldr r3, [r0], #4 +; NOMOVT-NEXT: subs r1, r1, #4 +; NOMOVT-NEXT: str r3, [r2], #4 +; NOMOVT-NEXT: bne .LBB1_1 +; NOMOVT-NEXT: @ %bb.2: @ %entry +; NOMOVT-NEXT: ldr r0, [sp, #2196] +; NOMOVT-NEXT: ldr r1, [sp, #2200] +; NOMOVT-NEXT: ldr r2, [sp, #2204] +; NOMOVT-NEXT: ldr r3, [sp, #2208] +; NOMOVT-NEXT: bl e2 +; NOMOVT-NEXT: add sp, sp, #2192 +; NOMOVT-NEXT: ldr r0, [sp, #2212] +; NOMOVT-NEXT: ldr r1, .LCPI1_1 +; NOMOVT-NEXT: ldr r1, [r1] +; NOMOVT-NEXT: cmp r1, r0 +; NOMOVT-NEXT: moveq r0, #0 +; NOMOVT-NEXT: addeq sp, sp, #168 +; NOMOVT-NEXT: addeq sp, sp, #2048 +; NOMOVT-NEXT: popeq {r11, pc} +; NOMOVT-NEXT: .LBB1_3: @ %entry +; NOMOVT-NEXT: bl __stack_chk_fail +; NOMOVT-NEXT: .p2align 2 +; NOMOVT-NEXT: @ %bb.4: +; NOMOVT-NEXT: .LCPI1_0: +; NOMOVT-NEXT: .long 2192 @ 0x890 +; NOMOVT-NEXT: .LCPI1_1: +; NOMOVT-NEXT: .long __stack_chk_guard +entry: ; Ensure that use movw instead of constpool for 
the loop trip count. But don't ; match the __stack_chk_guard movw -; NACL: movw {{r[0-9]+|lr}}, # -; NACL: ldr -; NACL: sub -; NACL: str -; NACL: bne %st = alloca %struct.LargeStruct, align 4 %call = call i32 @e2(ptr byval(%struct.LargeStruct) %st) ret i32 0 @@ -43,17 +230,90 @@ entry: ; Generate a loop using NEON instructions define i32 @h() nounwind ssp { +; NACL-LABEL: h: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .save {r4, r5, r6, r7, r8, lr} +; NACL-NEXT: push {r4, r5, r6, r7, r8, lr} +; NACL-NEXT: .pad #168 +; NACL-NEXT: sub sp, sp, #168 +; NACL-NEXT: .pad #2048 +; NACL-NEXT: sub sp, sp, #2048 +; NACL-NEXT: movw r0, :lower16:__stack_chk_guard +; NACL-NEXT: movt r0, :upper16:__stack_chk_guard +; NACL-NEXT: ldr r0, [r0] +; NACL-NEXT: str r0, [sp, #2212] +; NACL-NEXT: sub sp, sp, #2192 +; NACL-NEXT: add r3, sp, #2192 +; NACL-NEXT: add r0, sp, #2192 +; NACL-NEXT: add r12, r0, #16 +; NACL-NEXT: movw lr, #2192 +; NACL-NEXT: ldm r3, {r0, r1, r2, r3} +; NACL-NEXT: mov r4, sp +; NACL-NEXT: .LBB2_1: @ %entry +; NACL-NEXT: @ =>This Inner Loop Header: Depth=1 +; NACL-NEXT: vld1.32 {d16, d17}, [r12]! +; NACL-NEXT: subs lr, lr, #16 +; NACL-NEXT: vst1.32 {d16, d17}, [r4]! 
+; NACL-NEXT: bne .LBB2_1 +; NACL-NEXT: @ %bb.2: @ %entry +; NACL-NEXT: bl e3 +; NACL-NEXT: add sp, sp, #2192 +; NACL-NEXT: movw r1, :lower16:__stack_chk_guard +; NACL-NEXT: ldr r0, [sp, #2212] +; NACL-NEXT: movt r1, :upper16:__stack_chk_guard +; NACL-NEXT: ldr r1, [r1] +; NACL-NEXT: cmp r1, r0 +; NACL-NEXT: moveq r0, #0 +; NACL-NEXT: addeq sp, sp, #168 +; NACL-NEXT: addeq sp, sp, #2048 +; NACL-NEXT: popeq {r4, r5, r6, r7, r8, pc} +; NACL-NEXT: .LBB2_3: @ %entry +; NACL-NEXT: bl __stack_chk_fail +; +; NOMOVT-LABEL: h: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .save {r6, r10, r11, lr} +; NOMOVT-NEXT: push {r6, r10, r11, lr} +; NOMOVT-NEXT: .setfp r11, sp, #8 +; NOMOVT-NEXT: add r11, sp, #8 +; NOMOVT-NEXT: .pad #2224 +; NOMOVT-NEXT: sub sp, sp, #2224 +; NOMOVT-NEXT: bic sp, sp, #15 +; NOMOVT-NEXT: ldr r0, .LCPI2_1 +; NOMOVT-NEXT: mov r6, sp +; NOMOVT-NEXT: ldr r0, [r0] +; NOMOVT-NEXT: str r0, [r6, #2220] +; NOMOVT-NEXT: sub sp, sp, #2192 +; NOMOVT-NEXT: mov r0, r6 +; NOMOVT-NEXT: ldr r1, .LCPI2_0 +; NOMOVT-NEXT: add r0, r0, #16 +; NOMOVT-NEXT: mov r2, sp +; NOMOVT-NEXT: .LBB2_1: @ %entry +; NOMOVT-NEXT: @ =>This Inner Loop Header: Depth=1 +; NOMOVT-NEXT: ldr r3, [r0], #4 +; NOMOVT-NEXT: subs r1, r1, #4 +; NOMOVT-NEXT: str r3, [r2], #4 +; NOMOVT-NEXT: bne .LBB2_1 +; NOMOVT-NEXT: @ %bb.2: @ %entry +; NOMOVT-NEXT: ldm r6, {r0, r1, r2, r3} +; NOMOVT-NEXT: bl e3 +; NOMOVT-NEXT: add sp, sp, #2192 +; NOMOVT-NEXT: ldr r0, [r6, #2220] +; NOMOVT-NEXT: ldr r1, .LCPI2_1 +; NOMOVT-NEXT: ldr r1, [r1] +; NOMOVT-NEXT: cmp r1, r0 +; NOMOVT-NEXT: moveq r0, #0 +; NOMOVT-NEXT: subeq sp, r11, #8 +; NOMOVT-NEXT: popeq {r6, r10, r11, pc} +; NOMOVT-NEXT: .LBB2_3: @ %entry +; NOMOVT-NEXT: bl __stack_chk_fail +; NOMOVT-NEXT: .p2align 2 +; NOMOVT-NEXT: @ %bb.4: +; NOMOVT-NEXT: .LCPI2_0: +; NOMOVT-NEXT: .long 2192 @ 0x890 +; NOMOVT-NEXT: .LCPI2_1: +; NOMOVT-NEXT: .long __stack_chk_guard entry: -; CHECK-LABEL: h: -; CHECK: vld1 -; CHECK: sub -; CHECK: vst1 -; CHECK: bne -; NACL: movw 
{{r[0-9]+|lr}}, # -; NACL: vld1 -; NACL: sub -; NACL: vst1 -; NACL: bne %st = alloca %struct.LargeStruct, align 16 %call = call i32 @e3(ptr byval(%struct.LargeStruct) align 16 %st) ret i32 0 @@ -67,16 +327,50 @@ declare i32 @e3(ptr nocapture byval(%struct.LargeStruct) align 16 %in) nounwind ; We can't do tail call since address of s is passed to the callee and part of ; s is in caller's local frame. define void @f3(ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; CHECK-LABEL: f3 -; CHECK: bl _consumestruct +; NACL-LABEL: f3: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .pad #16 +; NACL-NEXT: sub sp, sp, #16 +; NACL-NEXT: stm sp, {r0, r1, r2, r3} +; NACL-NEXT: mov r0, sp +; NACL-NEXT: mov r1, #80 +; NACL-NEXT: add sp, sp, #16 +; NACL-NEXT: b consumestruct +; +; NOMOVT-LABEL: f3: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .pad #16 +; NOMOVT-NEXT: sub sp, sp, #16 +; NOMOVT-NEXT: stm sp, {r0, r1, r2, r3} +; NOMOVT-NEXT: mov r0, sp +; NOMOVT-NEXT: mov r1, #80 +; NOMOVT-NEXT: add sp, sp, #16 +; NOMOVT-NEXT: b consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void } define void @f4(ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; CHECK-LABEL: f4 -; CHECK: bl _consumestruct +; NACL-LABEL: f4: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .pad #16 +; NACL-NEXT: sub sp, sp, #16 +; NACL-NEXT: stm sp, {r0, r1, r2, r3} +; NACL-NEXT: mov r0, sp +; NACL-NEXT: mov r1, #80 +; NACL-NEXT: add sp, sp, #16 +; NACL-NEXT: b consumestruct +; +; NOMOVT-LABEL: f4: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .pad #16 +; NOMOVT-NEXT: sub sp, sp, #16 +; NOMOVT-NEXT: stm sp, {r0, r1, r2, r3} +; NOMOVT-NEXT: mov r0, sp +; NOMOVT-NEXT: mov r1, #80 +; NOMOVT-NEXT: add sp, sp, #16 +; NOMOVT-NEXT: b consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void @@ -84,16 +378,34 @@ entry: ; We can do tail call here since s is in the incoming argument area. 
define void @f5(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; CHECK-LABEL: f5 -; CHECK: b{{(\.w)?}} _consumestruct +; NACL-LABEL: f5: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: mov r0, sp +; NACL-NEXT: mov r1, #80 +; NACL-NEXT: b consumestruct +; +; NOMOVT-LABEL: f5: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: mov r0, sp +; NOMOVT-NEXT: mov r1, #80 +; NOMOVT-NEXT: b consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void } define void @f6(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; CHECK-LABEL: f6 -; CHECK: b{{(\.w)?}} _consumestruct +; NACL-LABEL: f6: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: mov r0, sp +; NACL-NEXT: mov r1, #80 +; NACL-NEXT: b consumestruct +; +; NOMOVT-LABEL: f6: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: mov r0, sp +; NOMOVT-NEXT: mov r1, #80 +; NOMOVT-NEXT: b consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void @@ -106,10 +418,81 @@ declare void @consumestruct(ptr nocapture %structp, i32 %structsize) nounwind declare void @use_I(ptr byval(%struct.I.8)) define void @test_I_16() { -; CHECK-LABEL: test_I_16 -; CHECK: ldrb -; CHECK: strb +; NACL-LABEL: test_I_16: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .save {r11, lr} +; NACL-NEXT: push {r11, lr} +; NACL-NEXT: .pad #40 +; NACL-NEXT: sub sp, sp, #40 +; NACL-NEXT: ldr r0, [r0] +; NACL-NEXT: mov r1, sp +; NACL-NEXT: vld1.32 {d16, d17}, [r2]! +; NACL-NEXT: vst1.32 {d16, d17}, [r1]! 
+; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: mov r2, r0 +; NACL-NEXT: mov r1, r0 +; NACL-NEXT: mov r3, r0 +; NACL-NEXT: bl use_I +; NACL-NEXT: add sp, sp, #40 +; NACL-NEXT: pop {r11, pc} +; +; NOMOVT-LABEL: test_I_16: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .save {r11, lr} +; NOMOVT-NEXT: push {r11, lr} +; NOMOVT-NEXT: .setfp r11, sp +; NOMOVT-NEXT: mov r11, sp +; NOMOVT-NEXT: .pad #40 +; NOMOVT-NEXT: sub sp, sp, #40 +; NOMOVT-NEXT: bic sp, sp, #15 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: mov r2, sp +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r0] +; NOMOVT-NEXT: mov r1, r0 +; NOMOVT-NEXT: mov r2, r0 +; NOMOVT-NEXT: mov r3, r0 +; NOMOVT-NEXT: bl use_I +; NOMOVT-NEXT: mov sp, r11 +; NOMOVT-NEXT: pop {r11, pc} entry: call void @use_I(ptr byval(%struct.I.8) align 16 undef) ret void } +;; NOTE: These prefixes are unused and the list 
is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/tail-call-float.ll b/llvm/test/CodeGen/ARM/tail-call-float.ll index 8cca7e0f70683e..6fa4b9229f64c8 100644 --- a/llvm/test/CodeGen/ARM/tail-call-float.ll +++ b/llvm/test/CodeGen/ARM/tail-call-float.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi soft -O0 -o - < %s \ ; RUN: | FileCheck %s -check-prefix CHECK-SOFT -check-prefix CHECK ; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi hard -O0 -o - < %s \ @@ -12,16 +13,42 @@ declare i1 @non_variadic_big(float, float, float, float, float, float) declare i1 @variadic(float, ...) define void @non_variadic_fp(float %x, float %y) { -; CHECK-LABEL: non_variadic_fp: -; CHECK: b non_variadic +; CHECK-SOFT-LABEL: non_variadic_fp: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: mov r3, r1 +; CHECK-SOFT-NEXT: mov r2, r0 +; CHECK-SOFT-NEXT: mov r0, r3 +; CHECK-SOFT-NEXT: mov r1, r2 +; CHECK-SOFT-NEXT: b non_variadic +; +; CHECK-HARD-LABEL: non_variadic_fp: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: vmov.f32 s3, s1 +; CHECK-HARD-NEXT: vmov.f32 s2, s0 +; CHECK-HARD-NEXT: vmov.f32 s0, s3 +; CHECK-HARD-NEXT: vmov.f32 s1, s2 +; CHECK-HARD-NEXT: b non_variadic entry: %call = tail call i1 (float, float, float, float) @non_variadic(float %y, float %x, float %x, float %y) ret void } define void @variadic_fp(float %x, float %y) { -; CHECK-LABEL: variadic_fp: -; CHECK: b variadic +; CHECK-SOFT-LABEL: variadic_fp: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: mov r3, r1 +; CHECK-SOFT-NEXT: mov r2, r0 +; CHECK-SOFT-NEXT: mov r0, r3 +; CHECK-SOFT-NEXT: mov r1, r2 +; CHECK-SOFT-NEXT: b variadic +; +; CHECK-HARD-LABEL: variadic_fp: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: vmov r2, s0 +; CHECK-HARD-NEXT: vmov r3, s1 +; CHECK-HARD-NEXT: mov r0, r3 +; CHECK-HARD-NEXT: mov r1, r2 +; 
CHECK-HARD-NEXT: b variadic entry: %call = tail call i1 (float, ...) @variadic(float %y, float %x, float %x, float %y) ret void @@ -31,9 +58,32 @@ entry: ; of them to handle the 6 arguments. With hard-float, we have plenty of regs ; (s0-s15) to pass FP arguments. define void @non_variadic_fp_big(float %x, float %y) { -; CHECK-LABEL: non_variadic_fp_big: -; CHECK-SOFT: bl non_variadic_big -; CHECK-HARD: b non_variadic_big +; CHECK-SOFT-LABEL: non_variadic_fp_big: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: push {r11, lr} +; CHECK-SOFT-NEXT: sub sp, sp, #8 +; CHECK-SOFT-NEXT: mov r3, r1 +; CHECK-SOFT-NEXT: mov r2, r0 +; CHECK-SOFT-NEXT: vmov s0, r3 +; CHECK-SOFT-NEXT: vmov s0, r2 +; CHECK-SOFT-NEXT: mov r0, sp +; CHECK-SOFT-NEXT: str r3, [r0, #4] +; CHECK-SOFT-NEXT: str r2, [r0] +; CHECK-SOFT-NEXT: mov r0, r3 +; CHECK-SOFT-NEXT: mov r1, r2 +; CHECK-SOFT-NEXT: bl non_variadic_big +; CHECK-SOFT-NEXT: add sp, sp, #8 +; CHECK-SOFT-NEXT: pop {r11, pc} +; +; CHECK-HARD-LABEL: non_variadic_fp_big: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: vmov.f32 s5, s1 +; CHECK-HARD-NEXT: vmov.f32 s4, s0 +; CHECK-HARD-NEXT: vmov.f32 s0, s5 +; CHECK-HARD-NEXT: vmov.f32 s1, s4 +; CHECK-HARD-NEXT: vmov.f32 s2, s4 +; CHECK-HARD-NEXT: vmov.f32 s3, s5 +; CHECK-HARD-NEXT: b non_variadic_big entry: %call = tail call i1 (float, float, float, float, float, float) @non_variadic_big(float %y, float %x, float %x, float %y, float %x, float %y) ret void @@ -41,9 +91,40 @@ entry: ; Variadic functions cannot use FP regs to pass arguments; only GP regs. 
define void @variadic_fp_big(float %x, float %y) { -; CHECK-LABEL: variadic_fp_big: -; CHECK: bl variadic +; CHECK-SOFT-LABEL: variadic_fp_big: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: push {r11, lr} +; CHECK-SOFT-NEXT: sub sp, sp, #8 +; CHECK-SOFT-NEXT: mov r3, r1 +; CHECK-SOFT-NEXT: mov r2, r0 +; CHECK-SOFT-NEXT: vmov s0, r3 +; CHECK-SOFT-NEXT: vmov s0, r2 +; CHECK-SOFT-NEXT: mov r0, sp +; CHECK-SOFT-NEXT: str r3, [r0, #4] +; CHECK-SOFT-NEXT: str r2, [r0] +; CHECK-SOFT-NEXT: mov r0, r3 +; CHECK-SOFT-NEXT: mov r1, r2 +; CHECK-SOFT-NEXT: bl variadic +; CHECK-SOFT-NEXT: add sp, sp, #8 +; CHECK-SOFT-NEXT: pop {r11, pc} +; +; CHECK-HARD-LABEL: variadic_fp_big: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: push {r11, lr} +; CHECK-HARD-NEXT: sub sp, sp, #8 +; CHECK-HARD-NEXT: mov r0, sp +; CHECK-HARD-NEXT: vstr s1, [r0, #4] +; CHECK-HARD-NEXT: vstr s0, [r0] +; CHECK-HARD-NEXT: vmov r2, s0 +; CHECK-HARD-NEXT: vmov r3, s1 +; CHECK-HARD-NEXT: mov r0, r3 +; CHECK-HARD-NEXT: mov r1, r2 +; CHECK-HARD-NEXT: bl variadic +; CHECK-HARD-NEXT: add sp, sp, #8 +; CHECK-HARD-NEXT: pop {r11, pc} entry: %call = tail call i1 (float, ...) @variadic(float %y, float %x, float %x, float %y, float %x, float %y) ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits