https://github.com/kiran-isaac updated https://github.com/llvm/llvm-project/pull/102896
>From eb7551e83618d8452f5dadae1be4aff8f6c9d23c Mon Sep 17 00:00:00 2001 From: Kiran <kiran.st...@arm.com> Date: Thu, 8 Aug 2024 13:07:24 +0100 Subject: [PATCH 1/5] [ARM] musttail fixes Backend: - Caller and callee arguments no longer have to match, just to take up the same space, as they can be changed before the call - Allowed tail calls if caller and callee both (or neither) use sret, whereas before it would be disallowed if either used sret - Allowed tail calls if byval args are used - Added debug trace for IsEligibleForTailCallOptimization Frontend (clang): - Do not generate extra alloca if sret is used with musttail, as the space for the sret is allocated already Change-Id: Ic7f246a7eca43c06874922d642d7dc44bdfc98ec --- clang/lib/CodeGen/CGCall.cpp | 2 +- llvm/include/llvm/CodeGen/CallingConvLower.h | 2 + llvm/lib/CodeGen/CallingConvLower.cpp | 61 +++ llvm/lib/Target/ARM/ARMISelLowering.cpp | 141 ++---- .../ARM/2013-05-13-AAPCS-byval-padding.ll | 16 +- .../ARM/2013-05-13-AAPCS-byval-padding2.ll | 13 +- llvm/test/CodeGen/ARM/fp-arg-shuffle.ll | 22 + llvm/test/CodeGen/ARM/fp16-vector-argument.ll | 41 +- llvm/test/CodeGen/ARM/struct_byval.ll | 455 ++++++++++++++++-- llvm/test/CodeGen/ARM/tail-call-float.ll | 99 +++- 10 files changed, 661 insertions(+), 191 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index ca2c79b51ac96b..05773f91f986ba 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5086,7 +5086,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, RawAddress SRetAlloca = RawAddress::invalid(); llvm::Value *UnusedReturnSizePtr = nullptr; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { - if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) { + if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) { SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() + IRFunctionArgs.getSRetArgNo(), RetTy, CharUnits::fromQuantity(1)); 
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index d5a63c8dd627a0..12a6df16e279b4 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -540,6 +540,8 @@ class CCState { }); } + void dump() const; + private: /// MarkAllocated - Mark a register and all of its aliases as allocated. void MarkAllocated(MCPhysReg Reg); diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp index b7152587a9fa05..7ba3ea83115db2 100644 --- a/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/llvm/lib/CodeGen/CallingConvLower.cpp @@ -290,3 +290,64 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC, return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(), RVLocs2.end(), AreCompatible); } + +void CCState::dump() const { + dbgs() << "CCState:\n"; + for (const CCValAssign &Loc : Locs) { + if (Loc.isRegLoc()) { + dbgs() << " Reg " << TRI.getName(Loc.getLocReg()); + } else if (Loc.isMemLoc()) { + dbgs() << " Mem " << Loc.getLocMemOffset(); + } else { + assert(Loc.isPendingLoc()); + dbgs() << " Pend " << Loc.getExtraInfo(); + } + + dbgs() << " ValVT:" << Loc.getValVT(); + dbgs() << " LocVT:" << Loc.getLocVT(); + + if (Loc.needsCustom()) + dbgs() << " custom"; + + switch (Loc.getLocInfo()) { + case CCValAssign::Full: + dbgs() << " Full"; + break; + case CCValAssign::SExt: + dbgs() << " SExt"; + break; + case CCValAssign::ZExt: + dbgs() << " ZExt"; + break; + case CCValAssign::AExt: + dbgs() << " AExt"; + break; + case CCValAssign::SExtUpper: + dbgs() << " SExtUpper"; + break; + case CCValAssign::ZExtUpper: + dbgs() << " ZExtUpper"; + break; + case CCValAssign::AExtUpper: + dbgs() << " AExtUpper"; + break; + case CCValAssign::BCvt: + dbgs() << " BCvt"; + break; + case CCValAssign::Trunc: + dbgs() << " Trunc"; + break; + case CCValAssign::VExt: + dbgs() << " VExt"; + break; + case CCValAssign::FPExt: + dbgs() << " FPExt"; + break; + case 
CCValAssign::Indirect: + dbgs() << " Indirect"; + break; + } + + dbgs() << "\n"; + } +} diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 853f54943eebf1..b5fdf630a8132d 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2407,8 +2407,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = false; // For both the non-secure calls and the returns from a CMSE entry function, - // the function needs to do some extra work afte r the call, or before the - // return, respectively, thus it cannot end with atail call + // the function needs to do some extra work after the call, or before the + // return, respectively, thus it cannot end with a tail call if (isCmseNSCall || AFI->isCmseNSEntryFunction()) isTailCall = false; @@ -2960,50 +2960,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, Size = std::max<int>(Size - Excess, 0); } -/// MatchingStackOffset - Return true if the given stack call argument is -/// already available in the same position (relatively) of the caller's -/// incoming argument stack. -static -bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, - MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, - const TargetInstrInfo *TII) { - unsigned Bytes = Arg.getValueSizeInBits() / 8; - int FI = std::numeric_limits<int>::max(); - if (Arg.getOpcode() == ISD::CopyFromReg) { - Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); - if (!VR.isVirtual()) - return false; - MachineInstr *Def = MRI->getVRegDef(VR); - if (!Def) - return false; - if (!Flags.isByVal()) { - if (!TII->isLoadFromStackSlot(*Def, FI)) - return false; - } else { - return false; - } - } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { - if (Flags.isByVal()) - // ByVal argument is passed in as a pointer but it's now being - // dereferenced. e.g. 
- // define @foo(%struct.X* %A) { - // tail call @bar(%struct.X* byval %A) - // } - return false; - SDValue Ptr = Ld->getBasePtr(); - FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); - if (!FINode) - return false; - FI = FINode->getIndex(); - } else - return false; - - assert(FI != std::numeric_limits<int>::max()); - if (!MFI.isFixedObjectIndex(FI)) - return false; - return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); -} - /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. Note that this function also @@ -3045,8 +3001,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( for (const CCValAssign &AL : ArgLocs) if (AL.isRegLoc()) AddressRegisters.erase(AL.getLocReg()); - if (AddressRegisters.empty()) + if (AddressRegisters.empty()) { + LLVM_DEBUG(dbgs() << "false (no space for target address)\n"); return false; + } } // Look for obvious safe cases to perform tail call optimization that do not @@ -3055,18 +3013,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. - if (CallerF.hasFnAttribute("interrupt")) + if (CallerF.hasFnAttribute("interrupt")) { + LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n"); return false; + } - if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) + if (canGuaranteeTCO(CalleeCC, + getTargetMachine().Options.GuaranteedTailCallOpt)) { + LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false") + << " (guaranteed tail-call CC)\n"); return CalleeCC == CallerCC; + } - // Also avoid sibcall optimization if either caller or callee uses struct - // return semantics. 
+ // Also avoid sibcall optimization if only one of caller or callee uses + // struct return semantics. bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); bool isCallerStructRet = MF.getFunction().hasStructRetAttr(); - if (isCalleeStructRet || isCallerStructRet) + if (isCalleeStructRet != isCallerStructRet) { + LLVM_DEBUG(dbgs() << "false (struct-ret)\n"); return false; + } // Externally-defined functions with weak linkage should not be // tail-called on ARM when the OS does not support dynamic @@ -3079,8 +3045,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( const GlobalValue *GV = G->getGlobal(); const Triple &TT = getTargetMachine().getTargetTriple(); if (GV->hasExternalWeakLinkage() && - (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) + (!TT.isOSWindows() || TT.isOSBinFormatELF() || + TT.isOSBinFormatMachO())) { + LLVM_DEBUG(dbgs() << "false (external weak linkage)\n"); return false; + } } // Check that the call results are passed in the same way. @@ -3089,70 +3058,44 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( getEffectiveCallingConv(CalleeCC, isVarArg), getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins, CCAssignFnForReturn(CalleeCC, isVarArg), - CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) + CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) { + LLVM_DEBUG(dbgs() << "false (incompatible results)\n"); return false; + } // The callee has to preserve all registers the caller needs to preserve. 
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (CalleeCC != CallerCC) { const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); - if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) { + LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n"); return false; + } } - // If Caller's vararg or byval argument has been split between registers and + // If Caller's vararg argument has been split between registers and // stack, do not perform tail call, since part of the argument is in caller's // local frame. const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>(); - if (AFI_Caller->getArgRegsSaveSize()) + if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) { + LLVM_DEBUG(dbgs() << "false (vararg arg reg save area)\n"); return false; + } // If the callee takes no arguments then go on to check the results of the // call. - if (!Outs.empty()) { - if (CCInfo.getStackSize()) { - // Check if the arguments are already laid out in the right way as - // the caller's fixed stack objects. - MachineFrameInfo &MFI = MF.getFrameInfo(); - const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); - i != e; - ++i, ++realArgIdx) { - CCValAssign &VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - SDValue Arg = OutVals[realArgIdx]; - ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; - if (VA.getLocInfo() == CCValAssign::Indirect) - return false; - if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) { - // f64 and vector types are split into multiple registers or - // register/stack-slot combinations. The types will not match - // the registers; give up on memory f64 refs until we figure - // out what to do about this. 
- if (!VA.isRegLoc()) - return false; - if (!ArgLocs[++i].isRegLoc()) - return false; - if (RegVT == MVT::v2f64) { - if (!ArgLocs[++i].isRegLoc()) - return false; - if (!ArgLocs[++i].isRegLoc()) - return false; - } - } else if (!VA.isRegLoc()) { - if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, - MFI, MRI, TII)) - return false; - } - } - } - - const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) - return false; + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) { + LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n"); + return false; } + // If the stack arguments for this call do not fit into our own save area then + // the call cannot be made tail. + if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize()) + return false; + + LLVM_DEBUG(dbgs() << "true\n"); return true; } diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll index d8e22f4f5312ae..e186ae3a961502 100644 --- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll +++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll @@ -12,17 +12,11 @@ define void @check227( ; arg1 --> SP+188 entry: - -;CHECK: sub sp, sp, #12 -;CHECK: push {r11, lr} -;CHECK: sub sp, sp, #4 -;CHECK: add r0, sp, #12 -;CHECK: stm r0, {r1, r2, r3} -;CHECK: ldr r0, [sp, #212] -;CHECK: bl useInt -;CHECK: add sp, sp, #4 -;CHECK: pop {r11, lr} -;CHECK: add sp, sp, #12 +; CHECK: sub sp, sp, #12 +; CHECK: stm sp, {r1, r2, r3} +; CHECK: ldr r0, [sp, #200] +; CHECK: add sp, sp, #12 +; CHECK: b useInt %0 = ptrtoint ptr %arg1 to i32 tail call void @useInt(i32 %0) diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll index 0c5d22984b99e1..efdecce9ae723a 100644 --- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll +++ 
b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll @@ -7,14 +7,11 @@ define void @foo(ptr byval(%struct4bytes) %p0, ; --> R0 ptr byval(%struct20bytes) %p1 ; --> R1,R2,R3, [SP+0 .. SP+8) ) { -;CHECK: sub sp, sp, #16 -;CHECK: push {r11, lr} -;CHECK: add r12, sp, #8 -;CHECK: stm r12, {r0, r1, r2, r3} -;CHECK: add r0, sp, #12 -;CHECK: bl useInt -;CHECK: pop {r11, lr} -;CHECK: add sp, sp, #16 +;CHECK: sub sp, sp, #16 +;CHECK: stm sp, {r0, r1, r2, r3} +;CHECK: add r0, sp, #4 +;CHECK: add sp, sp, #16 +;CHECK: b useInt %1 = ptrtoint ptr %p1 to i32 tail call void @useInt(i32 %1) diff --git a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll index 4996cc8ecbf022..2ceb7a7b97a1fe 100644 --- a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll +++ b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll @@ -3,6 +3,28 @@ ; CHECK: function1 ; CHECK-NOT: vmov define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp { +; CHECK-LABEL: function1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: vldr d16, [sp, #40] +; CHECK-NEXT: vldr d17, [sp, #32] +; CHECK-NEXT: vmov r12, lr, d16 +; CHECK-NEXT: vldr d16, [sp, #16] +; CHECK-NEXT: vmov r4, r5, d17 +; CHECK-NEXT: vldr d17, [sp, #24] +; CHECK-NEXT: str r3, [sp, #36] +; CHECK-NEXT: str r2, [sp, #32] +; CHECK-NEXT: str r1, [sp, #44] +; CHECK-NEXT: str r0, [sp, #40] +; CHECK-NEXT: vstr d17, [sp, #16] +; CHECK-NEXT: vstr d16, [sp, #24] +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r1, lr +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: pop {r4, r5, r11, lr} +; CHECK-NEXT: b function2 entry: %call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind ret double %call diff --git a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll index 6fc56967bc7aa9..65aff46658fd1d 100644 --- 
a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll +++ b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll @@ -145,26 +145,21 @@ entry: define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x half>, <8 x half>) { ; SOFT-LABEL: many_args_test: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: push {r11, lr} -; SOFT-NEXT: sub sp, sp, #32 -; SOFT-NEXT: add r12, sp, #80 +; SOFT-NEXT: add r12, sp, #40 ; SOFT-NEXT: vld1.64 {d16, d17}, [r12] -; SOFT-NEXT: add r12, sp, #48 +; SOFT-NEXT: add r12, sp, #8 ; SOFT-NEXT: vabs.f16 q8, q8 ; SOFT-NEXT: vld1.64 {d18, d19}, [r12] -; SOFT-NEXT: add r12, sp, #64 +; SOFT-NEXT: add r12, sp, #24 ; SOFT-NEXT: vadd.f16 q8, q8, q9 ; SOFT-NEXT: vld1.64 {d18, d19}, [r12] ; SOFT-NEXT: add r12, sp, #16 ; SOFT-NEXT: vmul.f16 q8, q9, q8 ; SOFT-NEXT: vst1.64 {d16, d17}, [r12] -; SOFT-NEXT: mov r12, sp -; SOFT-NEXT: vldr d16, [sp, #40] -; SOFT-NEXT: vst1.16 {d16}, [r12:64]! -; SOFT-NEXT: str r3, [r12] -; SOFT-NEXT: bl use -; SOFT-NEXT: add sp, sp, #32 -; SOFT-NEXT: pop {r11, pc} +; SOFT-NEXT: vldr d16, [sp] +; SOFT-NEXT: vstr d16, [sp] +; SOFT-NEXT: str r3, [sp, #8] +; SOFT-NEXT: b use ; ; HARD-LABEL: many_args_test: ; HARD: @ %bb.0: @ %entry @@ -177,33 +172,25 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal ; ; SOFTEB-LABEL: many_args_test: ; SOFTEB: @ %bb.0: @ %entry -; SOFTEB-NEXT: .save {r11, lr} -; SOFTEB-NEXT: push {r11, lr} -; SOFTEB-NEXT: .pad #32 -; SOFTEB-NEXT: sub sp, sp, #32 -; SOFTEB-NEXT: add r12, sp, #80 -; SOFTEB-NEXT: mov lr, sp +; SOFTEB-NEXT: add r12, sp, #40 ; SOFTEB-NEXT: vld1.64 {d16, d17}, [r12] -; SOFTEB-NEXT: add r12, sp, #48 +; SOFTEB-NEXT: add r12, sp, #8 ; SOFTEB-NEXT: vrev64.16 q8, q8 ; SOFTEB-NEXT: vabs.f16 q8, q8 ; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12] -; SOFTEB-NEXT: add r12, sp, #64 +; SOFTEB-NEXT: add r12, sp, #24 ; SOFTEB-NEXT: vrev64.16 q9, q9 ; SOFTEB-NEXT: vadd.f16 q8, q8, q9 ; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12] ; SOFTEB-NEXT: add r12, sp, #16 ; SOFTEB-NEXT: vrev64.16 
q9, q9 ; SOFTEB-NEXT: vmul.f16 q8, q9, q8 -; SOFTEB-NEXT: vldr d18, [sp, #40] -; SOFTEB-NEXT: vrev64.16 d18, d18 -; SOFTEB-NEXT: vst1.16 {d18}, [lr:64]! -; SOFTEB-NEXT: str r3, [lr] +; SOFTEB-NEXT: vldr d18, [sp] ; SOFTEB-NEXT: vrev64.16 q8, q8 ; SOFTEB-NEXT: vst1.64 {d16, d17}, [r12] -; SOFTEB-NEXT: bl use -; SOFTEB-NEXT: add sp, sp, #32 -; SOFTEB-NEXT: pop {r11, pc} +; SOFTEB-NEXT: vstr d18, [sp] +; SOFTEB-NEXT: str r3, [sp, #8] +; SOFTEB-NEXT: b use ; ; HARDEB-LABEL: many_args_test: ; HARDEB: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/ARM/struct_byval.ll b/llvm/test/CodeGen/ARM/struct_byval.ll index 73a1b5ee33bca9..5564f254c9e74d 100644 --- a/llvm/test/CodeGen/ARM/struct_byval.ll +++ b/llvm/test/CodeGen/ARM/struct_byval.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s ; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s ; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi | FileCheck %s -check-prefix=NACL @@ -10,11 +11,122 @@ %struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] } define i32 @f() nounwind ssp { +; NACL-LABEL: f: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .save {r4, lr} +; NACL-NEXT: push {r4, lr} +; NACL-NEXT: .pad #152 +; NACL-NEXT: sub sp, sp, #152 +; NACL-NEXT: movw r0, :lower16:__stack_chk_guard +; NACL-NEXT: add r3, sp, #72 +; NACL-NEXT: movt r0, :upper16:__stack_chk_guard +; NACL-NEXT: mov lr, sp +; NACL-NEXT: ldr r0, [r0] +; NACL-NEXT: str r0, [sp, #148] +; NACL-NEXT: add r0, sp, #72 +; NACL-NEXT: add r12, r0, #16 +; NACL-NEXT: ldm r3, {r0, r1, r2, r3} +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; 
NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: ldr r4, [r12], #4 +; NACL-NEXT: str r4, [lr], #4 +; NACL-NEXT: bl e1 +; NACL-NEXT: movw r1, :lower16:__stack_chk_guard +; NACL-NEXT: ldr r0, [sp, #148] +; NACL-NEXT: movt r1, :upper16:__stack_chk_guard +; NACL-NEXT: ldr r1, [r1] +; NACL-NEXT: cmp r1, r0 +; NACL-NEXT: moveq r0, #0 +; NACL-NEXT: addeq sp, sp, #152 +; NACL-NEXT: popeq {r4, pc} +; NACL-NEXT: .LBB0_1: @ %entry +; NACL-NEXT: bl __stack_chk_fail +; +; NOMOVT-LABEL: f: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .save {r11, lr} +; NOMOVT-NEXT: push {r11, lr} +; NOMOVT-NEXT: .pad #144 +; NOMOVT-NEXT: sub sp, sp, #144 +; NOMOVT-NEXT: ldr r0, .LCPI0_0 +; NOMOVT-NEXT: mov r1, sp +; NOMOVT-NEXT: add r3, sp, #64 +; NOMOVT-NEXT: ldr r0, [r0] +; NOMOVT-NEXT: str r0, [sp, #140] +; NOMOVT-NEXT: add r0, sp, #64 +; NOMOVT-NEXT: add r0, r0, #16 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, 
[r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldr r2, [r0], #4 +; NOMOVT-NEXT: str r2, [r1], #4 +; NOMOVT-NEXT: ldm r3, {r0, r1, r2, r3} +; NOMOVT-NEXT: bl e1 +; NOMOVT-NEXT: ldr r0, [sp, #140] +; NOMOVT-NEXT: ldr r1, .LCPI0_0 +; NOMOVT-NEXT: ldr r1, [r1] +; NOMOVT-NEXT: cmp r1, r0 +; NOMOVT-NEXT: moveq r0, #0 +; NOMOVT-NEXT: addeq sp, sp, #144 +; NOMOVT-NEXT: popeq {r11, pc} +; NOMOVT-NEXT: .LBB0_1: @ %entry +; NOMOVT-NEXT: bl __stack_chk_fail +; NOMOVT-NEXT: .p2align 2 +; NOMOVT-NEXT: @ %bb.2: +; NOMOVT-NEXT: .LCPI0_0: +; NOMOVT-NEXT: .long __stack_chk_guard entry: -; CHECK-LABEL: f: -; CHECK: ldr -; CHECK: str -; CHECK-NOT:bne %st = alloca %struct.SmallStruct, align 4 %call = call i32 @e1(ptr byval(%struct.SmallStruct) %st) ret i32 0 @@ -22,20 +134,95 @@ entry: ; Generate a loop for large struct byval define i32 @g() nounwind ssp { -entry: -; CHECK-LABEL: g: -; CHECK: ldr -; CHECK: sub -; CHECK: str -; CHECK: bne ; NACL-LABEL: g: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .save {r4, r5, r11, lr} +; NACL-NEXT: push {r4, r5, r11, lr} +; NACL-NEXT: .pad #2224 +; NACL-NEXT: sub sp, sp, #2224 +; NACL-NEXT: movw r0, :lower16:__stack_chk_guard +; NACL-NEXT: movt r0, :upper16:__stack_chk_guard +; NACL-NEXT: ldr r0, [r0] +; NACL-NEXT: str r0, [sp, #2220] +; NACL-NEXT: sub sp, sp, #2192 +; NACL-NEXT: add lr, sp, #2048 +; NACL-NEXT: ldr r1, [sp, #2208] +; NACL-NEXT: add r0, lr, #156 +; NACL-NEXT: ldr r2, [sp, #2212] +; NACL-NEXT: add r12, r0, #16 +; NACL-NEXT: ldr r0, [sp, #2204] +; NACL-NEXT: ldr r3, [sp, #2216] +; NACL-NEXT: movw lr, #2192 +; NACL-NEXT: mov r4, sp +; NACL-NEXT: .LBB1_1: @ %entry +; NACL-NEXT: @ =>This Inner Loop Header: Depth=1 +; NACL-NEXT: ldr r5, [r12], #4 +; NACL-NEXT: subs lr, lr, #4 +; NACL-NEXT: str r5, [r4], 
#4 +; NACL-NEXT: bne .LBB1_1 +; NACL-NEXT: @ %bb.2: @ %entry +; NACL-NEXT: bl e2 +; NACL-NEXT: add sp, sp, #2192 +; NACL-NEXT: movw r1, :lower16:__stack_chk_guard +; NACL-NEXT: ldr r0, [sp, #2220] +; NACL-NEXT: movt r1, :upper16:__stack_chk_guard +; NACL-NEXT: ldr r1, [r1] +; NACL-NEXT: cmp r1, r0 +; NACL-NEXT: moveq r0, #0 +; NACL-NEXT: addeq sp, sp, #2224 +; NACL-NEXT: popeq {r4, r5, r11, pc} +; NACL-NEXT: .LBB1_3: @ %entry +; NACL-NEXT: bl __stack_chk_fail +; +; NOMOVT-LABEL: g: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .save {r11, lr} +; NOMOVT-NEXT: push {r11, lr} +; NOMOVT-NEXT: .pad #168 +; NOMOVT-NEXT: sub sp, sp, #168 +; NOMOVT-NEXT: .pad #2048 +; NOMOVT-NEXT: sub sp, sp, #2048 +; NOMOVT-NEXT: ldr r0, .LCPI1_1 +; NOMOVT-NEXT: ldr r0, [r0] +; NOMOVT-NEXT: str r0, [sp, #2212] +; NOMOVT-NEXT: sub sp, sp, #2192 +; NOMOVT-NEXT: add lr, sp, #2048 +; NOMOVT-NEXT: ldr r1, .LCPI1_0 +; NOMOVT-NEXT: add r0, lr, #148 +; NOMOVT-NEXT: mov r2, sp +; NOMOVT-NEXT: add r0, r0, #16 +; NOMOVT-NEXT: .LBB1_1: @ %entry +; NOMOVT-NEXT: @ =>This Inner Loop Header: Depth=1 +; NOMOVT-NEXT: ldr r3, [r0], #4 +; NOMOVT-NEXT: subs r1, r1, #4 +; NOMOVT-NEXT: str r3, [r2], #4 +; NOMOVT-NEXT: bne .LBB1_1 +; NOMOVT-NEXT: @ %bb.2: @ %entry +; NOMOVT-NEXT: ldr r0, [sp, #2196] +; NOMOVT-NEXT: ldr r1, [sp, #2200] +; NOMOVT-NEXT: ldr r2, [sp, #2204] +; NOMOVT-NEXT: ldr r3, [sp, #2208] +; NOMOVT-NEXT: bl e2 +; NOMOVT-NEXT: add sp, sp, #2192 +; NOMOVT-NEXT: ldr r0, [sp, #2212] +; NOMOVT-NEXT: ldr r1, .LCPI1_1 +; NOMOVT-NEXT: ldr r1, [r1] +; NOMOVT-NEXT: cmp r1, r0 +; NOMOVT-NEXT: moveq r0, #0 +; NOMOVT-NEXT: addeq sp, sp, #168 +; NOMOVT-NEXT: addeq sp, sp, #2048 +; NOMOVT-NEXT: popeq {r11, pc} +; NOMOVT-NEXT: .LBB1_3: @ %entry +; NOMOVT-NEXT: bl __stack_chk_fail +; NOMOVT-NEXT: .p2align 2 +; NOMOVT-NEXT: @ %bb.4: +; NOMOVT-NEXT: .LCPI1_0: +; NOMOVT-NEXT: .long 2192 @ 0x890 +; NOMOVT-NEXT: .LCPI1_1: +; NOMOVT-NEXT: .long __stack_chk_guard +entry: ; Ensure that use movw instead of constpool for 
the loop trip count. But don't ; match the __stack_chk_guard movw -; NACL: movw {{r[0-9]+|lr}}, # -; NACL: ldr -; NACL: sub -; NACL: str -; NACL: bne %st = alloca %struct.LargeStruct, align 4 %call = call i32 @e2(ptr byval(%struct.LargeStruct) %st) ret i32 0 @@ -43,17 +230,90 @@ entry: ; Generate a loop using NEON instructions define i32 @h() nounwind ssp { +; NACL-LABEL: h: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .save {r4, r5, r6, r7, r8, lr} +; NACL-NEXT: push {r4, r5, r6, r7, r8, lr} +; NACL-NEXT: .pad #168 +; NACL-NEXT: sub sp, sp, #168 +; NACL-NEXT: .pad #2048 +; NACL-NEXT: sub sp, sp, #2048 +; NACL-NEXT: movw r0, :lower16:__stack_chk_guard +; NACL-NEXT: movt r0, :upper16:__stack_chk_guard +; NACL-NEXT: ldr r0, [r0] +; NACL-NEXT: str r0, [sp, #2212] +; NACL-NEXT: sub sp, sp, #2192 +; NACL-NEXT: add r3, sp, #2192 +; NACL-NEXT: add r0, sp, #2192 +; NACL-NEXT: add r12, r0, #16 +; NACL-NEXT: movw lr, #2192 +; NACL-NEXT: ldm r3, {r0, r1, r2, r3} +; NACL-NEXT: mov r4, sp +; NACL-NEXT: .LBB2_1: @ %entry +; NACL-NEXT: @ =>This Inner Loop Header: Depth=1 +; NACL-NEXT: vld1.32 {d16, d17}, [r12]! +; NACL-NEXT: subs lr, lr, #16 +; NACL-NEXT: vst1.32 {d16, d17}, [r4]! 
+; NACL-NEXT: bne .LBB2_1 +; NACL-NEXT: @ %bb.2: @ %entry +; NACL-NEXT: bl e3 +; NACL-NEXT: add sp, sp, #2192 +; NACL-NEXT: movw r1, :lower16:__stack_chk_guard +; NACL-NEXT: ldr r0, [sp, #2212] +; NACL-NEXT: movt r1, :upper16:__stack_chk_guard +; NACL-NEXT: ldr r1, [r1] +; NACL-NEXT: cmp r1, r0 +; NACL-NEXT: moveq r0, #0 +; NACL-NEXT: addeq sp, sp, #168 +; NACL-NEXT: addeq sp, sp, #2048 +; NACL-NEXT: popeq {r4, r5, r6, r7, r8, pc} +; NACL-NEXT: .LBB2_3: @ %entry +; NACL-NEXT: bl __stack_chk_fail +; +; NOMOVT-LABEL: h: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .save {r6, r10, r11, lr} +; NOMOVT-NEXT: push {r6, r10, r11, lr} +; NOMOVT-NEXT: .setfp r11, sp, #8 +; NOMOVT-NEXT: add r11, sp, #8 +; NOMOVT-NEXT: .pad #2224 +; NOMOVT-NEXT: sub sp, sp, #2224 +; NOMOVT-NEXT: bic sp, sp, #15 +; NOMOVT-NEXT: ldr r0, .LCPI2_1 +; NOMOVT-NEXT: mov r6, sp +; NOMOVT-NEXT: ldr r0, [r0] +; NOMOVT-NEXT: str r0, [r6, #2220] +; NOMOVT-NEXT: sub sp, sp, #2192 +; NOMOVT-NEXT: mov r0, r6 +; NOMOVT-NEXT: ldr r1, .LCPI2_0 +; NOMOVT-NEXT: add r0, r0, #16 +; NOMOVT-NEXT: mov r2, sp +; NOMOVT-NEXT: .LBB2_1: @ %entry +; NOMOVT-NEXT: @ =>This Inner Loop Header: Depth=1 +; NOMOVT-NEXT: ldr r3, [r0], #4 +; NOMOVT-NEXT: subs r1, r1, #4 +; NOMOVT-NEXT: str r3, [r2], #4 +; NOMOVT-NEXT: bne .LBB2_1 +; NOMOVT-NEXT: @ %bb.2: @ %entry +; NOMOVT-NEXT: ldm r6, {r0, r1, r2, r3} +; NOMOVT-NEXT: bl e3 +; NOMOVT-NEXT: add sp, sp, #2192 +; NOMOVT-NEXT: ldr r0, [r6, #2220] +; NOMOVT-NEXT: ldr r1, .LCPI2_1 +; NOMOVT-NEXT: ldr r1, [r1] +; NOMOVT-NEXT: cmp r1, r0 +; NOMOVT-NEXT: moveq r0, #0 +; NOMOVT-NEXT: subeq sp, r11, #8 +; NOMOVT-NEXT: popeq {r6, r10, r11, pc} +; NOMOVT-NEXT: .LBB2_3: @ %entry +; NOMOVT-NEXT: bl __stack_chk_fail +; NOMOVT-NEXT: .p2align 2 +; NOMOVT-NEXT: @ %bb.4: +; NOMOVT-NEXT: .LCPI2_0: +; NOMOVT-NEXT: .long 2192 @ 0x890 +; NOMOVT-NEXT: .LCPI2_1: +; NOMOVT-NEXT: .long __stack_chk_guard entry: -; CHECK-LABEL: h: -; CHECK: vld1 -; CHECK: sub -; CHECK: vst1 -; CHECK: bne -; NACL: movw 
{{r[0-9]+|lr}}, # -; NACL: vld1 -; NACL: sub -; NACL: vst1 -; NACL: bne %st = alloca %struct.LargeStruct, align 16 %call = call i32 @e3(ptr byval(%struct.LargeStruct) align 16 %st) ret i32 0 @@ -67,16 +327,50 @@ declare i32 @e3(ptr nocapture byval(%struct.LargeStruct) align 16 %in) nounwind ; We can't do tail call since address of s is passed to the callee and part of ; s is in caller's local frame. define void @f3(ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; CHECK-LABEL: f3 -; CHECK: bl _consumestruct +; NACL-LABEL: f3: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .pad #16 +; NACL-NEXT: sub sp, sp, #16 +; NACL-NEXT: stm sp, {r0, r1, r2, r3} +; NACL-NEXT: mov r0, sp +; NACL-NEXT: mov r1, #80 +; NACL-NEXT: add sp, sp, #16 +; NACL-NEXT: b consumestruct +; +; NOMOVT-LABEL: f3: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .pad #16 +; NOMOVT-NEXT: sub sp, sp, #16 +; NOMOVT-NEXT: stm sp, {r0, r1, r2, r3} +; NOMOVT-NEXT: mov r0, sp +; NOMOVT-NEXT: mov r1, #80 +; NOMOVT-NEXT: add sp, sp, #16 +; NOMOVT-NEXT: b consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void } define void @f4(ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; CHECK-LABEL: f4 -; CHECK: bl _consumestruct +; NACL-LABEL: f4: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .pad #16 +; NACL-NEXT: sub sp, sp, #16 +; NACL-NEXT: stm sp, {r0, r1, r2, r3} +; NACL-NEXT: mov r0, sp +; NACL-NEXT: mov r1, #80 +; NACL-NEXT: add sp, sp, #16 +; NACL-NEXT: b consumestruct +; +; NOMOVT-LABEL: f4: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .pad #16 +; NOMOVT-NEXT: sub sp, sp, #16 +; NOMOVT-NEXT: stm sp, {r0, r1, r2, r3} +; NOMOVT-NEXT: mov r0, sp +; NOMOVT-NEXT: mov r1, #80 +; NOMOVT-NEXT: add sp, sp, #16 +; NOMOVT-NEXT: b consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void @@ -84,16 +378,34 @@ entry: ; We can do tail call here since s is in the incoming argument area. 
define void @f5(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; CHECK-LABEL: f5 -; CHECK: b{{(\.w)?}} _consumestruct +; NACL-LABEL: f5: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: mov r0, sp +; NACL-NEXT: mov r1, #80 +; NACL-NEXT: b consumestruct +; +; NOMOVT-LABEL: f5: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: mov r0, sp +; NOMOVT-NEXT: mov r1, #80 +; NOMOVT-NEXT: b consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void } define void @f6(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; CHECK-LABEL: f6 -; CHECK: b{{(\.w)?}} _consumestruct +; NACL-LABEL: f6: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: mov r0, sp +; NACL-NEXT: mov r1, #80 +; NACL-NEXT: b consumestruct +; +; NOMOVT-LABEL: f6: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: mov r0, sp +; NOMOVT-NEXT: mov r1, #80 +; NOMOVT-NEXT: b consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void @@ -106,10 +418,81 @@ declare void @consumestruct(ptr nocapture %structp, i32 %structsize) nounwind declare void @use_I(ptr byval(%struct.I.8)) define void @test_I_16() { -; CHECK-LABEL: test_I_16 -; CHECK: ldrb -; CHECK: strb +; NACL-LABEL: test_I_16: +; NACL: @ %bb.0: @ %entry +; NACL-NEXT: .save {r11, lr} +; NACL-NEXT: push {r11, lr} +; NACL-NEXT: .pad #40 +; NACL-NEXT: sub sp, sp, #40 +; NACL-NEXT: ldr r0, [r0] +; NACL-NEXT: mov r1, sp +; NACL-NEXT: vld1.32 {d16, d17}, [r2]! +; NACL-NEXT: vst1.32 {d16, d17}, [r1]! 
+; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: ldrb r3, [r2], #1 +; NACL-NEXT: strb r3, [r1], #1 +; NACL-NEXT: mov r2, r0 +; NACL-NEXT: mov r1, r0 +; NACL-NEXT: mov r3, r0 +; NACL-NEXT: bl use_I +; NACL-NEXT: add sp, sp, #40 +; NACL-NEXT: pop {r11, pc} +; +; NOMOVT-LABEL: test_I_16: +; NOMOVT: @ %bb.0: @ %entry +; NOMOVT-NEXT: .save {r11, lr} +; NOMOVT-NEXT: push {r11, lr} +; NOMOVT-NEXT: .setfp r11, sp +; NOMOVT-NEXT: mov r11, sp +; NOMOVT-NEXT: .pad #40 +; NOMOVT-NEXT: sub sp, sp, #40 +; NOMOVT-NEXT: bic sp, sp, #15 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: mov r2, sp +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r1], #4 +; NOMOVT-NEXT: str r0, [r2], #4 +; NOMOVT-NEXT: ldr r0, [r0] +; NOMOVT-NEXT: mov r1, r0 +; NOMOVT-NEXT: mov r2, r0 +; NOMOVT-NEXT: mov r3, r0 +; NOMOVT-NEXT: bl use_I +; NOMOVT-NEXT: mov sp, r11 +; NOMOVT-NEXT: pop {r11, pc} entry: call void @use_I(ptr byval(%struct.I.8) align 16 undef) ret void } +;; NOTE: These prefixes are unused and the list 
is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/tail-call-float.ll b/llvm/test/CodeGen/ARM/tail-call-float.ll index 8cca7e0f70683e..6fa4b9229f64c8 100644 --- a/llvm/test/CodeGen/ARM/tail-call-float.ll +++ b/llvm/test/CodeGen/ARM/tail-call-float.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi soft -O0 -o - < %s \ ; RUN: | FileCheck %s -check-prefix CHECK-SOFT -check-prefix CHECK ; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi hard -O0 -o - < %s \ @@ -12,16 +13,42 @@ declare i1 @non_variadic_big(float, float, float, float, float, float) declare i1 @variadic(float, ...) define void @non_variadic_fp(float %x, float %y) { -; CHECK-LABEL: non_variadic_fp: -; CHECK: b non_variadic +; CHECK-SOFT-LABEL: non_variadic_fp: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: mov r3, r1 +; CHECK-SOFT-NEXT: mov r2, r0 +; CHECK-SOFT-NEXT: mov r0, r3 +; CHECK-SOFT-NEXT: mov r1, r2 +; CHECK-SOFT-NEXT: b non_variadic +; +; CHECK-HARD-LABEL: non_variadic_fp: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: vmov.f32 s3, s1 +; CHECK-HARD-NEXT: vmov.f32 s2, s0 +; CHECK-HARD-NEXT: vmov.f32 s0, s3 +; CHECK-HARD-NEXT: vmov.f32 s1, s2 +; CHECK-HARD-NEXT: b non_variadic entry: %call = tail call i1 (float, float, float, float) @non_variadic(float %y, float %x, float %x, float %y) ret void } define void @variadic_fp(float %x, float %y) { -; CHECK-LABEL: variadic_fp: -; CHECK: b variadic +; CHECK-SOFT-LABEL: variadic_fp: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: mov r3, r1 +; CHECK-SOFT-NEXT: mov r2, r0 +; CHECK-SOFT-NEXT: mov r0, r3 +; CHECK-SOFT-NEXT: mov r1, r2 +; CHECK-SOFT-NEXT: b variadic +; +; CHECK-HARD-LABEL: variadic_fp: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: vmov r2, s0 +; CHECK-HARD-NEXT: vmov r3, s1 +; CHECK-HARD-NEXT: mov r0, r3 +; CHECK-HARD-NEXT: mov r1, r2 +; 
CHECK-HARD-NEXT: b variadic entry: %call = tail call i1 (float, ...) @variadic(float %y, float %x, float %x, float %y) ret void @@ -31,9 +58,32 @@ entry: ; of them to handle the 6 arguments. With hard-float, we have plenty of regs ; (s0-s15) to pass FP arguments. define void @non_variadic_fp_big(float %x, float %y) { -; CHECK-LABEL: non_variadic_fp_big: -; CHECK-SOFT: bl non_variadic_big -; CHECK-HARD: b non_variadic_big +; CHECK-SOFT-LABEL: non_variadic_fp_big: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: push {r11, lr} +; CHECK-SOFT-NEXT: sub sp, sp, #8 +; CHECK-SOFT-NEXT: mov r3, r1 +; CHECK-SOFT-NEXT: mov r2, r0 +; CHECK-SOFT-NEXT: vmov s0, r3 +; CHECK-SOFT-NEXT: vmov s0, r2 +; CHECK-SOFT-NEXT: mov r0, sp +; CHECK-SOFT-NEXT: str r3, [r0, #4] +; CHECK-SOFT-NEXT: str r2, [r0] +; CHECK-SOFT-NEXT: mov r0, r3 +; CHECK-SOFT-NEXT: mov r1, r2 +; CHECK-SOFT-NEXT: bl non_variadic_big +; CHECK-SOFT-NEXT: add sp, sp, #8 +; CHECK-SOFT-NEXT: pop {r11, pc} +; +; CHECK-HARD-LABEL: non_variadic_fp_big: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: vmov.f32 s5, s1 +; CHECK-HARD-NEXT: vmov.f32 s4, s0 +; CHECK-HARD-NEXT: vmov.f32 s0, s5 +; CHECK-HARD-NEXT: vmov.f32 s1, s4 +; CHECK-HARD-NEXT: vmov.f32 s2, s4 +; CHECK-HARD-NEXT: vmov.f32 s3, s5 +; CHECK-HARD-NEXT: b non_variadic_big entry: %call = tail call i1 (float, float, float, float, float, float) @non_variadic_big(float %y, float %x, float %x, float %y, float %x, float %y) ret void @@ -41,9 +91,40 @@ entry: ; Variadic functions cannot use FP regs to pass arguments; only GP regs. 
define void @variadic_fp_big(float %x, float %y) { -; CHECK-LABEL: variadic_fp_big: -; CHECK: bl variadic +; CHECK-SOFT-LABEL: variadic_fp_big: +; CHECK-SOFT: @ %bb.0: @ %entry +; CHECK-SOFT-NEXT: push {r11, lr} +; CHECK-SOFT-NEXT: sub sp, sp, #8 +; CHECK-SOFT-NEXT: mov r3, r1 +; CHECK-SOFT-NEXT: mov r2, r0 +; CHECK-SOFT-NEXT: vmov s0, r3 +; CHECK-SOFT-NEXT: vmov s0, r2 +; CHECK-SOFT-NEXT: mov r0, sp +; CHECK-SOFT-NEXT: str r3, [r0, #4] +; CHECK-SOFT-NEXT: str r2, [r0] +; CHECK-SOFT-NEXT: mov r0, r3 +; CHECK-SOFT-NEXT: mov r1, r2 +; CHECK-SOFT-NEXT: bl variadic +; CHECK-SOFT-NEXT: add sp, sp, #8 +; CHECK-SOFT-NEXT: pop {r11, pc} +; +; CHECK-HARD-LABEL: variadic_fp_big: +; CHECK-HARD: @ %bb.0: @ %entry +; CHECK-HARD-NEXT: push {r11, lr} +; CHECK-HARD-NEXT: sub sp, sp, #8 +; CHECK-HARD-NEXT: mov r0, sp +; CHECK-HARD-NEXT: vstr s1, [r0, #4] +; CHECK-HARD-NEXT: vstr s0, [r0] +; CHECK-HARD-NEXT: vmov r2, s0 +; CHECK-HARD-NEXT: vmov r3, s1 +; CHECK-HARD-NEXT: mov r0, r3 +; CHECK-HARD-NEXT: mov r1, r2 +; CHECK-HARD-NEXT: bl variadic +; CHECK-HARD-NEXT: add sp, sp, #8 +; CHECK-HARD-NEXT: pop {r11, pc} entry: %call = tail call i1 (float, ...) @variadic(float %y, float %x, float %x, float %y, float %x, float %y) ret void } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; CHECK: {{.*}} >From 082c980d07322aa7dc200df9797c2c53c5a6563e Mon Sep 17 00:00:00 2001 From: Kiran <kiran.st...@arm.com> Date: Tue, 27 Aug 2024 10:43:59 +0100 Subject: [PATCH 2/5] Separate frontend changes, add debug directives, remove redundant stuff from tests --- clang/lib/CodeGen/CGCall.cpp | 2 +- llvm/include/llvm/CodeGen/CallingConvLower.h | 2 ++ llvm/lib/CodeGen/CallingConvLower.cpp | 2 ++ llvm/test/CodeGen/ARM/tail-call-float.ll | 4 ++-- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 05773f91f986ba..ca2c79b51ac96b 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5086,7 +5086,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, RawAddress SRetAlloca = RawAddress::invalid(); llvm::Value *UnusedReturnSizePtr = nullptr; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { - if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) { + if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) { SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() + IRFunctionArgs.getSRetArgNo(), RetTy, CharUnits::fromQuantity(1)); diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index 12a6df16e279b4..f94cb628965faa 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -540,7 +540,9 @@ class CCState { }); } +#ifndef NDEBUG void dump() const; +#endif private: /// MarkAllocated - Mark a register and all of its aliases as allocated.
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp index 7ba3ea83115db2..38884762944319 100644 --- a/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/llvm/lib/CodeGen/CallingConvLower.cpp @@ -291,6 +291,7 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC, RVLocs2.end(), AreCompatible); } +#ifndef NDEBUG void CCState::dump() const { dbgs() << "CCState:\n"; for (const CCValAssign &Loc : Locs) { @@ -351,3 +352,4 @@ void CCState::dump() const { dbgs() << "\n"; } } +#endif \ No newline at end of file diff --git a/llvm/test/CodeGen/ARM/tail-call-float.ll b/llvm/test/CodeGen/ARM/tail-call-float.ll index 6fa4b9229f64c8..2f834976dd403c 100644 --- a/llvm/test/CodeGen/ARM/tail-call-float.ll +++ b/llvm/test/CodeGen/ARM/tail-call-float.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi soft -O0 -o - < %s \ -; RUN: | FileCheck %s -check-prefix CHECK-SOFT -check-prefix CHECK +; RUN: | FileCheck %s -check-prefix CHECK-SOFT ; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi hard -O0 -o - < %s \ -; RUN: | FileCheck %s -check-prefix CHECK-HARD -check-prefix CHECK +; RUN: | FileCheck %s -check-prefix CHECK-HARD ; Tests for passing floating-point regs. Variadic functions will always use ; general-purpose registers. 
Standard functions will use the floating-point >From 78623ee1d26fa0823f303b53a6c4d618120ff4b2 Mon Sep 17 00:00:00 2001 From: Kiran <kiran.st...@arm.com> Date: Wed, 28 Aug 2024 14:59:21 +0100 Subject: [PATCH 3/5] Remove byval change and incorrect/unnecessary edits to tests --- llvm/include/llvm/CodeGen/CallingConvLower.h | 4 - llvm/lib/CodeGen/CallingConvLower.cpp | 63 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 25 +- .../ARM/2013-05-13-AAPCS-byval-padding.ll | 16 +- .../ARM/2013-05-13-AAPCS-byval-padding2.ll | 13 +- llvm/test/CodeGen/ARM/fp-arg-shuffle.ll | 2 +- llvm/test/CodeGen/ARM/musttail.ll | 31 ++ llvm/test/CodeGen/ARM/struct_byval.ll | 455 ++---------------- llvm/test/CodeGen/ARM/tail-call-float.ll | 103 +--- 9 files changed, 107 insertions(+), 605 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/musttail.ll diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h index f94cb628965faa..d5a63c8dd627a0 100644 --- a/llvm/include/llvm/CodeGen/CallingConvLower.h +++ b/llvm/include/llvm/CodeGen/CallingConvLower.h @@ -540,10 +540,6 @@ class CCState { }); } -#ifndef NDEBUG - void dump() const; -#endif - private: /// MarkAllocated - Mark a register and all of its aliases as allocated. 
void MarkAllocated(MCPhysReg Reg); diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp index 38884762944319..b7152587a9fa05 100644 --- a/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/llvm/lib/CodeGen/CallingConvLower.cpp @@ -290,66 +290,3 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC, return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(), RVLocs2.end(), AreCompatible); } - -#ifndef NDEBUG -void CCState::dump() const { - dbgs() << "CCState:\n"; - for (const CCValAssign &Loc : Locs) { - if (Loc.isRegLoc()) { - dbgs() << " Reg " << TRI.getName(Loc.getLocReg()); - } else if (Loc.isMemLoc()) { - dbgs() << " Mem " << Loc.getLocMemOffset(); - } else { - assert(Loc.isPendingLoc()); - dbgs() << " Pend " << Loc.getExtraInfo(); - } - - dbgs() << " ValVT:" << Loc.getValVT(); - dbgs() << " LocVT:" << Loc.getLocVT(); - - if (Loc.needsCustom()) - dbgs() << " custom"; - - switch (Loc.getLocInfo()) { - case CCValAssign::Full: - dbgs() << " Full"; - break; - case CCValAssign::SExt: - dbgs() << " SExt"; - break; - case CCValAssign::ZExt: - dbgs() << " ZExt"; - break; - case CCValAssign::AExt: - dbgs() << " AExt"; - break; - case CCValAssign::SExtUpper: - dbgs() << " SExtUpper"; - break; - case CCValAssign::ZExtUpper: - dbgs() << " ZExtUpper"; - break; - case CCValAssign::AExtUpper: - dbgs() << " AExtUpper"; - break; - case CCValAssign::BCvt: - dbgs() << " BCvt"; - break; - case CCValAssign::Trunc: - dbgs() << " Trunc"; - break; - case CCValAssign::VExt: - dbgs() << " VExt"; - break; - case CCValAssign::FPExt: - dbgs() << " FPExt"; - break; - case CCValAssign::Indirect: - dbgs() << " Indirect"; - break; - } - - dbgs() << "\n"; - } -} -#endif \ No newline at end of file diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 07d20814d126bc..7ab8f3b8b40f58 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2407,8 +2407,8 @@ 
ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, isTailCall = false; // For both the non-secure calls and the returns from a CMSE entry function, - // the function needs to do some extra work after the call, or before the - // return, respectively, thus it cannot end with a tail call + // the function needs to do some extra work after the call, or before the + // return, respectively, thus it cannot end with a tail call if (isCmseNSCall || AFI->isCmseNSEntryFunction()) isTailCall = false; @@ -3026,12 +3026,12 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( return CalleeCC == CallerCC; } - // Also avoid sibcall optimization if only one of caller or callee uses - // struct return semantics. + // Also avoid sibcall optimization if only one of caller or callee uses + // struct return semantics. bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); bool isCallerStructRet = MF.getFunction().hasStructRetAttr(); if (isCalleeStructRet != isCallerStructRet) { - LLVM_DEBUG(dbgs() << "false (struct-ret)\n"); + LLVM_DEBUG(dbgs() << "false (mismatched sret)\n"); return false; } @@ -3059,29 +3059,23 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( getEffectiveCallingConv(CalleeCC, isVarArg), getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins, CCAssignFnForReturn(CalleeCC, isVarArg), - CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) { - LLVM_DEBUG(dbgs() << "false (incompatible results)\n"); + CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) return false; - } // The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo(); const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); if (CalleeCC != CallerCC) { const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); - if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) { - LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n"); + if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) return false; - } } - // If Caller's vararg argument has been split between registers and + // If Caller's vararg or byval argument has been split between registers and // stack, do not perform tail call, since part of the argument is in caller's // local frame. const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>(); - if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) { - LLVM_DEBUG(dbgs() << "false (vararg arg reg save area)\n"); + if (AFI_Caller->getArgRegsSaveSize()) return false; - } // If the callee takes no arguments then go on to check the results of the // call. 
@@ -3096,7 +3090,6 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize()) return false; - LLVM_DEBUG(dbgs() << "true\n"); return true; } diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll index e186ae3a961502..d8e22f4f5312ae 100644 --- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll +++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll @@ -12,11 +12,17 @@ define void @check227( ; arg1 --> SP+188 entry: -; CHECK: sub sp, sp, #12 -; CHECK: stm sp, {r1, r2, r3} -; CHECK: ldr r0, [sp, #200] -; CHECK: add sp, sp, #12 -; CHECK: b useInt + +;CHECK: sub sp, sp, #12 +;CHECK: push {r11, lr} +;CHECK: sub sp, sp, #4 +;CHECK: add r0, sp, #12 +;CHECK: stm r0, {r1, r2, r3} +;CHECK: ldr r0, [sp, #212] +;CHECK: bl useInt +;CHECK: add sp, sp, #4 +;CHECK: pop {r11, lr} +;CHECK: add sp, sp, #12 %0 = ptrtoint ptr %arg1 to i32 tail call void @useInt(i32 %0) diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll index efdecce9ae723a..0c5d22984b99e1 100644 --- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll +++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll @@ -7,11 +7,14 @@ define void @foo(ptr byval(%struct4bytes) %p0, ; --> R0 ptr byval(%struct20bytes) %p1 ; --> R1,R2,R3, [SP+0 .. 
SP+8) ) { -;CHECK: sub sp, sp, #16 -;CHECK: stm sp, {r0, r1, r2, r3} -;CHECK: add r0, sp, #4 -;CHECK: add sp, sp, #16 -;CHECK: b useInt +;CHECK: sub sp, sp, #16 +;CHECK: push {r11, lr} +;CHECK: add r12, sp, #8 +;CHECK: stm r12, {r0, r1, r2, r3} +;CHECK: add r0, sp, #12 +;CHECK: bl useInt +;CHECK: pop {r11, lr} +;CHECK: add sp, sp, #16 %1 = ptrtoint ptr %p1 to i32 tail call void @useInt(i32 %1) diff --git a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll index 2ceb7a7b97a1fe..73c0a34fbc2f4a 100644 --- a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll +++ b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll @@ -3,6 +3,7 @@ ; CHECK: function1 ; CHECK-NOT: vmov define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp { +entry: ; CHECK-LABEL: function1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r11, lr} @@ -25,7 +26,6 @@ define double @function1(double %a, double %b, double %c, double %d, double %e, ; CHECK-NEXT: mov r3, r5 ; CHECK-NEXT: pop {r4, r5, r11, lr} ; CHECK-NEXT: b function2 -entry: %call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind ret double %call } diff --git a/llvm/test/CodeGen/ARM/musttail.ll b/llvm/test/CodeGen/ARM/musttail.ll new file mode 100644 index 00000000000000..93e661e59335ad --- /dev/null +++ b/llvm/test/CodeGen/ARM/musttail.ll @@ -0,0 +1,31 @@ +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s + +; The repro example from https://github.com/llvm/llvm-project/issues/57069#issuecomment-1212754850 +; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none) +define hidden noundef i32 @many_args_callee(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4, i32 noundef %5) local_unnamed_addr #0 { + %7 = add nsw i32 %1, %0 + %8 = add nsw i32 %7, %2 + %9 = add nsw i32 %8, %3 + %10 = add nsw i32 %9, %4 + %11 = add nsw i32 %10, %5 + ret 
i32 %11 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind sspstrong willreturn memory(none) +define hidden noundef i32 @many_args(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4, i32 noundef %5) local_unnamed_addr #1 { +; CHECK: b many_args_callee + %7 = musttail call noundef i32 @many_args_callee(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6) + ret i32 %7 +} + +; Test with sret +; Function Attrs: optsize +declare dso_local void @sret_callee(ptr dead_on_unwind writable sret({ double, double }) align 8, i16 noundef signext) local_unnamed_addr #1 + +; Function Attrs: mustprogress optsize +define dso_local void @sret_caller(ptr dead_on_unwind noalias writable sret({ double, double }) align 8 %agg.result, i16 noundef signext %P0) local_unnamed_addr #0 { +entry: +; CHECK: b sret_callee + musttail call void @sret_callee(ptr dead_on_unwind writable sret({ double, double }) align 8 %agg.result, i16 noundef signext 20391) #2 + ret void +} \ No newline at end of file diff --git a/llvm/test/CodeGen/ARM/struct_byval.ll b/llvm/test/CodeGen/ARM/struct_byval.ll index 5564f254c9e74d..73a1b5ee33bca9 100644 --- a/llvm/test/CodeGen/ARM/struct_byval.ll +++ b/llvm/test/CodeGen/ARM/struct_byval.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s ; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s ; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi | FileCheck %s -check-prefix=NACL @@ -11,122 +10,11 @@ %struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] } define i32 @f() nounwind ssp { -; NACL-LABEL: f: -; NACL: @ %bb.0: @ %entry -; NACL-NEXT: .save {r4, lr} -; NACL-NEXT: push {r4, lr} -; NACL-NEXT: .pad #152 -; NACL-NEXT: sub sp, sp, #152 -; NACL-NEXT: movw r0, :lower16:__stack_chk_guard -; NACL-NEXT: add r3, sp, #72 -; NACL-NEXT: movt r0, 
:upper16:__stack_chk_guard -; NACL-NEXT: mov lr, sp -; NACL-NEXT: ldr r0, [r0] -; NACL-NEXT: str r0, [sp, #148] -; NACL-NEXT: add r0, sp, #72 -; NACL-NEXT: add r12, r0, #16 -; NACL-NEXT: ldm r3, {r0, r1, r2, r3} -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: ldr r4, [r12], #4 -; NACL-NEXT: str r4, [lr], #4 -; NACL-NEXT: bl e1 -; NACL-NEXT: movw r1, :lower16:__stack_chk_guard -; NACL-NEXT: ldr r0, [sp, #148] -; NACL-NEXT: movt r1, :upper16:__stack_chk_guard -; NACL-NEXT: ldr r1, [r1] -; NACL-NEXT: cmp r1, r0 -; NACL-NEXT: moveq r0, #0 -; NACL-NEXT: addeq sp, sp, #152 -; NACL-NEXT: popeq {r4, pc} -; NACL-NEXT: .LBB0_1: @ %entry -; NACL-NEXT: bl __stack_chk_fail -; -; NOMOVT-LABEL: f: -; NOMOVT: @ %bb.0: @ %entry -; NOMOVT-NEXT: .save {r11, lr} -; NOMOVT-NEXT: push {r11, lr} -; NOMOVT-NEXT: .pad #144 -; NOMOVT-NEXT: sub sp, sp, #144 -; NOMOVT-NEXT: ldr r0, .LCPI0_0 -; NOMOVT-NEXT: mov r1, sp -; NOMOVT-NEXT: add r3, sp, #64 -; NOMOVT-NEXT: ldr r0, [r0] -; NOMOVT-NEXT: str r0, [sp, #140] -; NOMOVT-NEXT: add r0, sp, #64 -; NOMOVT-NEXT: add r0, r0, #16 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr 
r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldr r2, [r0], #4 -; NOMOVT-NEXT: str r2, [r1], #4 -; NOMOVT-NEXT: ldm r3, {r0, r1, r2, r3} -; NOMOVT-NEXT: bl e1 -; NOMOVT-NEXT: ldr r0, [sp, #140] -; NOMOVT-NEXT: ldr r1, .LCPI0_0 -; NOMOVT-NEXT: ldr r1, [r1] -; NOMOVT-NEXT: cmp r1, r0 -; NOMOVT-NEXT: moveq r0, #0 -; NOMOVT-NEXT: addeq sp, sp, #144 -; NOMOVT-NEXT: popeq {r11, pc} -; NOMOVT-NEXT: .LBB0_1: @ %entry -; NOMOVT-NEXT: bl __stack_chk_fail -; NOMOVT-NEXT: .p2align 2 -; NOMOVT-NEXT: @ %bb.2: -; NOMOVT-NEXT: .LCPI0_0: -; NOMOVT-NEXT: .long __stack_chk_guard entry: +; CHECK-LABEL: f: +; CHECK: ldr +; CHECK: str +; CHECK-NOT:bne %st = alloca %struct.SmallStruct, align 4 %call = call i32 @e1(ptr byval(%struct.SmallStruct) %st) ret i32 0 @@ -134,95 +22,20 @@ entry: ; Generate a loop for large struct byval define i32 @g() nounwind ssp { -; NACL-LABEL: g: -; NACL: @ %bb.0: @ %entry -; NACL-NEXT: .save {r4, r5, r11, lr} -; NACL-NEXT: push {r4, r5, r11, lr} -; NACL-NEXT: .pad #2224 -; NACL-NEXT: sub sp, sp, #2224 -; NACL-NEXT: movw r0, :lower16:__stack_chk_guard -; NACL-NEXT: movt r0, :upper16:__stack_chk_guard -; NACL-NEXT: ldr r0, [r0] -; NACL-NEXT: str r0, [sp, #2220] -; 
NACL-NEXT: sub sp, sp, #2192 -; NACL-NEXT: add lr, sp, #2048 -; NACL-NEXT: ldr r1, [sp, #2208] -; NACL-NEXT: add r0, lr, #156 -; NACL-NEXT: ldr r2, [sp, #2212] -; NACL-NEXT: add r12, r0, #16 -; NACL-NEXT: ldr r0, [sp, #2204] -; NACL-NEXT: ldr r3, [sp, #2216] -; NACL-NEXT: movw lr, #2192 -; NACL-NEXT: mov r4, sp -; NACL-NEXT: .LBB1_1: @ %entry -; NACL-NEXT: @ =>This Inner Loop Header: Depth=1 -; NACL-NEXT: ldr r5, [r12], #4 -; NACL-NEXT: subs lr, lr, #4 -; NACL-NEXT: str r5, [r4], #4 -; NACL-NEXT: bne .LBB1_1 -; NACL-NEXT: @ %bb.2: @ %entry -; NACL-NEXT: bl e2 -; NACL-NEXT: add sp, sp, #2192 -; NACL-NEXT: movw r1, :lower16:__stack_chk_guard -; NACL-NEXT: ldr r0, [sp, #2220] -; NACL-NEXT: movt r1, :upper16:__stack_chk_guard -; NACL-NEXT: ldr r1, [r1] -; NACL-NEXT: cmp r1, r0 -; NACL-NEXT: moveq r0, #0 -; NACL-NEXT: addeq sp, sp, #2224 -; NACL-NEXT: popeq {r4, r5, r11, pc} -; NACL-NEXT: .LBB1_3: @ %entry -; NACL-NEXT: bl __stack_chk_fail -; -; NOMOVT-LABEL: g: -; NOMOVT: @ %bb.0: @ %entry -; NOMOVT-NEXT: .save {r11, lr} -; NOMOVT-NEXT: push {r11, lr} -; NOMOVT-NEXT: .pad #168 -; NOMOVT-NEXT: sub sp, sp, #168 -; NOMOVT-NEXT: .pad #2048 -; NOMOVT-NEXT: sub sp, sp, #2048 -; NOMOVT-NEXT: ldr r0, .LCPI1_1 -; NOMOVT-NEXT: ldr r0, [r0] -; NOMOVT-NEXT: str r0, [sp, #2212] -; NOMOVT-NEXT: sub sp, sp, #2192 -; NOMOVT-NEXT: add lr, sp, #2048 -; NOMOVT-NEXT: ldr r1, .LCPI1_0 -; NOMOVT-NEXT: add r0, lr, #148 -; NOMOVT-NEXT: mov r2, sp -; NOMOVT-NEXT: add r0, r0, #16 -; NOMOVT-NEXT: .LBB1_1: @ %entry -; NOMOVT-NEXT: @ =>This Inner Loop Header: Depth=1 -; NOMOVT-NEXT: ldr r3, [r0], #4 -; NOMOVT-NEXT: subs r1, r1, #4 -; NOMOVT-NEXT: str r3, [r2], #4 -; NOMOVT-NEXT: bne .LBB1_1 -; NOMOVT-NEXT: @ %bb.2: @ %entry -; NOMOVT-NEXT: ldr r0, [sp, #2196] -; NOMOVT-NEXT: ldr r1, [sp, #2200] -; NOMOVT-NEXT: ldr r2, [sp, #2204] -; NOMOVT-NEXT: ldr r3, [sp, #2208] -; NOMOVT-NEXT: bl e2 -; NOMOVT-NEXT: add sp, sp, #2192 -; NOMOVT-NEXT: ldr r0, [sp, #2212] -; NOMOVT-NEXT: ldr r1, .LCPI1_1 -; 
NOMOVT-NEXT: ldr r1, [r1] -; NOMOVT-NEXT: cmp r1, r0 -; NOMOVT-NEXT: moveq r0, #0 -; NOMOVT-NEXT: addeq sp, sp, #168 -; NOMOVT-NEXT: addeq sp, sp, #2048 -; NOMOVT-NEXT: popeq {r11, pc} -; NOMOVT-NEXT: .LBB1_3: @ %entry -; NOMOVT-NEXT: bl __stack_chk_fail -; NOMOVT-NEXT: .p2align 2 -; NOMOVT-NEXT: @ %bb.4: -; NOMOVT-NEXT: .LCPI1_0: -; NOMOVT-NEXT: .long 2192 @ 0x890 -; NOMOVT-NEXT: .LCPI1_1: -; NOMOVT-NEXT: .long __stack_chk_guard entry: +; CHECK-LABEL: g: +; CHECK: ldr +; CHECK: sub +; CHECK: str +; CHECK: bne +; NACL-LABEL: g: ; Ensure that use movw instead of constpool for the loop trip count. But don't ; match the __stack_chk_guard movw +; NACL: movw {{r[0-9]+|lr}}, # +; NACL: ldr +; NACL: sub +; NACL: str +; NACL: bne %st = alloca %struct.LargeStruct, align 4 %call = call i32 @e2(ptr byval(%struct.LargeStruct) %st) ret i32 0 @@ -230,90 +43,17 @@ entry: ; Generate a loop using NEON instructions define i32 @h() nounwind ssp { -; NACL-LABEL: h: -; NACL: @ %bb.0: @ %entry -; NACL-NEXT: .save {r4, r5, r6, r7, r8, lr} -; NACL-NEXT: push {r4, r5, r6, r7, r8, lr} -; NACL-NEXT: .pad #168 -; NACL-NEXT: sub sp, sp, #168 -; NACL-NEXT: .pad #2048 -; NACL-NEXT: sub sp, sp, #2048 -; NACL-NEXT: movw r0, :lower16:__stack_chk_guard -; NACL-NEXT: movt r0, :upper16:__stack_chk_guard -; NACL-NEXT: ldr r0, [r0] -; NACL-NEXT: str r0, [sp, #2212] -; NACL-NEXT: sub sp, sp, #2192 -; NACL-NEXT: add r3, sp, #2192 -; NACL-NEXT: add r0, sp, #2192 -; NACL-NEXT: add r12, r0, #16 -; NACL-NEXT: movw lr, #2192 -; NACL-NEXT: ldm r3, {r0, r1, r2, r3} -; NACL-NEXT: mov r4, sp -; NACL-NEXT: .LBB2_1: @ %entry -; NACL-NEXT: @ =>This Inner Loop Header: Depth=1 -; NACL-NEXT: vld1.32 {d16, d17}, [r12]! -; NACL-NEXT: subs lr, lr, #16 -; NACL-NEXT: vst1.32 {d16, d17}, [r4]! 
-; NACL-NEXT: bne .LBB2_1 -; NACL-NEXT: @ %bb.2: @ %entry -; NACL-NEXT: bl e3 -; NACL-NEXT: add sp, sp, #2192 -; NACL-NEXT: movw r1, :lower16:__stack_chk_guard -; NACL-NEXT: ldr r0, [sp, #2212] -; NACL-NEXT: movt r1, :upper16:__stack_chk_guard -; NACL-NEXT: ldr r1, [r1] -; NACL-NEXT: cmp r1, r0 -; NACL-NEXT: moveq r0, #0 -; NACL-NEXT: addeq sp, sp, #168 -; NACL-NEXT: addeq sp, sp, #2048 -; NACL-NEXT: popeq {r4, r5, r6, r7, r8, pc} -; NACL-NEXT: .LBB2_3: @ %entry -; NACL-NEXT: bl __stack_chk_fail -; -; NOMOVT-LABEL: h: -; NOMOVT: @ %bb.0: @ %entry -; NOMOVT-NEXT: .save {r6, r10, r11, lr} -; NOMOVT-NEXT: push {r6, r10, r11, lr} -; NOMOVT-NEXT: .setfp r11, sp, #8 -; NOMOVT-NEXT: add r11, sp, #8 -; NOMOVT-NEXT: .pad #2224 -; NOMOVT-NEXT: sub sp, sp, #2224 -; NOMOVT-NEXT: bic sp, sp, #15 -; NOMOVT-NEXT: ldr r0, .LCPI2_1 -; NOMOVT-NEXT: mov r6, sp -; NOMOVT-NEXT: ldr r0, [r0] -; NOMOVT-NEXT: str r0, [r6, #2220] -; NOMOVT-NEXT: sub sp, sp, #2192 -; NOMOVT-NEXT: mov r0, r6 -; NOMOVT-NEXT: ldr r1, .LCPI2_0 -; NOMOVT-NEXT: add r0, r0, #16 -; NOMOVT-NEXT: mov r2, sp -; NOMOVT-NEXT: .LBB2_1: @ %entry -; NOMOVT-NEXT: @ =>This Inner Loop Header: Depth=1 -; NOMOVT-NEXT: ldr r3, [r0], #4 -; NOMOVT-NEXT: subs r1, r1, #4 -; NOMOVT-NEXT: str r3, [r2], #4 -; NOMOVT-NEXT: bne .LBB2_1 -; NOMOVT-NEXT: @ %bb.2: @ %entry -; NOMOVT-NEXT: ldm r6, {r0, r1, r2, r3} -; NOMOVT-NEXT: bl e3 -; NOMOVT-NEXT: add sp, sp, #2192 -; NOMOVT-NEXT: ldr r0, [r6, #2220] -; NOMOVT-NEXT: ldr r1, .LCPI2_1 -; NOMOVT-NEXT: ldr r1, [r1] -; NOMOVT-NEXT: cmp r1, r0 -; NOMOVT-NEXT: moveq r0, #0 -; NOMOVT-NEXT: subeq sp, r11, #8 -; NOMOVT-NEXT: popeq {r6, r10, r11, pc} -; NOMOVT-NEXT: .LBB2_3: @ %entry -; NOMOVT-NEXT: bl __stack_chk_fail -; NOMOVT-NEXT: .p2align 2 -; NOMOVT-NEXT: @ %bb.4: -; NOMOVT-NEXT: .LCPI2_0: -; NOMOVT-NEXT: .long 2192 @ 0x890 -; NOMOVT-NEXT: .LCPI2_1: -; NOMOVT-NEXT: .long __stack_chk_guard entry: +; CHECK-LABEL: h: +; CHECK: vld1 +; CHECK: sub +; CHECK: vst1 +; CHECK: bne +; NACL: movw 
{{r[0-9]+|lr}}, # +; NACL: vld1 +; NACL: sub +; NACL: vst1 +; NACL: bne %st = alloca %struct.LargeStruct, align 16 %call = call i32 @e3(ptr byval(%struct.LargeStruct) align 16 %st) ret i32 0 @@ -327,50 +67,16 @@ declare i32 @e3(ptr nocapture byval(%struct.LargeStruct) align 16 %in) nounwind ; We can't do tail call since address of s is passed to the callee and part of ; s is in caller's local frame. define void @f3(ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; NACL-LABEL: f3: -; NACL: @ %bb.0: @ %entry -; NACL-NEXT: .pad #16 -; NACL-NEXT: sub sp, sp, #16 -; NACL-NEXT: stm sp, {r0, r1, r2, r3} -; NACL-NEXT: mov r0, sp -; NACL-NEXT: mov r1, #80 -; NACL-NEXT: add sp, sp, #16 -; NACL-NEXT: b consumestruct -; -; NOMOVT-LABEL: f3: -; NOMOVT: @ %bb.0: @ %entry -; NOMOVT-NEXT: .pad #16 -; NOMOVT-NEXT: sub sp, sp, #16 -; NOMOVT-NEXT: stm sp, {r0, r1, r2, r3} -; NOMOVT-NEXT: mov r0, sp -; NOMOVT-NEXT: mov r1, #80 -; NOMOVT-NEXT: add sp, sp, #16 -; NOMOVT-NEXT: b consumestruct +; CHECK-LABEL: f3 +; CHECK: bl _consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void } define void @f4(ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; NACL-LABEL: f4: -; NACL: @ %bb.0: @ %entry -; NACL-NEXT: .pad #16 -; NACL-NEXT: sub sp, sp, #16 -; NACL-NEXT: stm sp, {r0, r1, r2, r3} -; NACL-NEXT: mov r0, sp -; NACL-NEXT: mov r1, #80 -; NACL-NEXT: add sp, sp, #16 -; NACL-NEXT: b consumestruct -; -; NOMOVT-LABEL: f4: -; NOMOVT: @ %bb.0: @ %entry -; NOMOVT-NEXT: .pad #16 -; NOMOVT-NEXT: sub sp, sp, #16 -; NOMOVT-NEXT: stm sp, {r0, r1, r2, r3} -; NOMOVT-NEXT: mov r0, sp -; NOMOVT-NEXT: mov r1, #80 -; NOMOVT-NEXT: add sp, sp, #16 -; NOMOVT-NEXT: b consumestruct +; CHECK-LABEL: f4 +; CHECK: bl _consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void @@ -378,34 +84,16 @@ entry: ; We can do tail call here since s is in the incoming argument area. 
define void @f5(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; NACL-LABEL: f5: -; NACL: @ %bb.0: @ %entry -; NACL-NEXT: mov r0, sp -; NACL-NEXT: mov r1, #80 -; NACL-NEXT: b consumestruct -; -; NOMOVT-LABEL: f5: -; NOMOVT: @ %bb.0: @ %entry -; NOMOVT-NEXT: mov r0, sp -; NOMOVT-NEXT: mov r1, #80 -; NOMOVT-NEXT: b consumestruct +; CHECK-LABEL: f5 +; CHECK: b{{(\.w)?}} _consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void } define void @f6(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize { -; NACL-LABEL: f6: -; NACL: @ %bb.0: @ %entry -; NACL-NEXT: mov r0, sp -; NACL-NEXT: mov r1, #80 -; NACL-NEXT: b consumestruct -; -; NOMOVT-LABEL: f6: -; NOMOVT: @ %bb.0: @ %entry -; NOMOVT-NEXT: mov r0, sp -; NOMOVT-NEXT: mov r1, #80 -; NOMOVT-NEXT: b consumestruct +; CHECK-LABEL: f6 +; CHECK: b{{(\.w)?}} _consumestruct entry: tail call void @consumestruct(ptr %s, i32 80) optsize ret void @@ -418,81 +106,10 @@ declare void @consumestruct(ptr nocapture %structp, i32 %structsize) nounwind declare void @use_I(ptr byval(%struct.I.8)) define void @test_I_16() { -; NACL-LABEL: test_I_16: -; NACL: @ %bb.0: @ %entry -; NACL-NEXT: .save {r11, lr} -; NACL-NEXT: push {r11, lr} -; NACL-NEXT: .pad #40 -; NACL-NEXT: sub sp, sp, #40 -; NACL-NEXT: ldr r0, [r0] -; NACL-NEXT: mov r1, sp -; NACL-NEXT: vld1.32 {d16, d17}, [r2]! -; NACL-NEXT: vst1.32 {d16, d17}, [r1]! 
-; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: ldrb r3, [r2], #1 -; NACL-NEXT: strb r3, [r1], #1 -; NACL-NEXT: mov r2, r0 -; NACL-NEXT: mov r1, r0 -; NACL-NEXT: mov r3, r0 -; NACL-NEXT: bl use_I -; NACL-NEXT: add sp, sp, #40 -; NACL-NEXT: pop {r11, pc} -; -; NOMOVT-LABEL: test_I_16: -; NOMOVT: @ %bb.0: @ %entry -; NOMOVT-NEXT: .save {r11, lr} -; NOMOVT-NEXT: push {r11, lr} -; NOMOVT-NEXT: .setfp r11, sp -; NOMOVT-NEXT: mov r11, sp -; NOMOVT-NEXT: .pad #40 -; NOMOVT-NEXT: sub sp, sp, #40 -; NOMOVT-NEXT: bic sp, sp, #15 -; NOMOVT-NEXT: ldr r0, [r1], #4 -; NOMOVT-NEXT: mov r2, sp -; NOMOVT-NEXT: str r0, [r2], #4 -; NOMOVT-NEXT: ldr r0, [r1], #4 -; NOMOVT-NEXT: str r0, [r2], #4 -; NOMOVT-NEXT: ldr r0, [r1], #4 -; NOMOVT-NEXT: str r0, [r2], #4 -; NOMOVT-NEXT: ldr r0, [r1], #4 -; NOMOVT-NEXT: str r0, [r2], #4 -; NOMOVT-NEXT: ldr r0, [r1], #4 -; NOMOVT-NEXT: str r0, [r2], #4 -; NOMOVT-NEXT: ldr r0, [r1], #4 -; NOMOVT-NEXT: str r0, [r2], #4 -; NOMOVT-NEXT: ldr r0, [r1], #4 -; NOMOVT-NEXT: str r0, [r2], #4 -; NOMOVT-NEXT: ldr r0, [r0] -; NOMOVT-NEXT: mov r1, r0 -; NOMOVT-NEXT: mov r2, r0 -; NOMOVT-NEXT: mov r3, r0 -; NOMOVT-NEXT: bl use_I -; NOMOVT-NEXT: mov sp, r11 -; NOMOVT-NEXT: pop {r11, pc} +; CHECK-LABEL: test_I_16 +; CHECK: ldrb +; CHECK: strb entry: call void @use_I(ptr byval(%struct.I.8) align 16 undef) ret 
void } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/ARM/tail-call-float.ll b/llvm/test/CodeGen/ARM/tail-call-float.ll index 2f834976dd403c..8cca7e0f70683e 100644 --- a/llvm/test/CodeGen/ARM/tail-call-float.ll +++ b/llvm/test/CodeGen/ARM/tail-call-float.ll @@ -1,8 +1,7 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi soft -O0 -o - < %s \ -; RUN: | FileCheck %s -check-prefix CHECK-SOFT +; RUN: | FileCheck %s -check-prefix CHECK-SOFT -check-prefix CHECK ; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi hard -O0 -o - < %s \ -; RUN: | FileCheck %s -check-prefix CHECK-HARD +; RUN: | FileCheck %s -check-prefix CHECK-HARD -check-prefix CHECK ; Tests for passing floating-point regs. Variadic functions will always use ; general-purpose registers. Standard functions will use the floating-point @@ -13,42 +12,16 @@ declare i1 @non_variadic_big(float, float, float, float, float, float) declare i1 @variadic(float, ...) 
define void @non_variadic_fp(float %x, float %y) { -; CHECK-SOFT-LABEL: non_variadic_fp: -; CHECK-SOFT: @ %bb.0: @ %entry -; CHECK-SOFT-NEXT: mov r3, r1 -; CHECK-SOFT-NEXT: mov r2, r0 -; CHECK-SOFT-NEXT: mov r0, r3 -; CHECK-SOFT-NEXT: mov r1, r2 -; CHECK-SOFT-NEXT: b non_variadic -; -; CHECK-HARD-LABEL: non_variadic_fp: -; CHECK-HARD: @ %bb.0: @ %entry -; CHECK-HARD-NEXT: vmov.f32 s3, s1 -; CHECK-HARD-NEXT: vmov.f32 s2, s0 -; CHECK-HARD-NEXT: vmov.f32 s0, s3 -; CHECK-HARD-NEXT: vmov.f32 s1, s2 -; CHECK-HARD-NEXT: b non_variadic +; CHECK-LABEL: non_variadic_fp: +; CHECK: b non_variadic entry: %call = tail call i1 (float, float, float, float) @non_variadic(float %y, float %x, float %x, float %y) ret void } define void @variadic_fp(float %x, float %y) { -; CHECK-SOFT-LABEL: variadic_fp: -; CHECK-SOFT: @ %bb.0: @ %entry -; CHECK-SOFT-NEXT: mov r3, r1 -; CHECK-SOFT-NEXT: mov r2, r0 -; CHECK-SOFT-NEXT: mov r0, r3 -; CHECK-SOFT-NEXT: mov r1, r2 -; CHECK-SOFT-NEXT: b variadic -; -; CHECK-HARD-LABEL: variadic_fp: -; CHECK-HARD: @ %bb.0: @ %entry -; CHECK-HARD-NEXT: vmov r2, s0 -; CHECK-HARD-NEXT: vmov r3, s1 -; CHECK-HARD-NEXT: mov r0, r3 -; CHECK-HARD-NEXT: mov r1, r2 -; CHECK-HARD-NEXT: b variadic +; CHECK-LABEL: variadic_fp: +; CHECK: b variadic entry: %call = tail call i1 (float, ...) @variadic(float %y, float %x, float %x, float %y) ret void @@ -58,32 +31,9 @@ entry: ; of them to handle the 6 arguments. With hard-float, we have plenty of regs ; (s0-s15) to pass FP arguments. 
define void @non_variadic_fp_big(float %x, float %y) { -; CHECK-SOFT-LABEL: non_variadic_fp_big: -; CHECK-SOFT: @ %bb.0: @ %entry -; CHECK-SOFT-NEXT: push {r11, lr} -; CHECK-SOFT-NEXT: sub sp, sp, #8 -; CHECK-SOFT-NEXT: mov r3, r1 -; CHECK-SOFT-NEXT: mov r2, r0 -; CHECK-SOFT-NEXT: vmov s0, r3 -; CHECK-SOFT-NEXT: vmov s0, r2 -; CHECK-SOFT-NEXT: mov r0, sp -; CHECK-SOFT-NEXT: str r3, [r0, #4] -; CHECK-SOFT-NEXT: str r2, [r0] -; CHECK-SOFT-NEXT: mov r0, r3 -; CHECK-SOFT-NEXT: mov r1, r2 -; CHECK-SOFT-NEXT: bl non_variadic_big -; CHECK-SOFT-NEXT: add sp, sp, #8 -; CHECK-SOFT-NEXT: pop {r11, pc} -; -; CHECK-HARD-LABEL: non_variadic_fp_big: -; CHECK-HARD: @ %bb.0: @ %entry -; CHECK-HARD-NEXT: vmov.f32 s5, s1 -; CHECK-HARD-NEXT: vmov.f32 s4, s0 -; CHECK-HARD-NEXT: vmov.f32 s0, s5 -; CHECK-HARD-NEXT: vmov.f32 s1, s4 -; CHECK-HARD-NEXT: vmov.f32 s2, s4 -; CHECK-HARD-NEXT: vmov.f32 s3, s5 -; CHECK-HARD-NEXT: b non_variadic_big +; CHECK-LABEL: non_variadic_fp_big: +; CHECK-SOFT: bl non_variadic_big +; CHECK-HARD: b non_variadic_big entry: %call = tail call i1 (float, float, float, float, float, float) @non_variadic_big(float %y, float %x, float %x, float %y, float %x, float %y) ret void @@ -91,40 +41,9 @@ entry: ; Variadic functions cannot use FP regs to pass arguments; only GP regs. 
define void @variadic_fp_big(float %x, float %y) { -; CHECK-SOFT-LABEL: variadic_fp_big: -; CHECK-SOFT: @ %bb.0: @ %entry -; CHECK-SOFT-NEXT: push {r11, lr} -; CHECK-SOFT-NEXT: sub sp, sp, #8 -; CHECK-SOFT-NEXT: mov r3, r1 -; CHECK-SOFT-NEXT: mov r2, r0 -; CHECK-SOFT-NEXT: vmov s0, r3 -; CHECK-SOFT-NEXT: vmov s0, r2 -; CHECK-SOFT-NEXT: mov r0, sp -; CHECK-SOFT-NEXT: str r3, [r0, #4] -; CHECK-SOFT-NEXT: str r2, [r0] -; CHECK-SOFT-NEXT: mov r0, r3 -; CHECK-SOFT-NEXT: mov r1, r2 -; CHECK-SOFT-NEXT: bl variadic -; CHECK-SOFT-NEXT: add sp, sp, #8 -; CHECK-SOFT-NEXT: pop {r11, pc} -; -; CHECK-HARD-LABEL: variadic_fp_big: -; CHECK-HARD: @ %bb.0: @ %entry -; CHECK-HARD-NEXT: push {r11, lr} -; CHECK-HARD-NEXT: sub sp, sp, #8 -; CHECK-HARD-NEXT: mov r0, sp -; CHECK-HARD-NEXT: vstr s1, [r0, #4] -; CHECK-HARD-NEXT: vstr s0, [r0] -; CHECK-HARD-NEXT: vmov r2, s0 -; CHECK-HARD-NEXT: vmov r3, s1 -; CHECK-HARD-NEXT: mov r0, r3 -; CHECK-HARD-NEXT: mov r1, r2 -; CHECK-HARD-NEXT: bl variadic -; CHECK-HARD-NEXT: add sp, sp, #8 -; CHECK-HARD-NEXT: pop {r11, pc} +; CHECK-LABEL: variadic_fp_big: +; CHECK: bl variadic entry: %call = tail call i1 (float, ...) @variadic(float %y, float %x, float %x, float %y, float %x, float %y) ret void } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; CHECK: {{.*}} >From 820306156a7e274ebedcb1e9ba5083aae798e48d Mon Sep 17 00:00:00 2001 From: Kiran <kiran.st...@arm.com> Date: Thu, 29 Aug 2024 10:09:55 +0100 Subject: [PATCH 4/5] remove extra check line --- llvm/test/CodeGen/ARM/fp-arg-shuffle.ll | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll index 73c0a34fbc2f4a..9e400364a60998 100644 --- a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll +++ b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll @@ -1,7 +1,6 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+neon -float-abi=soft %s -o - | FileCheck %s ; CHECK: function1 -; CHECK-NOT: vmov define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp { entry: ; CHECK-LABEL: function1: >From 973303bb1fa0943ac0f269cfc27e97b8e0f13dc9 Mon Sep 17 00:00:00 2001 From: Kiran <kiran.st...@arm.com> Date: Fri, 30 Aug 2024 11:56:48 +0100 Subject: [PATCH 5/5] added large test case --- llvm/test/CodeGen/ARM/musttail.ll | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/llvm/test/CodeGen/ARM/musttail.ll b/llvm/test/CodeGen/ARM/musttail.ll index 93e661e59335ad..145e95bdf47a7b 100644 --- a/llvm/test/CodeGen/ARM/musttail.ll +++ b/llvm/test/CodeGen/ARM/musttail.ll @@ -28,4 +28,16 @@ entry: ; CHECK: b sret_callee musttail call void @sret_callee(ptr dead_on_unwind writable sret({ double, double }) align 8 %agg.result, i16 noundef signext 20391) #2 ret void -} \ No newline at end of file +} + +%struct.Large = type { [60 x i32] } + +; Function Attrs: mustprogress noinline optnone +define dso_local void @large_caller(i64 noundef %0, i64 noundef %1, %struct.Large* noundef byval(%struct.Large) align 4 %2, %struct.Large* noundef byval(%struct.Large) align 4 %3) #0 { +entry: +; CHECK: b large_callee + musttail call void @large_callee(i64 noundef %0, i64 noundef %1, %struct.Large* noundef byval(%struct.Large) align 4 %2, 
%struct.Large* noundef byval(%struct.Large) align 4 %3) + ret void +} + +declare dso_local void @large_callee(i64 noundef, i64 noundef, %struct.Large* noundef byval(%struct.Large) align 4, %struct.Large* noundef byval(%struct.Large) align 4) #1 \ No newline at end of file _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits