[clang] [llvm] [ARM] musttail fixes (PR #102896)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 02:50:58 PDT 2024
https://github.com/kiran-isaac updated https://github.com/llvm/llvm-project/pull/102896
>From 24806b179d34b4afb21832e4a2150c13995b59e0 Mon Sep 17 00:00:00 2001
From: Kiran <kiran.sturt at arm.com>
Date: Thu, 8 Aug 2024 13:07:24 +0100
Subject: [PATCH 1/3] [ARM] musttail fixes
Backend:
- Caller and callee arguments no longer have to match, just to take up the same space, as they can be changed before the call
- Allowed tail calls if caller and callee both (or neither) use sret, whereas before it would be disallowed if either used sret
- Allowed tail calls if byval args are used
- Added debug trace for IsEligibleForTailCallOptimization
Frontend (clang):
- Do not generate extra alloca if sret is used with musttail, as the space for the sret is allocated already
Change-Id: Ic7f246a7eca43c06874922d642d7dc44bdfc98ec
---
clang/lib/CodeGen/CGCall.cpp | 2 +-
llvm/include/llvm/CodeGen/CallingConvLower.h | 2 +
llvm/lib/CodeGen/CallingConvLower.cpp | 61 +++
llvm/lib/Target/ARM/ARMISelLowering.cpp | 141 ++----
.../ARM/2013-05-13-AAPCS-byval-padding.ll | 16 +-
.../ARM/2013-05-13-AAPCS-byval-padding2.ll | 13 +-
llvm/test/CodeGen/ARM/fp-arg-shuffle.ll | 22 +
llvm/test/CodeGen/ARM/fp16-vector-argument.ll | 41 +-
llvm/test/CodeGen/ARM/struct_byval.ll | 455 ++++++++++++++++--
llvm/test/CodeGen/ARM/tail-call-float.ll | 99 +++-
10 files changed, 661 insertions(+), 191 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 070001a180ab88..1144de0b6ba7b5 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5085,7 +5085,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
RawAddress SRetAlloca = RawAddress::invalid();
llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
- if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) {
+ if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) {
SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() +
IRFunctionArgs.getSRetArgNo(),
RetTy, CharUnits::fromQuantity(1));
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 932a2a94ab1f1a..fdb5982cb2042b 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -540,6 +540,8 @@ class CCState {
});
}
+ void dump() const;
+
private:
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void MarkAllocated(MCPhysReg Reg);
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index b7152587a9fa05..7ba3ea83115db2 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -290,3 +290,64 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(),
RVLocs2.end(), AreCompatible);
}
+
+void CCState::dump() const {
+ dbgs() << "CCState:\n";
+ for (const CCValAssign &Loc : Locs) {
+ if (Loc.isRegLoc()) {
+ dbgs() << " Reg " << TRI.getName(Loc.getLocReg());
+ } else if (Loc.isMemLoc()) {
+ dbgs() << " Mem " << Loc.getLocMemOffset();
+ } else {
+ assert(Loc.isPendingLoc());
+ dbgs() << " Pend " << Loc.getExtraInfo();
+ }
+
+ dbgs() << " ValVT:" << Loc.getValVT();
+ dbgs() << " LocVT:" << Loc.getLocVT();
+
+ if (Loc.needsCustom())
+ dbgs() << " custom";
+
+ switch (Loc.getLocInfo()) {
+ case CCValAssign::Full:
+ dbgs() << " Full";
+ break;
+ case CCValAssign::SExt:
+ dbgs() << " SExt";
+ break;
+ case CCValAssign::ZExt:
+ dbgs() << " ZExt";
+ break;
+ case CCValAssign::AExt:
+ dbgs() << " AExt";
+ break;
+ case CCValAssign::SExtUpper:
+ dbgs() << " SExtUpper";
+ break;
+ case CCValAssign::ZExtUpper:
+ dbgs() << " ZExtUpper";
+ break;
+ case CCValAssign::AExtUpper:
+ dbgs() << " AExtUpper";
+ break;
+ case CCValAssign::BCvt:
+ dbgs() << " BCvt";
+ break;
+ case CCValAssign::Trunc:
+ dbgs() << " Trunc";
+ break;
+ case CCValAssign::VExt:
+ dbgs() << " VExt";
+ break;
+ case CCValAssign::FPExt:
+ dbgs() << " FPExt";
+ break;
+ case CCValAssign::Indirect:
+ dbgs() << " Indirect";
+ break;
+ }
+
+ dbgs() << "\n";
+ }
+}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 476b7b349294ab..aeba673f9a2f77 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2407,8 +2407,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = false;
// For both the non-secure calls and the returns from a CMSE entry function,
- // the function needs to do some extra work afte r the call, or before the
- // return, respectively, thus it cannot end with atail call
+ // the function needs to do some extra work after the call, or before the
+ // return, respectively, thus it cannot end with a tail call
if (isCmseNSCall || AFI->isCmseNSEntryFunction())
isTailCall = false;
@@ -2959,50 +2959,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
Size = std::max<int>(Size - Excess, 0);
}
-/// MatchingStackOffset - Return true if the given stack call argument is
-/// already available in the same position (relatively) of the caller's
-/// incoming argument stack.
-static
-bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
- MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII) {
- unsigned Bytes = Arg.getValueSizeInBits() / 8;
- int FI = std::numeric_limits<int>::max();
- if (Arg.getOpcode() == ISD::CopyFromReg) {
- Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!VR.isVirtual())
- return false;
- MachineInstr *Def = MRI->getVRegDef(VR);
- if (!Def)
- return false;
- if (!Flags.isByVal()) {
- if (!TII->isLoadFromStackSlot(*Def, FI))
- return false;
- } else {
- return false;
- }
- } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
- if (Flags.isByVal())
- // ByVal argument is passed in as a pointer but it's now being
- // dereferenced. e.g.
- // define @foo(%struct.X* %A) {
- // tail call @bar(%struct.X* byval %A)
- // }
- return false;
- SDValue Ptr = Ld->getBasePtr();
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
- if (!FINode)
- return false;
- FI = FINode->getIndex();
- } else
- return false;
-
- assert(FI != std::numeric_limits<int>::max());
- if (!MFI.isFixedObjectIndex(FI))
- return false;
- return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
-}
-
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function. Note that this function also
@@ -3044,8 +3000,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
for (const CCValAssign &AL : ArgLocs)
if (AL.isRegLoc())
AddressRegisters.erase(AL.getLocReg());
- if (AddressRegisters.empty())
+ if (AddressRegisters.empty()) {
+ LLVM_DEBUG(dbgs() << "false (no space for target address)\n");
return false;
+ }
}
// Look for obvious safe cases to perform tail call optimization that do not
@@ -3054,18 +3012,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
- if (CallerF.hasFnAttribute("interrupt"))
+ if (CallerF.hasFnAttribute("interrupt")) {
+ LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
return false;
+ }
- if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
+ if (canGuaranteeTCO(CalleeCC,
+ getTargetMachine().Options.GuaranteedTailCallOpt)) {
+ LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
+ << " (guaranteed tail-call CC)\n");
return CalleeCC == CallerCC;
+ }
- // Also avoid sibcall optimization if either caller or callee uses struct
- // return semantics.
+ // Also avoid sibcall optimization if only one of caller or callee uses
+ // struct return semantics.
bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
- if (isCalleeStructRet || isCallerStructRet)
+ if (isCalleeStructRet != isCallerStructRet) {
+ LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
return false;
+ }
// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
@@ -3078,8 +3044,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
- (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+ TT.isOSBinFormatMachO())) {
+ LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
return false;
+ }
}
// Check that the call results are passed in the same way.
@@ -3088,70 +3057,44 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
getEffectiveCallingConv(CalleeCC, isVarArg),
getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
CCAssignFnForReturn(CalleeCC, isVarArg),
- CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
+ CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
+ LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
return false;
+ }
// The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (CalleeCC != CallerCC) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
- if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
+ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
+ LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
return false;
+ }
}
- // If Caller's vararg or byval argument has been split between registers and
+ // If Caller's vararg argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
// local frame.
const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
- if (AFI_Caller->getArgRegsSaveSize())
+ if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
+ LLVM_DEBUG(dbgs() << "false (vararg arg reg save area)\n");
return false;
+ }
// If the callee takes no arguments then go on to check the results of the
// call.
- if (!Outs.empty()) {
- if (CCInfo.getStackSize()) {
- // Check if the arguments are already laid out in the right way as
- // the caller's fixed stack objects.
- MachineFrameInfo &MFI = MF.getFrameInfo();
- const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
- i != e;
- ++i, ++realArgIdx) {
- CCValAssign &VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
- SDValue Arg = OutVals[realArgIdx];
- ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
- if (VA.getLocInfo() == CCValAssign::Indirect)
- return false;
- if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
- // f64 and vector types are split into multiple registers or
- // register/stack-slot combinations. The types will not match
- // the registers; give up on memory f64 refs until we figure
- // out what to do about this.
- if (!VA.isRegLoc())
- return false;
- if (!ArgLocs[++i].isRegLoc())
- return false;
- if (RegVT == MVT::v2f64) {
- if (!ArgLocs[++i].isRegLoc())
- return false;
- if (!ArgLocs[++i].isRegLoc())
- return false;
- }
- } else if (!VA.isRegLoc()) {
- if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
- MFI, MRI, TII))
- return false;
- }
- }
- }
-
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
- return false;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
+ LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
+ return false;
}
+ // If the stack arguments for this call do not fit into our own save area then
+ // the call cannot be made tail.
+ if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "true\n");
return true;
}
diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
index d8e22f4f5312ae..e186ae3a961502 100644
--- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
+++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
@@ -12,17 +12,11 @@ define void @check227(
; arg1 --> SP+188
entry:
-
-;CHECK: sub sp, sp, #12
-;CHECK: push {r11, lr}
-;CHECK: sub sp, sp, #4
-;CHECK: add r0, sp, #12
-;CHECK: stm r0, {r1, r2, r3}
-;CHECK: ldr r0, [sp, #212]
-;CHECK: bl useInt
-;CHECK: add sp, sp, #4
-;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #12
+; CHECK: sub sp, sp, #12
+; CHECK: stm sp, {r1, r2, r3}
+; CHECK: ldr r0, [sp, #200]
+; CHECK: add sp, sp, #12
+; CHECK: b useInt
%0 = ptrtoint ptr %arg1 to i32
tail call void @useInt(i32 %0)
diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
index 0c5d22984b99e1..efdecce9ae723a 100644
--- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
+++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
@@ -7,14 +7,11 @@
define void @foo(ptr byval(%struct4bytes) %p0, ; --> R0
ptr byval(%struct20bytes) %p1 ; --> R1,R2,R3, [SP+0 .. SP+8)
) {
-;CHECK: sub sp, sp, #16
-;CHECK: push {r11, lr}
-;CHECK: add r12, sp, #8
-;CHECK: stm r12, {r0, r1, r2, r3}
-;CHECK: add r0, sp, #12
-;CHECK: bl useInt
-;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #16
+;CHECK: sub sp, sp, #16
+;CHECK: stm sp, {r0, r1, r2, r3}
+;CHECK: add r0, sp, #4
+;CHECK: add sp, sp, #16
+;CHECK: b useInt
%1 = ptrtoint ptr %p1 to i32
tail call void @useInt(i32 %1)
diff --git a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
index 4996cc8ecbf022..2ceb7a7b97a1fe 100644
--- a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
+++ b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
@@ -3,6 +3,28 @@
; CHECK: function1
; CHECK-NOT: vmov
define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp {
+; CHECK-LABEL: function1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r11, lr}
+; CHECK-NEXT: push {r4, r5, r11, lr}
+; CHECK-NEXT: vldr d16, [sp, #40]
+; CHECK-NEXT: vldr d17, [sp, #32]
+; CHECK-NEXT: vmov r12, lr, d16
+; CHECK-NEXT: vldr d16, [sp, #16]
+; CHECK-NEXT: vmov r4, r5, d17
+; CHECK-NEXT: vldr d17, [sp, #24]
+; CHECK-NEXT: str r3, [sp, #36]
+; CHECK-NEXT: str r2, [sp, #32]
+; CHECK-NEXT: str r1, [sp, #44]
+; CHECK-NEXT: str r0, [sp, #40]
+; CHECK-NEXT: vstr d17, [sp, #16]
+; CHECK-NEXT: vstr d16, [sp, #24]
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: pop {r4, r5, r11, lr}
+; CHECK-NEXT: b function2
entry:
%call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind
ret double %call
diff --git a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll
index 6fc56967bc7aa9..65aff46658fd1d 100644
--- a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll
@@ -145,26 +145,21 @@ entry:
define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x half>, <8 x half>) {
; SOFT-LABEL: many_args_test:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: push {r11, lr}
-; SOFT-NEXT: sub sp, sp, #32
-; SOFT-NEXT: add r12, sp, #80
+; SOFT-NEXT: add r12, sp, #40
; SOFT-NEXT: vld1.64 {d16, d17}, [r12]
-; SOFT-NEXT: add r12, sp, #48
+; SOFT-NEXT: add r12, sp, #8
; SOFT-NEXT: vabs.f16 q8, q8
; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
-; SOFT-NEXT: add r12, sp, #64
+; SOFT-NEXT: add r12, sp, #24
; SOFT-NEXT: vadd.f16 q8, q8, q9
; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
; SOFT-NEXT: add r12, sp, #16
; SOFT-NEXT: vmul.f16 q8, q9, q8
; SOFT-NEXT: vst1.64 {d16, d17}, [r12]
-; SOFT-NEXT: mov r12, sp
-; SOFT-NEXT: vldr d16, [sp, #40]
-; SOFT-NEXT: vst1.16 {d16}, [r12:64]!
-; SOFT-NEXT: str r3, [r12]
-; SOFT-NEXT: bl use
-; SOFT-NEXT: add sp, sp, #32
-; SOFT-NEXT: pop {r11, pc}
+; SOFT-NEXT: vldr d16, [sp]
+; SOFT-NEXT: vstr d16, [sp]
+; SOFT-NEXT: str r3, [sp, #8]
+; SOFT-NEXT: b use
;
; HARD-LABEL: many_args_test:
; HARD: @ %bb.0: @ %entry
@@ -177,33 +172,25 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal
;
; SOFTEB-LABEL: many_args_test:
; SOFTEB: @ %bb.0: @ %entry
-; SOFTEB-NEXT: .save {r11, lr}
-; SOFTEB-NEXT: push {r11, lr}
-; SOFTEB-NEXT: .pad #32
-; SOFTEB-NEXT: sub sp, sp, #32
-; SOFTEB-NEXT: add r12, sp, #80
-; SOFTEB-NEXT: mov lr, sp
+; SOFTEB-NEXT: add r12, sp, #40
; SOFTEB-NEXT: vld1.64 {d16, d17}, [r12]
-; SOFTEB-NEXT: add r12, sp, #48
+; SOFTEB-NEXT: add r12, sp, #8
; SOFTEB-NEXT: vrev64.16 q8, q8
; SOFTEB-NEXT: vabs.f16 q8, q8
; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12]
-; SOFTEB-NEXT: add r12, sp, #64
+; SOFTEB-NEXT: add r12, sp, #24
; SOFTEB-NEXT: vrev64.16 q9, q9
; SOFTEB-NEXT: vadd.f16 q8, q8, q9
; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12]
; SOFTEB-NEXT: add r12, sp, #16
; SOFTEB-NEXT: vrev64.16 q9, q9
; SOFTEB-NEXT: vmul.f16 q8, q9, q8
-; SOFTEB-NEXT: vldr d18, [sp, #40]
-; SOFTEB-NEXT: vrev64.16 d18, d18
-; SOFTEB-NEXT: vst1.16 {d18}, [lr:64]!
-; SOFTEB-NEXT: str r3, [lr]
+; SOFTEB-NEXT: vldr d18, [sp]
; SOFTEB-NEXT: vrev64.16 q8, q8
; SOFTEB-NEXT: vst1.64 {d16, d17}, [r12]
-; SOFTEB-NEXT: bl use
-; SOFTEB-NEXT: add sp, sp, #32
-; SOFTEB-NEXT: pop {r11, pc}
+; SOFTEB-NEXT: vstr d18, [sp]
+; SOFTEB-NEXT: str r3, [sp, #8]
+; SOFTEB-NEXT: b use
;
; HARDEB-LABEL: many_args_test:
; HARDEB: @ %bb.0: @ %entry
diff --git a/llvm/test/CodeGen/ARM/struct_byval.ll b/llvm/test/CodeGen/ARM/struct_byval.ll
index 73a1b5ee33bca9..5564f254c9e74d 100644
--- a/llvm/test/CodeGen/ARM/struct_byval.ll
+++ b/llvm/test/CodeGen/ARM/struct_byval.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s
; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi | FileCheck %s -check-prefix=NACL
@@ -10,11 +11,122 @@
%struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] }
define i32 @f() nounwind ssp {
+; NACL-LABEL: f:
+; NACL: @ %bb.0: @ %entry
+; NACL-NEXT: .save {r4, lr}
+; NACL-NEXT: push {r4, lr}
+; NACL-NEXT: .pad #152
+; NACL-NEXT: sub sp, sp, #152
+; NACL-NEXT: movw r0, :lower16:__stack_chk_guard
+; NACL-NEXT: add r3, sp, #72
+; NACL-NEXT: movt r0, :upper16:__stack_chk_guard
+; NACL-NEXT: mov lr, sp
+; NACL-NEXT: ldr r0, [r0]
+; NACL-NEXT: str r0, [sp, #148]
+; NACL-NEXT: add r0, sp, #72
+; NACL-NEXT: add r12, r0, #16
+; NACL-NEXT: ldm r3, {r0, r1, r2, r3}
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: bl e1
+; NACL-NEXT: movw r1, :lower16:__stack_chk_guard
+; NACL-NEXT: ldr r0, [sp, #148]
+; NACL-NEXT: movt r1, :upper16:__stack_chk_guard
+; NACL-NEXT: ldr r1, [r1]
+; NACL-NEXT: cmp r1, r0
+; NACL-NEXT: moveq r0, #0
+; NACL-NEXT: addeq sp, sp, #152
+; NACL-NEXT: popeq {r4, pc}
+; NACL-NEXT: .LBB0_1: @ %entry
+; NACL-NEXT: bl __stack_chk_fail
+;
+; NOMOVT-LABEL: f:
+; NOMOVT: @ %bb.0: @ %entry
+; NOMOVT-NEXT: .save {r11, lr}
+; NOMOVT-NEXT: push {r11, lr}
+; NOMOVT-NEXT: .pad #144
+; NOMOVT-NEXT: sub sp, sp, #144
+; NOMOVT-NEXT: ldr r0, .LCPI0_0
+; NOMOVT-NEXT: mov r1, sp
+; NOMOVT-NEXT: add r3, sp, #64
+; NOMOVT-NEXT: ldr r0, [r0]
+; NOMOVT-NEXT: str r0, [sp, #140]
+; NOMOVT-NEXT: add r0, sp, #64
+; NOMOVT-NEXT: add r0, r0, #16
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldr r2, [r0], #4
+; NOMOVT-NEXT: str r2, [r1], #4
+; NOMOVT-NEXT: ldm r3, {r0, r1, r2, r3}
+; NOMOVT-NEXT: bl e1
+; NOMOVT-NEXT: ldr r0, [sp, #140]
+; NOMOVT-NEXT: ldr r1, .LCPI0_0
+; NOMOVT-NEXT: ldr r1, [r1]
+; NOMOVT-NEXT: cmp r1, r0
+; NOMOVT-NEXT: moveq r0, #0
+; NOMOVT-NEXT: addeq sp, sp, #144
+; NOMOVT-NEXT: popeq {r11, pc}
+; NOMOVT-NEXT: .LBB0_1: @ %entry
+; NOMOVT-NEXT: bl __stack_chk_fail
+; NOMOVT-NEXT: .p2align 2
+; NOMOVT-NEXT: @ %bb.2:
+; NOMOVT-NEXT: .LCPI0_0:
+; NOMOVT-NEXT: .long __stack_chk_guard
entry:
-; CHECK-LABEL: f:
-; CHECK: ldr
-; CHECK: str
-; CHECK-NOT:bne
%st = alloca %struct.SmallStruct, align 4
%call = call i32 @e1(ptr byval(%struct.SmallStruct) %st)
ret i32 0
@@ -22,20 +134,95 @@ entry:
; Generate a loop for large struct byval
define i32 @g() nounwind ssp {
-entry:
-; CHECK-LABEL: g:
-; CHECK: ldr
-; CHECK: sub
-; CHECK: str
-; CHECK: bne
; NACL-LABEL: g:
+; NACL: @ %bb.0: @ %entry
+; NACL-NEXT: .save {r4, r5, r11, lr}
+; NACL-NEXT: push {r4, r5, r11, lr}
+; NACL-NEXT: .pad #2224
+; NACL-NEXT: sub sp, sp, #2224
+; NACL-NEXT: movw r0, :lower16:__stack_chk_guard
+; NACL-NEXT: movt r0, :upper16:__stack_chk_guard
+; NACL-NEXT: ldr r0, [r0]
+; NACL-NEXT: str r0, [sp, #2220]
+; NACL-NEXT: sub sp, sp, #2192
+; NACL-NEXT: add lr, sp, #2048
+; NACL-NEXT: ldr r1, [sp, #2208]
+; NACL-NEXT: add r0, lr, #156
+; NACL-NEXT: ldr r2, [sp, #2212]
+; NACL-NEXT: add r12, r0, #16
+; NACL-NEXT: ldr r0, [sp, #2204]
+; NACL-NEXT: ldr r3, [sp, #2216]
+; NACL-NEXT: movw lr, #2192
+; NACL-NEXT: mov r4, sp
+; NACL-NEXT: .LBB1_1: @ %entry
+; NACL-NEXT: @ =>This Inner Loop Header: Depth=1
+; NACL-NEXT: ldr r5, [r12], #4
+; NACL-NEXT: subs lr, lr, #4
+; NACL-NEXT: str r5, [r4], #4
+; NACL-NEXT: bne .LBB1_1
+; NACL-NEXT: @ %bb.2: @ %entry
+; NACL-NEXT: bl e2
+; NACL-NEXT: add sp, sp, #2192
+; NACL-NEXT: movw r1, :lower16:__stack_chk_guard
+; NACL-NEXT: ldr r0, [sp, #2220]
+; NACL-NEXT: movt r1, :upper16:__stack_chk_guard
+; NACL-NEXT: ldr r1, [r1]
+; NACL-NEXT: cmp r1, r0
+; NACL-NEXT: moveq r0, #0
+; NACL-NEXT: addeq sp, sp, #2224
+; NACL-NEXT: popeq {r4, r5, r11, pc}
+; NACL-NEXT: .LBB1_3: @ %entry
+; NACL-NEXT: bl __stack_chk_fail
+;
+; NOMOVT-LABEL: g:
+; NOMOVT: @ %bb.0: @ %entry
+; NOMOVT-NEXT: .save {r11, lr}
+; NOMOVT-NEXT: push {r11, lr}
+; NOMOVT-NEXT: .pad #168
+; NOMOVT-NEXT: sub sp, sp, #168
+; NOMOVT-NEXT: .pad #2048
+; NOMOVT-NEXT: sub sp, sp, #2048
+; NOMOVT-NEXT: ldr r0, .LCPI1_1
+; NOMOVT-NEXT: ldr r0, [r0]
+; NOMOVT-NEXT: str r0, [sp, #2212]
+; NOMOVT-NEXT: sub sp, sp, #2192
+; NOMOVT-NEXT: add lr, sp, #2048
+; NOMOVT-NEXT: ldr r1, .LCPI1_0
+; NOMOVT-NEXT: add r0, lr, #148
+; NOMOVT-NEXT: mov r2, sp
+; NOMOVT-NEXT: add r0, r0, #16
+; NOMOVT-NEXT: .LBB1_1: @ %entry
+; NOMOVT-NEXT: @ =>This Inner Loop Header: Depth=1
+; NOMOVT-NEXT: ldr r3, [r0], #4
+; NOMOVT-NEXT: subs r1, r1, #4
+; NOMOVT-NEXT: str r3, [r2], #4
+; NOMOVT-NEXT: bne .LBB1_1
+; NOMOVT-NEXT: @ %bb.2: @ %entry
+; NOMOVT-NEXT: ldr r0, [sp, #2196]
+; NOMOVT-NEXT: ldr r1, [sp, #2200]
+; NOMOVT-NEXT: ldr r2, [sp, #2204]
+; NOMOVT-NEXT: ldr r3, [sp, #2208]
+; NOMOVT-NEXT: bl e2
+; NOMOVT-NEXT: add sp, sp, #2192
+; NOMOVT-NEXT: ldr r0, [sp, #2212]
+; NOMOVT-NEXT: ldr r1, .LCPI1_1
+; NOMOVT-NEXT: ldr r1, [r1]
+; NOMOVT-NEXT: cmp r1, r0
+; NOMOVT-NEXT: moveq r0, #0
+; NOMOVT-NEXT: addeq sp, sp, #168
+; NOMOVT-NEXT: addeq sp, sp, #2048
+; NOMOVT-NEXT: popeq {r11, pc}
+; NOMOVT-NEXT: .LBB1_3: @ %entry
+; NOMOVT-NEXT: bl __stack_chk_fail
+; NOMOVT-NEXT: .p2align 2
+; NOMOVT-NEXT: @ %bb.4:
+; NOMOVT-NEXT: .LCPI1_0:
+; NOMOVT-NEXT: .long 2192 @ 0x890
+; NOMOVT-NEXT: .LCPI1_1:
+; NOMOVT-NEXT: .long __stack_chk_guard
+entry:
; Ensure that use movw instead of constpool for the loop trip count. But don't
; match the __stack_chk_guard movw
-; NACL: movw {{r[0-9]+|lr}}, #
-; NACL: ldr
-; NACL: sub
-; NACL: str
-; NACL: bne
%st = alloca %struct.LargeStruct, align 4
%call = call i32 @e2(ptr byval(%struct.LargeStruct) %st)
ret i32 0
@@ -43,17 +230,90 @@ entry:
; Generate a loop using NEON instructions
define i32 @h() nounwind ssp {
+; NACL-LABEL: h:
+; NACL: @ %bb.0: @ %entry
+; NACL-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; NACL-NEXT: push {r4, r5, r6, r7, r8, lr}
+; NACL-NEXT: .pad #168
+; NACL-NEXT: sub sp, sp, #168
+; NACL-NEXT: .pad #2048
+; NACL-NEXT: sub sp, sp, #2048
+; NACL-NEXT: movw r0, :lower16:__stack_chk_guard
+; NACL-NEXT: movt r0, :upper16:__stack_chk_guard
+; NACL-NEXT: ldr r0, [r0]
+; NACL-NEXT: str r0, [sp, #2212]
+; NACL-NEXT: sub sp, sp, #2192
+; NACL-NEXT: add r3, sp, #2192
+; NACL-NEXT: add r0, sp, #2192
+; NACL-NEXT: add r12, r0, #16
+; NACL-NEXT: movw lr, #2192
+; NACL-NEXT: ldm r3, {r0, r1, r2, r3}
+; NACL-NEXT: mov r4, sp
+; NACL-NEXT: .LBB2_1: @ %entry
+; NACL-NEXT: @ =>This Inner Loop Header: Depth=1
+; NACL-NEXT: vld1.32 {d16, d17}, [r12]!
+; NACL-NEXT: subs lr, lr, #16
+; NACL-NEXT: vst1.32 {d16, d17}, [r4]!
+; NACL-NEXT: bne .LBB2_1
+; NACL-NEXT: @ %bb.2: @ %entry
+; NACL-NEXT: bl e3
+; NACL-NEXT: add sp, sp, #2192
+; NACL-NEXT: movw r1, :lower16:__stack_chk_guard
+; NACL-NEXT: ldr r0, [sp, #2212]
+; NACL-NEXT: movt r1, :upper16:__stack_chk_guard
+; NACL-NEXT: ldr r1, [r1]
+; NACL-NEXT: cmp r1, r0
+; NACL-NEXT: moveq r0, #0
+; NACL-NEXT: addeq sp, sp, #168
+; NACL-NEXT: addeq sp, sp, #2048
+; NACL-NEXT: popeq {r4, r5, r6, r7, r8, pc}
+; NACL-NEXT: .LBB2_3: @ %entry
+; NACL-NEXT: bl __stack_chk_fail
+;
+; NOMOVT-LABEL: h:
+; NOMOVT: @ %bb.0: @ %entry
+; NOMOVT-NEXT: .save {r6, r10, r11, lr}
+; NOMOVT-NEXT: push {r6, r10, r11, lr}
+; NOMOVT-NEXT: .setfp r11, sp, #8
+; NOMOVT-NEXT: add r11, sp, #8
+; NOMOVT-NEXT: .pad #2224
+; NOMOVT-NEXT: sub sp, sp, #2224
+; NOMOVT-NEXT: bic sp, sp, #15
+; NOMOVT-NEXT: ldr r0, .LCPI2_1
+; NOMOVT-NEXT: mov r6, sp
+; NOMOVT-NEXT: ldr r0, [r0]
+; NOMOVT-NEXT: str r0, [r6, #2220]
+; NOMOVT-NEXT: sub sp, sp, #2192
+; NOMOVT-NEXT: mov r0, r6
+; NOMOVT-NEXT: ldr r1, .LCPI2_0
+; NOMOVT-NEXT: add r0, r0, #16
+; NOMOVT-NEXT: mov r2, sp
+; NOMOVT-NEXT: .LBB2_1: @ %entry
+; NOMOVT-NEXT: @ =>This Inner Loop Header: Depth=1
+; NOMOVT-NEXT: ldr r3, [r0], #4
+; NOMOVT-NEXT: subs r1, r1, #4
+; NOMOVT-NEXT: str r3, [r2], #4
+; NOMOVT-NEXT: bne .LBB2_1
+; NOMOVT-NEXT: @ %bb.2: @ %entry
+; NOMOVT-NEXT: ldm r6, {r0, r1, r2, r3}
+; NOMOVT-NEXT: bl e3
+; NOMOVT-NEXT: add sp, sp, #2192
+; NOMOVT-NEXT: ldr r0, [r6, #2220]
+; NOMOVT-NEXT: ldr r1, .LCPI2_1
+; NOMOVT-NEXT: ldr r1, [r1]
+; NOMOVT-NEXT: cmp r1, r0
+; NOMOVT-NEXT: moveq r0, #0
+; NOMOVT-NEXT: subeq sp, r11, #8
+; NOMOVT-NEXT: popeq {r6, r10, r11, pc}
+; NOMOVT-NEXT: .LBB2_3: @ %entry
+; NOMOVT-NEXT: bl __stack_chk_fail
+; NOMOVT-NEXT: .p2align 2
+; NOMOVT-NEXT: @ %bb.4:
+; NOMOVT-NEXT: .LCPI2_0:
+; NOMOVT-NEXT: .long 2192 @ 0x890
+; NOMOVT-NEXT: .LCPI2_1:
+; NOMOVT-NEXT: .long __stack_chk_guard
entry:
-; CHECK-LABEL: h:
-; CHECK: vld1
-; CHECK: sub
-; CHECK: vst1
-; CHECK: bne
-; NACL: movw {{r[0-9]+|lr}}, #
-; NACL: vld1
-; NACL: sub
-; NACL: vst1
-; NACL: bne
%st = alloca %struct.LargeStruct, align 16
%call = call i32 @e3(ptr byval(%struct.LargeStruct) align 16 %st)
ret i32 0
@@ -67,16 +327,50 @@ declare i32 @e3(ptr nocapture byval(%struct.LargeStruct) align 16 %in) nounwind
; We can't do tail call since address of s is passed to the callee and part of
; s is in caller's local frame.
define void @f3(ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize {
-; CHECK-LABEL: f3
-; CHECK: bl _consumestruct
+; NACL-LABEL: f3:
+; NACL: @ %bb.0: @ %entry
+; NACL-NEXT: .pad #16
+; NACL-NEXT: sub sp, sp, #16
+; NACL-NEXT: stm sp, {r0, r1, r2, r3}
+; NACL-NEXT: mov r0, sp
+; NACL-NEXT: mov r1, #80
+; NACL-NEXT: add sp, sp, #16
+; NACL-NEXT: b consumestruct
+;
+; NOMOVT-LABEL: f3:
+; NOMOVT: @ %bb.0: @ %entry
+; NOMOVT-NEXT: .pad #16
+; NOMOVT-NEXT: sub sp, sp, #16
+; NOMOVT-NEXT: stm sp, {r0, r1, r2, r3}
+; NOMOVT-NEXT: mov r0, sp
+; NOMOVT-NEXT: mov r1, #80
+; NOMOVT-NEXT: add sp, sp, #16
+; NOMOVT-NEXT: b consumestruct
entry:
tail call void @consumestruct(ptr %s, i32 80) optsize
ret void
}
define void @f4(ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize {
-; CHECK-LABEL: f4
-; CHECK: bl _consumestruct
+; NACL-LABEL: f4:
+; NACL: @ %bb.0: @ %entry
+; NACL-NEXT: .pad #16
+; NACL-NEXT: sub sp, sp, #16
+; NACL-NEXT: stm sp, {r0, r1, r2, r3}
+; NACL-NEXT: mov r0, sp
+; NACL-NEXT: mov r1, #80
+; NACL-NEXT: add sp, sp, #16
+; NACL-NEXT: b consumestruct
+;
+; NOMOVT-LABEL: f4:
+; NOMOVT: @ %bb.0: @ %entry
+; NOMOVT-NEXT: .pad #16
+; NOMOVT-NEXT: sub sp, sp, #16
+; NOMOVT-NEXT: stm sp, {r0, r1, r2, r3}
+; NOMOVT-NEXT: mov r0, sp
+; NOMOVT-NEXT: mov r1, #80
+; NOMOVT-NEXT: add sp, sp, #16
+; NOMOVT-NEXT: b consumestruct
entry:
tail call void @consumestruct(ptr %s, i32 80) optsize
ret void
@@ -84,16 +378,34 @@ entry:
; We can do tail call here since s is in the incoming argument area.
define void @f5(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize {
-; CHECK-LABEL: f5
-; CHECK: b{{(\.w)?}} _consumestruct
+; NACL-LABEL: f5:
+; NACL: @ %bb.0: @ %entry
+; NACL-NEXT: mov r0, sp
+; NACL-NEXT: mov r1, #80
+; NACL-NEXT: b consumestruct
+;
+; NOMOVT-LABEL: f5:
+; NOMOVT: @ %bb.0: @ %entry
+; NOMOVT-NEXT: mov r0, sp
+; NOMOVT-NEXT: mov r1, #80
+; NOMOVT-NEXT: b consumestruct
entry:
tail call void @consumestruct(ptr %s, i32 80) optsize
ret void
}
define void @f6(i32 %a, i32 %b, i32 %c, i32 %d, ptr nocapture byval(%struct.SmallStruct) %s) nounwind optsize {
-; CHECK-LABEL: f6
-; CHECK: b{{(\.w)?}} _consumestruct
+; NACL-LABEL: f6:
+; NACL: @ %bb.0: @ %entry
+; NACL-NEXT: mov r0, sp
+; NACL-NEXT: mov r1, #80
+; NACL-NEXT: b consumestruct
+;
+; NOMOVT-LABEL: f6:
+; NOMOVT: @ %bb.0: @ %entry
+; NOMOVT-NEXT: mov r0, sp
+; NOMOVT-NEXT: mov r1, #80
+; NOMOVT-NEXT: b consumestruct
entry:
tail call void @consumestruct(ptr %s, i32 80) optsize
ret void
@@ -106,10 +418,81 @@ declare void @consumestruct(ptr nocapture %structp, i32 %structsize) nounwind
declare void @use_I(ptr byval(%struct.I.8))
define void @test_I_16() {
-; CHECK-LABEL: test_I_16
-; CHECK: ldrb
-; CHECK: strb
+; NACL-LABEL: test_I_16:
+; NACL: @ %bb.0: @ %entry
+; NACL-NEXT: .save {r11, lr}
+; NACL-NEXT: push {r11, lr}
+; NACL-NEXT: .pad #40
+; NACL-NEXT: sub sp, sp, #40
+; NACL-NEXT: ldr r0, [r0]
+; NACL-NEXT: mov r1, sp
+; NACL-NEXT: vld1.32 {d16, d17}, [r2]!
+; NACL-NEXT: vst1.32 {d16, d17}, [r1]!
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: ldrb r3, [r2], #1
+; NACL-NEXT: strb r3, [r1], #1
+; NACL-NEXT: mov r2, r0
+; NACL-NEXT: mov r1, r0
+; NACL-NEXT: mov r3, r0
+; NACL-NEXT: bl use_I
+; NACL-NEXT: add sp, sp, #40
+; NACL-NEXT: pop {r11, pc}
+;
+; NOMOVT-LABEL: test_I_16:
+; NOMOVT: @ %bb.0: @ %entry
+; NOMOVT-NEXT: .save {r11, lr}
+; NOMOVT-NEXT: push {r11, lr}
+; NOMOVT-NEXT: .setfp r11, sp
+; NOMOVT-NEXT: mov r11, sp
+; NOMOVT-NEXT: .pad #40
+; NOMOVT-NEXT: sub sp, sp, #40
+; NOMOVT-NEXT: bic sp, sp, #15
+; NOMOVT-NEXT: ldr r0, [r1], #4
+; NOMOVT-NEXT: mov r2, sp
+; NOMOVT-NEXT: str r0, [r2], #4
+; NOMOVT-NEXT: ldr r0, [r1], #4
+; NOMOVT-NEXT: str r0, [r2], #4
+; NOMOVT-NEXT: ldr r0, [r1], #4
+; NOMOVT-NEXT: str r0, [r2], #4
+; NOMOVT-NEXT: ldr r0, [r1], #4
+; NOMOVT-NEXT: str r0, [r2], #4
+; NOMOVT-NEXT: ldr r0, [r1], #4
+; NOMOVT-NEXT: str r0, [r2], #4
+; NOMOVT-NEXT: ldr r0, [r1], #4
+; NOMOVT-NEXT: str r0, [r2], #4
+; NOMOVT-NEXT: ldr r0, [r1], #4
+; NOMOVT-NEXT: str r0, [r2], #4
+; NOMOVT-NEXT: ldr r0, [r0]
+; NOMOVT-NEXT: mov r1, r0
+; NOMOVT-NEXT: mov r2, r0
+; NOMOVT-NEXT: mov r3, r0
+; NOMOVT-NEXT: bl use_I
+; NOMOVT-NEXT: mov sp, r11
+; NOMOVT-NEXT: pop {r11, pc}
entry:
call void @use_I(ptr byval(%struct.I.8) align 16 undef)
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/tail-call-float.ll b/llvm/test/CodeGen/ARM/tail-call-float.ll
index 8cca7e0f70683e..6fa4b9229f64c8 100644
--- a/llvm/test/CodeGen/ARM/tail-call-float.ll
+++ b/llvm/test/CodeGen/ARM/tail-call-float.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi soft -O0 -o - < %s \
; RUN: | FileCheck %s -check-prefix CHECK-SOFT -check-prefix CHECK
; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi hard -O0 -o - < %s \
@@ -12,16 +13,42 @@ declare i1 @non_variadic_big(float, float, float, float, float, float)
declare i1 @variadic(float, ...)
define void @non_variadic_fp(float %x, float %y) {
-; CHECK-LABEL: non_variadic_fp:
-; CHECK: b non_variadic
+; CHECK-SOFT-LABEL: non_variadic_fp:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: mov r3, r1
+; CHECK-SOFT-NEXT: mov r2, r0
+; CHECK-SOFT-NEXT: mov r0, r3
+; CHECK-SOFT-NEXT: mov r1, r2
+; CHECK-SOFT-NEXT: b non_variadic
+;
+; CHECK-HARD-LABEL: non_variadic_fp:
+; CHECK-HARD: @ %bb.0: @ %entry
+; CHECK-HARD-NEXT: vmov.f32 s3, s1
+; CHECK-HARD-NEXT: vmov.f32 s2, s0
+; CHECK-HARD-NEXT: vmov.f32 s0, s3
+; CHECK-HARD-NEXT: vmov.f32 s1, s2
+; CHECK-HARD-NEXT: b non_variadic
entry:
%call = tail call i1 (float, float, float, float) @non_variadic(float %y, float %x, float %x, float %y)
ret void
}
define void @variadic_fp(float %x, float %y) {
-; CHECK-LABEL: variadic_fp:
-; CHECK: b variadic
+; CHECK-SOFT-LABEL: variadic_fp:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: mov r3, r1
+; CHECK-SOFT-NEXT: mov r2, r0
+; CHECK-SOFT-NEXT: mov r0, r3
+; CHECK-SOFT-NEXT: mov r1, r2
+; CHECK-SOFT-NEXT: b variadic
+;
+; CHECK-HARD-LABEL: variadic_fp:
+; CHECK-HARD: @ %bb.0: @ %entry
+; CHECK-HARD-NEXT: vmov r2, s0
+; CHECK-HARD-NEXT: vmov r3, s1
+; CHECK-HARD-NEXT: mov r0, r3
+; CHECK-HARD-NEXT: mov r1, r2
+; CHECK-HARD-NEXT: b variadic
entry:
%call = tail call i1 (float, ...) @variadic(float %y, float %x, float %x, float %y)
ret void
@@ -31,9 +58,32 @@ entry:
; of them to handle the 6 arguments. With hard-float, we have plenty of regs
; (s0-s15) to pass FP arguments.
define void @non_variadic_fp_big(float %x, float %y) {
-; CHECK-LABEL: non_variadic_fp_big:
-; CHECK-SOFT: bl non_variadic_big
-; CHECK-HARD: b non_variadic_big
+; CHECK-SOFT-LABEL: non_variadic_fp_big:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: sub sp, sp, #8
+; CHECK-SOFT-NEXT: mov r3, r1
+; CHECK-SOFT-NEXT: mov r2, r0
+; CHECK-SOFT-NEXT: vmov s0, r3
+; CHECK-SOFT-NEXT: vmov s0, r2
+; CHECK-SOFT-NEXT: mov r0, sp
+; CHECK-SOFT-NEXT: str r3, [r0, #4]
+; CHECK-SOFT-NEXT: str r2, [r0]
+; CHECK-SOFT-NEXT: mov r0, r3
+; CHECK-SOFT-NEXT: mov r1, r2
+; CHECK-SOFT-NEXT: bl non_variadic_big
+; CHECK-SOFT-NEXT: add sp, sp, #8
+; CHECK-SOFT-NEXT: pop {r11, pc}
+;
+; CHECK-HARD-LABEL: non_variadic_fp_big:
+; CHECK-HARD: @ %bb.0: @ %entry
+; CHECK-HARD-NEXT: vmov.f32 s5, s1
+; CHECK-HARD-NEXT: vmov.f32 s4, s0
+; CHECK-HARD-NEXT: vmov.f32 s0, s5
+; CHECK-HARD-NEXT: vmov.f32 s1, s4
+; CHECK-HARD-NEXT: vmov.f32 s2, s4
+; CHECK-HARD-NEXT: vmov.f32 s3, s5
+; CHECK-HARD-NEXT: b non_variadic_big
entry:
%call = tail call i1 (float, float, float, float, float, float) @non_variadic_big(float %y, float %x, float %x, float %y, float %x, float %y)
ret void
@@ -41,9 +91,40 @@ entry:
; Variadic functions cannot use FP regs to pass arguments; only GP regs.
define void @variadic_fp_big(float %x, float %y) {
-; CHECK-LABEL: variadic_fp_big:
-; CHECK: bl variadic
+; CHECK-SOFT-LABEL: variadic_fp_big:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: sub sp, sp, #8
+; CHECK-SOFT-NEXT: mov r3, r1
+; CHECK-SOFT-NEXT: mov r2, r0
+; CHECK-SOFT-NEXT: vmov s0, r3
+; CHECK-SOFT-NEXT: vmov s0, r2
+; CHECK-SOFT-NEXT: mov r0, sp
+; CHECK-SOFT-NEXT: str r3, [r0, #4]
+; CHECK-SOFT-NEXT: str r2, [r0]
+; CHECK-SOFT-NEXT: mov r0, r3
+; CHECK-SOFT-NEXT: mov r1, r2
+; CHECK-SOFT-NEXT: bl variadic
+; CHECK-SOFT-NEXT: add sp, sp, #8
+; CHECK-SOFT-NEXT: pop {r11, pc}
+;
+; CHECK-HARD-LABEL: variadic_fp_big:
+; CHECK-HARD: @ %bb.0: @ %entry
+; CHECK-HARD-NEXT: push {r11, lr}
+; CHECK-HARD-NEXT: sub sp, sp, #8
+; CHECK-HARD-NEXT: mov r0, sp
+; CHECK-HARD-NEXT: vstr s1, [r0, #4]
+; CHECK-HARD-NEXT: vstr s0, [r0]
+; CHECK-HARD-NEXT: vmov r2, s0
+; CHECK-HARD-NEXT: vmov r3, s1
+; CHECK-HARD-NEXT: mov r0, r3
+; CHECK-HARD-NEXT: mov r1, r2
+; CHECK-HARD-NEXT: bl variadic
+; CHECK-HARD-NEXT: add sp, sp, #8
+; CHECK-HARD-NEXT: pop {r11, pc}
entry:
%call = tail call i1 (float, ...) @variadic(float %y, float %x, float %x, float %y, float %x, float %y)
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
>From 39371e0582ff6bbcc558970a82d007b132e6ed10 Mon Sep 17 00:00:00 2001
From: Kiran <kiran.sturt at arm.com>
Date: Tue, 27 Aug 2024 10:43:59 +0100
Subject: [PATCH 2/3] Separate frontend changes, add debug directives, remove
redundant stuff from tests
---
clang/lib/CodeGen/CGCall.cpp | 2 +-
llvm/include/llvm/CodeGen/CallingConvLower.h | 2 ++
llvm/lib/CodeGen/CallingConvLower.cpp | 2 ++
llvm/test/CodeGen/ARM/tail-call-float.ll | 4 ++--
4 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 05773f91f986ba..ca2c79b51ac96b 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5086,7 +5086,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
RawAddress SRetAlloca = RawAddress::invalid();
llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
- if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) {
+ if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) {
SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() +
IRFunctionArgs.getSRetArgNo(),
RetTy, CharUnits::fromQuantity(1));
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 12a6df16e279b4..f94cb628965faa 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -540,7 +540,9 @@ class CCState {
});
}
+#ifndef NDEBUG
void dump() const;
+#endif
private:
/// MarkAllocated - Mark a register and all of its aliases as allocated.
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index 7ba3ea83115db2..38884762944319 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -291,6 +291,7 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
RVLocs2.end(), AreCompatible);
}
+#ifndef NDEBUG
void CCState::dump() const {
dbgs() << "CCState:\n";
for (const CCValAssign &Loc : Locs) {
@@ -351,3 +352,4 @@ void CCState::dump() const {
dbgs() << "\n";
}
}
+#endif
\ No newline at end of file
diff --git a/llvm/test/CodeGen/ARM/tail-call-float.ll b/llvm/test/CodeGen/ARM/tail-call-float.ll
index 6fa4b9229f64c8..2f834976dd403c 100644
--- a/llvm/test/CodeGen/ARM/tail-call-float.ll
+++ b/llvm/test/CodeGen/ARM/tail-call-float.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi soft -O0 -o - < %s \
-; RUN: | FileCheck %s -check-prefix CHECK-SOFT -check-prefix CHECK
+; RUN: | FileCheck %s -check-prefix CHECK-SOFT
; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi hard -O0 -o - < %s \
-; RUN: | FileCheck %s -check-prefix CHECK-HARD -check-prefix CHECK
+; RUN: | FileCheck %s -check-prefix CHECK-HARD
; Tests for passing floating-point regs. Variadic functions will always use
; general-purpose registers. Standard functions will use the floating-point
>From 03d4cc0ed426037b57b601d786be85f15237c958 Mon Sep 17 00:00:00 2001
From: Kiran <kiran.sturt at arm.com>
Date: Tue, 27 Aug 2024 10:46:18 +0100
Subject: [PATCH 3/3] Revert "Separate frontend changes, add debug directives,
remove redundant stuff from tests"
This reverts commit 1a908c6be3317bbbac73e6a6fc52cabefbdebf7d.
---
clang/lib/CodeGen/CGCall.cpp | 2 +-
llvm/include/llvm/CodeGen/CallingConvLower.h | 2 --
llvm/lib/CodeGen/CallingConvLower.cpp | 2 --
llvm/test/CodeGen/ARM/tail-call-float.ll | 4 ++--
4 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index ca2c79b51ac96b..05773f91f986ba 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5086,7 +5086,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
RawAddress SRetAlloca = RawAddress::invalid();
llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
- if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) {
+ if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) {
SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() +
IRFunctionArgs.getSRetArgNo(),
RetTy, CharUnits::fromQuantity(1));
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index f94cb628965faa..12a6df16e279b4 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -540,9 +540,7 @@ class CCState {
});
}
-#ifndef NDEBUG
void dump() const;
-#endif
private:
/// MarkAllocated - Mark a register and all of its aliases as allocated.
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index 38884762944319..7ba3ea83115db2 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -291,7 +291,6 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
RVLocs2.end(), AreCompatible);
}
-#ifndef NDEBUG
void CCState::dump() const {
dbgs() << "CCState:\n";
for (const CCValAssign &Loc : Locs) {
@@ -352,4 +351,3 @@ void CCState::dump() const {
dbgs() << "\n";
}
}
-#endif
\ No newline at end of file
diff --git a/llvm/test/CodeGen/ARM/tail-call-float.ll b/llvm/test/CodeGen/ARM/tail-call-float.ll
index 2f834976dd403c..6fa4b9229f64c8 100644
--- a/llvm/test/CodeGen/ARM/tail-call-float.ll
+++ b/llvm/test/CodeGen/ARM/tail-call-float.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi soft -O0 -o - < %s \
-; RUN: | FileCheck %s -check-prefix CHECK-SOFT
+; RUN: | FileCheck %s -check-prefix CHECK-SOFT -check-prefix CHECK
; RUN: llc -mtriple armv7 -target-abi aapcs -float-abi hard -O0 -o - < %s \
-; RUN: | FileCheck %s -check-prefix CHECK-HARD
+; RUN: | FileCheck %s -check-prefix CHECK-HARD -check-prefix CHECK
; Tests for passing floating-point regs. Variadic functions will always use
; general-purpose registers. Standard functions will use the floating-point
More information about the llvm-commits
mailing list