[clang] [llvm] [ARM] musttail fixes (PR #102896)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Aug 12 06:04:17 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-clang
Author: Kiran (kiran-isaac)
<details>
<summary>Changes</summary>
Backend:
- Caller and callee arguments no longer have to match, just to take up the same space, as they can be changed before the call
- Allowed tail calls if callee and callee both (or neither) use sret, wheras before it would be dissalowed if either used sret
- Allowed tail calls if byval args are used
- Added debug trace for IsEligibleForTailCallOptimisation
Frontend (clang):
- Do not generate extra alloca if sret is used with musttail, as the space for the sret is allocated already
---
Patch is 39.90 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/102896.diff
10 Files Affected:
- (modified) clang/lib/CodeGen/CGCall.cpp (+1-1)
- (modified) llvm/include/llvm/CodeGen/CallingConvLower.h (+2)
- (modified) llvm/lib/CodeGen/CallingConvLower.cpp (+61)
- (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+42-99)
- (modified) llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll (+5-11)
- (modified) llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll (+5-8)
- (modified) llvm/test/CodeGen/ARM/fp-arg-shuffle.ll (+22)
- (modified) llvm/test/CodeGen/ARM/fp16-vector-argument.ll (+14-27)
- (modified) llvm/test/CodeGen/ARM/struct_byval.ll (+419-36)
- (modified) llvm/test/CodeGen/ARM/tail-call-float.ll (+90-9)
``````````diff
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index e4f221ae55eef..b035c7443ac1c 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5085,7 +5085,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
RawAddress SRetAlloca = RawAddress::invalid();
llvm::Value *UnusedReturnSizePtr = nullptr;
if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) {
- if (IsVirtualFunctionPointerThunk && RetAI.isIndirect()) {
+ if ((IsVirtualFunctionPointerThunk || IsMustTail) && RetAI.isIndirect()) {
SRetPtr = makeNaturalAddressForPointer(CurFn->arg_begin() +
IRFunctionArgs.getSRetArgNo(),
RetTy, CharUnits::fromQuantity(1));
diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 932a2a94ab1f1..fdb5982cb2042 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -540,6 +540,8 @@ class CCState {
});
}
+ void dump() const;
+
private:
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void MarkAllocated(MCPhysReg Reg);
diff --git a/llvm/lib/CodeGen/CallingConvLower.cpp b/llvm/lib/CodeGen/CallingConvLower.cpp
index b7152587a9fa0..7ba3ea83115db 100644
--- a/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -290,3 +290,64 @@ bool CCState::resultsCompatible(CallingConv::ID CalleeCC,
return std::equal(RVLocs1.begin(), RVLocs1.end(), RVLocs2.begin(),
RVLocs2.end(), AreCompatible);
}
+
+void CCState::dump() const {
+ dbgs() << "CCState:\n";
+ for (const CCValAssign &Loc : Locs) {
+ if (Loc.isRegLoc()) {
+ dbgs() << " Reg " << TRI.getName(Loc.getLocReg());
+ } else if (Loc.isMemLoc()) {
+ dbgs() << " Mem " << Loc.getLocMemOffset();
+ } else {
+ assert(Loc.isPendingLoc());
+ dbgs() << " Pend " << Loc.getExtraInfo();
+ }
+
+ dbgs() << " ValVT:" << Loc.getValVT();
+ dbgs() << " LocVT:" << Loc.getLocVT();
+
+ if (Loc.needsCustom())
+ dbgs() << " custom";
+
+ switch (Loc.getLocInfo()) {
+ case CCValAssign::Full:
+ dbgs() << " Full";
+ break;
+ case CCValAssign::SExt:
+ dbgs() << " SExt";
+ break;
+ case CCValAssign::ZExt:
+ dbgs() << " ZExt";
+ break;
+ case CCValAssign::AExt:
+ dbgs() << " AExt";
+ break;
+ case CCValAssign::SExtUpper:
+ dbgs() << " SExtUpper";
+ break;
+ case CCValAssign::ZExtUpper:
+ dbgs() << " ZExtUpper";
+ break;
+ case CCValAssign::AExtUpper:
+ dbgs() << " AExtUpper";
+ break;
+ case CCValAssign::BCvt:
+ dbgs() << " BCvt";
+ break;
+ case CCValAssign::Trunc:
+ dbgs() << " Trunc";
+ break;
+ case CCValAssign::VExt:
+ dbgs() << " VExt";
+ break;
+ case CCValAssign::FPExt:
+ dbgs() << " FPExt";
+ break;
+ case CCValAssign::Indirect:
+ dbgs() << " Indirect";
+ break;
+ }
+
+ dbgs() << "\n";
+ }
+}
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1fab30a0b8550..242dfe9cb44ba 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2407,8 +2407,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
isTailCall = false;
// For both the non-secure calls and the returns from a CMSE entry function,
- // the function needs to do some extra work afte r the call, or before the
- // return, respectively, thus it cannot end with atail call
+ // the function needs to do some extra work after the call, or before the
+ // return, respectively, thus it cannot end with a tail call
if (isCmseNSCall || AFI->isCmseNSEntryFunction())
isTailCall = false;
@@ -2959,50 +2959,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
Size = std::max<int>(Size - Excess, 0);
}
-/// MatchingStackOffset - Return true if the given stack call argument is
-/// already available in the same position (relatively) of the caller's
-/// incoming argument stack.
-static
-bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
- MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
- const TargetInstrInfo *TII) {
- unsigned Bytes = Arg.getValueSizeInBits() / 8;
- int FI = std::numeric_limits<int>::max();
- if (Arg.getOpcode() == ISD::CopyFromReg) {
- Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
- if (!VR.isVirtual())
- return false;
- MachineInstr *Def = MRI->getVRegDef(VR);
- if (!Def)
- return false;
- if (!Flags.isByVal()) {
- if (!TII->isLoadFromStackSlot(*Def, FI))
- return false;
- } else {
- return false;
- }
- } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
- if (Flags.isByVal())
- // ByVal argument is passed in as a pointer but it's now being
- // dereferenced. e.g.
- // define @foo(%struct.X* %A) {
- // tail call @bar(%struct.X* byval %A)
- // }
- return false;
- SDValue Ptr = Ld->getBasePtr();
- FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
- if (!FINode)
- return false;
- FI = FINode->getIndex();
- } else
- return false;
-
- assert(FI != std::numeric_limits<int>::max());
- if (!MFI.isFixedObjectIndex(FI))
- return false;
- return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
-}
-
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function. Note that this function also
@@ -3044,8 +3000,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
for (const CCValAssign &AL : ArgLocs)
if (AL.isRegLoc())
AddressRegisters.erase(AL.getLocReg());
- if (AddressRegisters.empty())
+ if (AddressRegisters.empty()) {
+ LLVM_DEBUG(dbgs() << "false (no space for target address)\n");
return false;
+ }
}
// Look for obvious safe cases to perform tail call optimization that do not
@@ -3054,18 +3012,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
- if (CallerF.hasFnAttribute("interrupt"))
+ if (CallerF.hasFnAttribute("interrupt")) {
+ LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
return false;
+ }
- if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
+ if (canGuaranteeTCO(CalleeCC,
+ getTargetMachine().Options.GuaranteedTailCallOpt)) {
+ LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
+ << " (guaranteed tail-call CC)\n");
return CalleeCC == CallerCC;
+ }
- // Also avoid sibcall optimization if either caller or callee uses struct
- // return semantics.
+ // Also avoid sibcall optimization if only one of caller or callee uses
+ // struct return semantics.
bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
- if (isCalleeStructRet || isCallerStructRet)
+ if (isCalleeStructRet != isCallerStructRet) {
+ LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
return false;
+ }
// Externally-defined functions with weak linkage should not be
// tail-called on ARM when the OS does not support dynamic
@@ -3078,8 +3044,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
const GlobalValue *GV = G->getGlobal();
const Triple &TT = getTargetMachine().getTargetTriple();
if (GV->hasExternalWeakLinkage() &&
- (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
+ (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+ TT.isOSBinFormatMachO())) {
+ LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
return false;
+ }
}
// Check that the call results are passed in the same way.
@@ -3088,70 +3057,44 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
getEffectiveCallingConv(CalleeCC, isVarArg),
getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
CCAssignFnForReturn(CalleeCC, isVarArg),
- CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
+ CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
+ LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
return false;
+ }
// The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
if (CalleeCC != CallerCC) {
const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
- if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
+ if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
+ LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
return false;
+ }
}
- // If Caller's vararg or byval argument has been split between registers and
+ // If Caller's vararg argument has been split between registers and
// stack, do not perform tail call, since part of the argument is in caller's
// local frame.
const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
- if (AFI_Caller->getArgRegsSaveSize())
+ if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
+ LLVM_DEBUG(dbgs() << "false (vararg arg reg save area)\n");
return false;
+ }
// If the callee takes no arguments then go on to check the results of the
// call.
- if (!Outs.empty()) {
- if (CCInfo.getStackSize()) {
- // Check if the arguments are already laid out in the right way as
- // the caller's fixed stack objects.
- MachineFrameInfo &MFI = MF.getFrameInfo();
- const MachineRegisterInfo *MRI = &MF.getRegInfo();
- const TargetInstrInfo *TII = Subtarget->getInstrInfo();
- for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
- i != e;
- ++i, ++realArgIdx) {
- CCValAssign &VA = ArgLocs[i];
- EVT RegVT = VA.getLocVT();
- SDValue Arg = OutVals[realArgIdx];
- ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
- if (VA.getLocInfo() == CCValAssign::Indirect)
- return false;
- if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
- // f64 and vector types are split into multiple registers or
- // register/stack-slot combinations. The types will not match
- // the registers; give up on memory f64 refs until we figure
- // out what to do about this.
- if (!VA.isRegLoc())
- return false;
- if (!ArgLocs[++i].isRegLoc())
- return false;
- if (RegVT == MVT::v2f64) {
- if (!ArgLocs[++i].isRegLoc())
- return false;
- if (!ArgLocs[++i].isRegLoc())
- return false;
- }
- } else if (!VA.isRegLoc()) {
- if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
- MFI, MRI, TII))
- return false;
- }
- }
- }
-
- const MachineRegisterInfo &MRI = MF.getRegInfo();
- if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
- return false;
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
+ LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
+ return false;
}
+ // If the stack arguments for this call do not fit into our own save area then
+ // the call cannot be made tail.
+ if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "true\n");
return true;
}
diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
index d8e22f4f5312a..e186ae3a96150 100644
--- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
+++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding.ll
@@ -12,17 +12,11 @@ define void @check227(
; arg1 --> SP+188
entry:
-
-;CHECK: sub sp, sp, #12
-;CHECK: push {r11, lr}
-;CHECK: sub sp, sp, #4
-;CHECK: add r0, sp, #12
-;CHECK: stm r0, {r1, r2, r3}
-;CHECK: ldr r0, [sp, #212]
-;CHECK: bl useInt
-;CHECK: add sp, sp, #4
-;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #12
+; CHECK: sub sp, sp, #12
+; CHECK: stm sp, {r1, r2, r3}
+; CHECK: ldr r0, [sp, #200]
+; CHECK: add sp, sp, #12
+; CHECK: b useInt
%0 = ptrtoint ptr %arg1 to i32
tail call void @useInt(i32 %0)
diff --git a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
index 0c5d22984b99e..efdecce9ae723 100644
--- a/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
+++ b/llvm/test/CodeGen/ARM/2013-05-13-AAPCS-byval-padding2.ll
@@ -7,14 +7,11 @@
define void @foo(ptr byval(%struct4bytes) %p0, ; --> R0
ptr byval(%struct20bytes) %p1 ; --> R1,R2,R3, [SP+0 .. SP+8)
) {
-;CHECK: sub sp, sp, #16
-;CHECK: push {r11, lr}
-;CHECK: add r12, sp, #8
-;CHECK: stm r12, {r0, r1, r2, r3}
-;CHECK: add r0, sp, #12
-;CHECK: bl useInt
-;CHECK: pop {r11, lr}
-;CHECK: add sp, sp, #16
+;CHECK: sub sp, sp, #16
+;CHECK: stm sp, {r0, r1, r2, r3}
+;CHECK: add r0, sp, #4
+;CHECK: add sp, sp, #16
+;CHECK: b useInt
%1 = ptrtoint ptr %p1 to i32
tail call void @useInt(i32 %1)
diff --git a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
index 4996cc8ecbf02..2ceb7a7b97a1f 100644
--- a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
+++ b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll
@@ -3,6 +3,28 @@
; CHECK: function1
; CHECK-NOT: vmov
define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp {
+; CHECK-LABEL: function1:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: .save {r4, r5, r11, lr}
+; CHECK-NEXT: push {r4, r5, r11, lr}
+; CHECK-NEXT: vldr d16, [sp, #40]
+; CHECK-NEXT: vldr d17, [sp, #32]
+; CHECK-NEXT: vmov r12, lr, d16
+; CHECK-NEXT: vldr d16, [sp, #16]
+; CHECK-NEXT: vmov r4, r5, d17
+; CHECK-NEXT: vldr d17, [sp, #24]
+; CHECK-NEXT: str r3, [sp, #36]
+; CHECK-NEXT: str r2, [sp, #32]
+; CHECK-NEXT: str r1, [sp, #44]
+; CHECK-NEXT: str r0, [sp, #40]
+; CHECK-NEXT: vstr d17, [sp, #16]
+; CHECK-NEXT: vstr d16, [sp, #24]
+; CHECK-NEXT: mov r0, r12
+; CHECK-NEXT: mov r1, lr
+; CHECK-NEXT: mov r2, r4
+; CHECK-NEXT: mov r3, r5
+; CHECK-NEXT: pop {r4, r5, r11, lr}
+; CHECK-NEXT: b function2
entry:
%call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind
ret double %call
diff --git a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll
index 6fc56967bc7aa..65aff46658fd1 100644
--- a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll
+++ b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll
@@ -145,26 +145,21 @@ entry:
define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x half>, <8 x half>) {
; SOFT-LABEL: many_args_test:
; SOFT: @ %bb.0: @ %entry
-; SOFT-NEXT: push {r11, lr}
-; SOFT-NEXT: sub sp, sp, #32
-; SOFT-NEXT: add r12, sp, #80
+; SOFT-NEXT: add r12, sp, #40
; SOFT-NEXT: vld1.64 {d16, d17}, [r12]
-; SOFT-NEXT: add r12, sp, #48
+; SOFT-NEXT: add r12, sp, #8
; SOFT-NEXT: vabs.f16 q8, q8
; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
-; SOFT-NEXT: add r12, sp, #64
+; SOFT-NEXT: add r12, sp, #24
; SOFT-NEXT: vadd.f16 q8, q8, q9
; SOFT-NEXT: vld1.64 {d18, d19}, [r12]
; SOFT-NEXT: add r12, sp, #16
; SOFT-NEXT: vmul.f16 q8, q9, q8
; SOFT-NEXT: vst1.64 {d16, d17}, [r12]
-; SOFT-NEXT: mov r12, sp
-; SOFT-NEXT: vldr d16, [sp, #40]
-; SOFT-NEXT: vst1.16 {d16}, [r12:64]!
-; SOFT-NEXT: str r3, [r12]
-; SOFT-NEXT: bl use
-; SOFT-NEXT: add sp, sp, #32
-; SOFT-NEXT: pop {r11, pc}
+; SOFT-NEXT: vldr d16, [sp]
+; SOFT-NEXT: vstr d16, [sp]
+; SOFT-NEXT: str r3, [sp, #8]
+; SOFT-NEXT: b use
;
; HARD-LABEL: many_args_test:
; HARD: @ %bb.0: @ %entry
@@ -177,33 +172,25 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal
;
; SOFTEB-LABEL: many_args_test:
; SOFTEB: @ %bb.0: @ %entry
-; SOFTEB-NEXT: .save {r11, lr}
-; SOFTEB-NEXT: push {r11, lr}
-; SOFTEB-NEXT: .pad #32
-; SOFTEB-NEXT: sub sp, sp, #32
-; SOFTEB-NEXT: add r12, sp, #80
-; SOFTEB-NEXT: mov lr, sp
+; SOFTEB-NEXT: add r12, sp, #40
; SOFTEB-NEXT: vld1.64 {d16, d17}, [r12]
-; SOFTEB-NEXT: add r12, sp, #48
+; SOFTEB-NEXT: add r12, sp, #8
; SOFTEB-NEXT: vrev64.16 q8, q8
; SOFTEB-NEXT: vabs.f16 q8, q8
; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12]
-; SOFTEB-NEXT: add r12, sp, #64
+; SOFTEB-NEXT: add r12, sp, #24
; SOFTEB-NEXT: vrev64.16 q9, q9
; SOFTEB-NEXT: vadd.f16 q8, q8, q9
; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12]
; SOFTEB-NEXT: add r12, sp, #16
; SOFTEB-NEXT: vrev64.16 q9, q9
; SOFTEB-NEXT: vmul.f16 q8, q9, q8
-; SOFTEB-NEXT: vldr d18, [sp, #40]
-; SOFTEB-NEXT: vrev64.16 d18, d18
-; SOFTEB-NEXT: vst1.16 {d18}, [lr:64]!
-; SOFTEB-NEXT: str r3, [lr]
+; SOFTEB-NEXT: vldr d18, [sp]
; SOFTEB-NEXT: vrev64.16 q8, q8
; SOFTEB-NEXT: vst1.64 {d16, d17}, [r12]
-; SOFTEB-NEXT: bl use
-; SOFTEB-NEXT: add sp, sp, #32
-; SOFTEB-NEXT: pop {r11, pc}
+; SOFTEB-NEXT: vstr d18, [sp]
+; SOFTEB-NEXT: str r3, [sp, #8]
+; SOFTEB-NEXT: b use
;
; HARDEB-LABEL: many_args_test:
; HARDEB: @ %bb.0: @ %entry
diff --git a/llvm/test/CodeGen/ARM/struct_byval.ll b/llvm/test/CodeGen/ARM/struct_byval.ll
index 73a1b5ee33bca..5564f254c9e74 100644
--- a/llvm/test/CodeGen/ARM/struct_byval.ll
+++ b/llvm/test/CodeGen/ARM/struct_byval.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-apple-ios6.0 | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-ios6.0 | FileCheck %s
; RUN: llc < %s -mtriple=armv7-unknown-nacl-gnueabi | FileCheck %s -check-prefix=NACL
@@ -10,11 +11,122 @@
%struct.LargeStruct = type { i32, [1001 x i8], [300 x i32] }
define i32 @f() nounwind ssp {
+; NACL-LABEL: f:
+; NACL: @ %bb.0: @ %entry
+; NACL-NEXT: .save {r4, lr}
+; NACL-NEXT: push {r4, lr}
+; NACL-NEXT: .pad #152
+; NACL-NEXT: sub sp, sp, #152
+; NACL-NEXT: movw r0, :lower16:__stack_chk_guard
+; NACL-NEXT: add r3, sp, #72
+; NACL-NEXT: movt r0, :upper16:__stack_chk_guard
+; NACL-NEXT: mov lr, sp
+; NACL-NEXT: ldr r0, [r0]
+; NACL-NEXT: str r0, [sp, #148]
+; NACL-NEXT: add r0, sp, #72
+; NACL-NEXT: add r12, r0, #16
+; NACL-NEXT: ldm r3, {r0, r1, r2, r3}
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-NEXT: ldr r4, [r12], #4
+; NACL-NEXT: str r4, [lr], #4
+; NACL-N...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/102896
More information about the cfe-commits
mailing list