[llvm] bc2e572 - Re-commit: [ARM] CMSE code generation
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Thu May 14 09:31:27 PDT 2020
Author: Momchil Velikov
Date: 2020-05-14T16:46:16+01:00
New Revision: bc2e572f51dac4aed8ef86b2f09427109f0cabb8
URL: https://github.com/llvm/llvm-project/commit/bc2e572f51dac4aed8ef86b2f09427109f0cabb8
DIFF: https://github.com/llvm/llvm-project/commit/bc2e572f51dac4aed8ef86b2f09427109f0cabb8.diff
LOG: Re-commit: [ARM] CMSE code generation
This patch implements the final bits of CMSE code generation:
* emit special linker symbols
* restrict parameter passing to not use memory
* emit BXNS and BLXNS instructions for returns from non-secure entry
functions, and non-secure function calls, respectively
* emit code to save/restore secure floating-point state around calls
to non-secure functions
* emit code to save/restore non-secure floating-point state upon
entry to non-secure entry function, and return to non-secure state
* emit code to clobber registers not used for arguments and returns
  when switching to non-secure state
Patch by Momchil Velikov, Bradley Smith, Javed Absar, David Green,
possibly others.
Differential Revision: https://reviews.llvm.org/D76518
Added:
llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir
llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll
llvm/test/CodeGen/ARM/cmse-clear-float-hard2.ll
llvm/test/CodeGen/ARM/cmse-clear-float-mve.ll
llvm/test/CodeGen/ARM/cmse-clear-float.ll
llvm/test/CodeGen/ARM/cmse-clear.ll
llvm/test/CodeGen/ARM/cmse-clrm-it-block.ll
llvm/test/CodeGen/ARM/cmse-expand-bxns-ret.mir
llvm/test/CodeGen/ARM/cmse.ll
Modified:
llvm/lib/Target/ARM/ARMAsmPrinter.cpp
llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
llvm/lib/Target/ARM/ARMFastISel.cpp
llvm/lib/Target/ARM/ARMFrameLowering.cpp
llvm/lib/Target/ARM/ARMFrameLowering.h
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/ARM/ARMISelLowering.h
llvm/lib/Target/ARM/ARMInstrInfo.td
llvm/lib/Target/ARM/ARMInstrThumb.td
llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
llvm/lib/Target/ARM/ARMRegisterInfo.td
llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index d0adb24437d6..d1f3573cb537 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -73,6 +73,16 @@ void ARMAsmPrinter::emitFunctionEntryLabel() {
} else {
OutStreamer->emitAssemblerFlag(MCAF_Code32);
}
+
+ // Emit symbol for CMSE non-secure entry point
+ if (AFI->isCmseNSEntryFunction()) {
+ MCSymbol *S =
+ OutContext.getOrCreateSymbol("__acle_se_" + CurrentFnSym->getName());
+ emitLinkage(&MF->getFunction(), S);
+ OutStreamer->emitSymbolAttribute(S, MCSA_ELF_TypeFunction);
+ OutStreamer->emitLabel(S);
+ }
+
OutStreamer->emitLabel(CurrentFnSym);
}
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 2812a7634176..f908f883fb24 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -71,6 +71,35 @@ namespace {
unsigned Opc, bool IsExt);
void ExpandMOV32BitImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI);
+ void CMSEClearGPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ const SmallVectorImpl<unsigned> &ClearRegs,
+ unsigned ClobberReg);
+ MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
+ MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs);
+ MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs);
+ void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs);
+ void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &ScratchRegs);
+ void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL);
+ void CMSERestoreFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs);
+ void CMSERestoreFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs);
+ void CMSERestoreFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs);
bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
unsigned StrexOp, unsigned UxtOp,
@@ -927,6 +956,573 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
}
+// The size of the area accessed by VLSTM/VLLDM
+// S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad)
+static const int CMSE_FP_SAVE_SIZE = 136;
+
+void determineGPRegsToClear(const MachineInstr &MI,
+ const std::initializer_list<unsigned> &Regs,
+ SmallVectorImpl<unsigned> &ClearRegs) {
+ SmallVector<unsigned, 4> OpRegs;
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ OpRegs.push_back(Op.getReg());
+ }
+ llvm::sort(OpRegs);
+
+  std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(),
+                      std::back_inserter(ClearRegs));
+}
+
+void ARMExpandPseudo::CMSEClearGPRegs(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs,
+ unsigned ClobberReg) {
+
+ if (STI->hasV8_1MMainlineOps()) {
+ // Clear the registers using the CLRM instruction.
+ MachineInstrBuilder CLRM =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL));
+ for (unsigned R : ClearRegs)
+ CLRM.addReg(R, RegState::Define);
+ CLRM.addReg(ARM::APSR, RegState::Define);
+ CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
+ } else {
+ // Clear the registers and flags by copying ClobberReg into them.
+ // (Baseline can't do a high register clear in one instruction).
+ for (unsigned Reg : ClearRegs) {
+ if (Reg == ClobberReg)
+ continue;
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg)
+ .addReg(ClobberReg)
+ .add(predOps(ARMCC::AL));
+ }
+
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M))
+ .addImm(STI->hasDSP() ? 0xc00 : 0x800)
+ .addReg(ClobberReg)
+ .add(predOps(ARMCC::AL));
+ }
+}
+
+// Find which FP registers need to be cleared. The parameter `ClearRegs` is
+// initialised with all elements set to true, and this function resets all the
+// bits, which correspond to register uses. Returns true if any floating point
+// register is defined, false otherwise.
+static bool determineFPRegsToClear(const MachineInstr &MI,
+ BitVector &ClearRegs) {
+ bool DefFP = false;
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg())
+ continue;
+
+ unsigned Reg = Op.getReg();
+ if (Op.isDef()) {
+ if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
+ (Reg >= ARM::D0 && Reg <= ARM::D15) ||
+ (Reg >= ARM::S0 && Reg <= ARM::S31))
+ DefFP = true;
+ continue;
+ }
+
+ if (Reg >= ARM::Q0 && Reg <= ARM::Q7) {
+ int R = Reg - ARM::Q0;
+ ClearRegs.reset(R * 4, (R + 1) * 4);
+ } else if (Reg >= ARM::D0 && Reg <= ARM::D15) {
+ int R = Reg - ARM::D0;
+ ClearRegs.reset(R * 2, (R + 1) * 2);
+ } else if (Reg >= ARM::S0 && Reg <= ARM::S31) {
+ ClearRegs[Reg - ARM::S0] = false;
+ }
+ }
+ return DefFP;
+}
+
+MachineBasicBlock &
+ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ BitVector ClearRegs(16, true);
+ (void)determineFPRegsToClear(*MBBI, ClearRegs);
+
+ if (STI->hasV8_1MMainlineOps())
+ return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
+ else
+ return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs);
+}
+
+// Clear the FP registers for v8.0-M, by copying over the content
+// of LR. Uses R12 as a scratch register.
+MachineBasicBlock &
+ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs) {
+ if (!STI->hasFPRegs())
+ return MBB;
+
+ auto &RetI = *MBBI;
+ const DebugLoc &DL = RetI.getDebugLoc();
+
+ // If optimising for minimum size, clear FP registers unconditionally.
+ // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and
+ // don't clear them if they belong to the non-secure state.
+ MachineBasicBlock *ClearBB, *DoneBB;
+ if (STI->hasMinSize()) {
+ ClearBB = DoneBB = &MBB;
+ } else {
+ MachineFunction *MF = MBB.getParent();
+ ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+ DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+ MF->insert(++MBB.getIterator(), ClearBB);
+ MF->insert(++ClearBB->getIterator(), DoneBB);
+
+ DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end());
+ DoneBB->transferSuccessors(&MBB);
+ MBB.addSuccessor(ClearBB);
+ MBB.addSuccessor(DoneBB);
+ ClearBB->addSuccessor(DoneBB);
+
+ // At the new basic blocks we need to have live-in the registers, used
+ // for the return value as well as LR, used to clear registers.
+ for (const MachineOperand &Op : RetI.operands()) {
+ if (!Op.isReg())
+ continue;
+ Register Reg = Op.getReg();
+ if (Reg == ARM::NoRegister || Reg == ARM::LR)
+ continue;
+ assert(Register::isPhysicalRegister(Reg) && "Unallocated register");
+ ClearBB->addLiveIn(Reg);
+ DoneBB->addLiveIn(Reg);
+ }
+ ClearBB->addLiveIn(ARM::LR);
+ DoneBB->addLiveIn(ARM::LR);
+
+ // Read the CONTROL register.
+ BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12)
+ .addImm(20)
+ .add(predOps(ARMCC::AL));
+ // Check bit 3 (SFPA).
+ BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri))
+ .addReg(ARM::R12)
+ .addImm(8)
+ .add(predOps(ARMCC::AL));
+ // If SFPA is clear, jump over ClearBB to DoneBB.
+ BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc))
+ .addMBB(DoneBB)
+ .addImm(ARMCC::EQ)
+ .addReg(ARM::CPSR, RegState::Kill);
+ }
+
+ // Emit the clearing sequence
+ for (unsigned D = 0; D < 8; D++) {
+ // Attempt to clear as double
+ if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) {
+ unsigned Reg = ARM::D0 + D;
+ BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg)
+ .addReg(ARM::LR)
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ } else {
+ // Clear first part as single
+ if (ClearRegs[D * 2 + 0]) {
+ unsigned Reg = ARM::S0 + D * 2;
+ BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ }
+ // Clear second part as single
+ if (ClearRegs[D * 2 + 1]) {
+ unsigned Reg = ARM::S0 + D * 2 + 1;
+ BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ }
+ }
+ }
+
+ // Clear FPSCR bits 0-4, 7, 28-31
+ // The other bits are program global according to the AAPCS
+ BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12)
+ .add(predOps(ARMCC::AL));
+ BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
+ .addReg(ARM::R12)
+ .addImm(0x0000009F)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
+ .addReg(ARM::R12)
+ .addImm(0xF0000000)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(ClearBB, DL, TII->get(ARM::VMSR))
+ .addReg(ARM::R12)
+ .add(predOps(ARMCC::AL));
+
+ return *DoneBB;
+}
+
+MachineBasicBlock &
+ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const BitVector &ClearRegs) {
+ auto &RetI = *MBBI;
+
+ // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for
+ // each contiguous sequence of S-registers.
+ int Start = -1, End = -1;
+ for (int S = 0, E = ClearRegs.size(); S != E; ++S) {
+ if (ClearRegs[S] && S == End + 1) {
+ End = S; // extend range
+ continue;
+ }
+ // Emit current range.
+ if (Start < End) {
+ MachineInstrBuilder VSCCLRM =
+ BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
+ .add(predOps(ARMCC::AL));
+ while (++Start <= End)
+ VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
+ VSCCLRM.addReg(ARM::VPR, RegState::Define);
+ }
+ Start = End = S;
+ }
+ // Emit last range.
+ if (Start < End) {
+ MachineInstrBuilder VSCCLRM =
+ BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
+ .add(predOps(ARMCC::AL));
+ while (++Start <= End)
+ VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
+ VSCCLRM.addReg(ARM::VPR, RegState::Define);
+ }
+
+ return MBB;
+}
+
+void ARMExpandPseudo::CMSESaveClearFPRegs(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &ScratchRegs) {
+ if (STI->hasV8_1MMainlineOps())
+ CMSESaveClearFPRegsV81(MBB, MBBI, DL);
+ else
+ CMSESaveClearFPRegsV8(MBB, MBBI, DL, ScratchRegs);
+}
+
+// Save and clear FP registers if present
+void ARMExpandPseudo::CMSESaveClearFPRegsV8(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &ScratchRegs) {
+ if (!STI->hasFPRegs())
+ return;
+
+ // Store an available register for FPSCR clearing
+ assert(!ScratchRegs.empty());
+ unsigned SpareReg = ScratchRegs.front();
+
+ // save space on stack for VLSTM
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+
+ // Use ScratchRegs to store the fp regs
+ std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
+ std::vector<unsigned> NonclearedFPRegs;
+ for (const MachineOperand &Op : MBBI->operands()) {
+ if (Op.isReg() && Op.isUse()) {
+ unsigned Reg = Op.getReg();
+ assert(!ARM::DPRRegClass.contains(Reg) ||
+ ARM::DPR_VFP2RegClass.contains(Reg));
+ assert(!ARM::QPRRegClass.contains(Reg));
+ if (ARM::DPR_VFP2RegClass.contains(Reg)) {
+ if (ScratchRegs.size() >= 2) {
+ unsigned SaveReg2 = ScratchRegs.pop_back_val();
+ unsigned SaveReg1 = ScratchRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
+ .addReg(SaveReg1, RegState::Define)
+ .addReg(SaveReg2, RegState::Define)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ if (ScratchRegs.size() >= 1) {
+ unsigned SaveReg = ScratchRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ }
+ }
+ }
+
+ bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
+
+ // Lazy store all fp registers to the stack
+ MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+
+ // Restore all arguments
+ for (const auto &Regs : ClearedFPRegs) {
+ unsigned Reg, SaveReg1, SaveReg2;
+ std::tie(Reg, SaveReg1, SaveReg2) = Regs;
+ if (ARM::DPR_VFP2RegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
+ .addReg(SaveReg1)
+ .addReg(SaveReg2)
+ .add(predOps(ARMCC::AL));
+ else if (ARM::SPRRegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(SaveReg1)
+ .add(predOps(ARMCC::AL));
+ }
+
+ for (unsigned Reg : NonclearedFPRegs) {
+ if (ARM::DPR_VFP2RegClass.contains(Reg)) {
+ if (STI->isLittle()) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2)
+ .add(predOps(ARMCC::AL));
+ } else {
+ // For big-endian targets we need to load the two subregisters of Reg
+ // manually because VLDRD would load them in wrong order
+ unsigned SReg0 = TRI->getSubReg(Reg, ARM::ssub_0);
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2 + 1)
+ .add(predOps(ARMCC::AL));
+ }
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg)
+ .addReg(ARM::SP)
+ .addImm(Reg - ARM::S0)
+ .add(predOps(ARMCC::AL));
+ }
+ }
+ // restore FPSCR from stack and clear bits 0-4, 7, 28-31
+ // The other bits are program global according to the AAPCS
+ if (passesFPReg) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2LDRi8), SpareReg)
+ .addReg(ARM::SP)
+ .addImm(0x40)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
+ .addReg(SpareReg)
+ .addImm(0x0000009F)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
+ .addReg(SpareReg)
+ .addImm(0xF0000000)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR))
+ .addReg(SpareReg)
+ .add(predOps(ARMCC::AL));
+ // The ldr must happen after a floating point instruction. To prevent the
+ // post-ra scheduler from messing with the order, we create a bundle.
+ finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator());
+ }
+}
+
+void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ DebugLoc &DL) {
+ BitVector ClearRegs(32, true);
+ bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);
+
+ // If the instruction does not write to a FP register and no elements were
+ // removed from the set, then no FP registers were used to pass
+ // arguments/returns.
+ if (!DefFP && ClearRegs.count() == ClearRegs.size()) {
+ // save space on stack for VLSTM
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+
+ // Lazy store all FP registers to the stack
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ } else {
+ // Push all the callee-saved registers (s16-s31).
+ MachineInstrBuilder VPUSH =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
+ VPUSH.addReg(Reg);
+
+ // Clear FP registers with a VSCCLRM.
+ (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
+
+ // Save floating-point context.
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(-8)
+ .add(predOps(ARMCC::AL));
+ }
+}
+
+// Restore FP registers if present
+void ARMExpandPseudo::CMSERestoreFPRegs(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs) {
+ if (STI->hasV8_1MMainlineOps())
+ CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
+ else
+ CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
+}
+
+void ARMExpandPseudo::CMSERestoreFPRegsV8(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs) {
+ if (!STI->hasFPRegs())
+ return;
+
+ // Use AvailableRegs to store the fp regs
+ std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
+ std::vector<unsigned> NonclearedFPRegs;
+ for (const MachineOperand &Op : MBBI->operands()) {
+ if (Op.isReg() && Op.isDef()) {
+ unsigned Reg = Op.getReg();
+ assert(!ARM::DPRRegClass.contains(Reg) ||
+ ARM::DPR_VFP2RegClass.contains(Reg));
+ assert(!ARM::QPRRegClass.contains(Reg));
+ if (ARM::DPR_VFP2RegClass.contains(Reg)) {
+ if (AvailableRegs.size() >= 2) {
+ unsigned SaveReg2 = AvailableRegs.pop_back_val();
+ unsigned SaveReg1 = AvailableRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
+ .addReg(SaveReg1, RegState::Define)
+ .addReg(SaveReg2, RegState::Define)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ } else if (ARM::SPRRegClass.contains(Reg)) {
+ if (AvailableRegs.size() >= 1) {
+ unsigned SaveReg = AvailableRegs.pop_back_val();
+ ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
+
+ // Save the fp register to the normal registers
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
+ .addReg(Reg)
+ .add(predOps(ARMCC::AL));
+ } else {
+ NonclearedFPRegs.push_back(Reg);
+ }
+ }
+ }
+ }
+
+ // Push FP regs that cannot be restored via normal registers on the stack
+ for (unsigned Reg : NonclearedFPRegs) {
+ if (ARM::DPR_VFP2RegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD), Reg)
+ .addReg(ARM::SP)
+ .addImm((Reg - ARM::D0) * 2)
+ .add(predOps(ARMCC::AL));
+ else if (ARM::SPRRegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS), Reg)
+ .addReg(ARM::SP)
+ .addImm(Reg - ARM::S0)
+ .add(predOps(ARMCC::AL));
+ }
+
+ // Lazy load fp regs from stack
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+
+ // Restore all FP registers via normal registers
+ for (const auto &Regs : ClearedFPRegs) {
+ unsigned Reg, SaveReg1, SaveReg2;
+ std::tie(Reg, SaveReg1, SaveReg2) = Regs;
+ if (ARM::DPR_VFP2RegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
+ .addReg(SaveReg1)
+ .addReg(SaveReg2)
+ .add(predOps(ARMCC::AL));
+ else if (ARM::SPRRegClass.contains(Reg))
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
+ .addReg(SaveReg1)
+ .add(predOps(ARMCC::AL));
+ }
+
+ // Pop the stack space
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+}
+
+static bool definesOrUsesFPReg(const MachineInstr &MI) {
+ for (const MachineOperand &Op : MI.operands()) {
+ if (!Op.isReg())
+ continue;
+ unsigned Reg = Op.getReg();
+ if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
+ (Reg >= ARM::D0 && Reg <= ARM::D15) ||
+ (Reg >= ARM::S0 && Reg <= ARM::S31))
+ return true;
+ }
+ return false;
+}
+
+void ARMExpandPseudo::CMSERestoreFPRegsV81(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
+ SmallVectorImpl<unsigned> &AvailableRegs) {
+ if (!definesOrUsesFPReg(*MBBI)) {
+ // Load FP registers from stack.
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+
+ // Pop the stack space
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(CMSE_FP_SAVE_SIZE >> 2)
+ .add(predOps(ARMCC::AL));
+ } else {
+ // Restore the floating point context.
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post),
+ ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(8)
+ .add(predOps(ARMCC::AL));
+
+ // Pop all the callee-saved registers (s16-s31).
+ MachineInstrBuilder VPOP =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
+ VPOP.addReg(Reg, RegState::Define);
+ }
+}
+
/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
/// possible. This only gets used at -O0 so we don't care about efficiency of
/// the generated code.
@@ -1155,6 +1751,89 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
return true;
}
+static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int JumpReg,
+ bool Thumb1Only) {
+ const DebugLoc &DL = MBBI->getDebugLoc();
+ if (Thumb1Only) { // push Lo and Hi regs separately
+ MachineInstrBuilder PushMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg)
+ PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
+
+ // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
+ // regs that we just saved and push the low regs again, taking care to
+ // not clobber JumpReg. If JumpReg is one of the low registers, push first
+ // the values of r9-r11, and then r8. That would leave them ordered in
+ // memory, and allow us to later pop them with a single instruction.
+ // FIXME: Could also use any of r0-r3 that are free (including in the
+ // first PUSH above).
+ for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) {
+ if (JumpReg == LoReg)
+ continue;
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
+ .addReg(HiReg, RegState::Undef)
+ .add(predOps(ARMCC::AL));
+ --HiReg;
+ }
+ MachineInstrBuilder PushMIB2 =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
+ if (Reg == JumpReg)
+ continue;
+ PushMIB2.addReg(Reg, RegState::Kill);
+ }
+
+ // If we couldn't use a low register for temporary storage (because it was
+ // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
+ // saved.
+ if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
+ int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
+ .addReg(ARM::R8)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
+ .add(predOps(ARMCC::AL))
+ .addReg(LoReg);
+ }
+ } else { // push Lo and Hi registers with a single instruction
+ MachineInstrBuilder PushMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
+ PushMIB.addReg(Reg, Reg != JumpReg ? RegState::Undef : 0);
+ }
+}
+
+static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, int JumpReg,
+ bool Thumb1Only) {
+ const DebugLoc &DL = MBBI->getDebugLoc();
+ if (Thumb1Only) {
+ MachineInstrBuilder PopMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+ for (int R = 0; R < 4; ++R) {
+ PopMIB.addReg(ARM::R4 + R, RegState::Define);
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R)
+ .addReg(ARM::R4 + R, RegState::Kill)
+ .add(predOps(ARMCC::AL));
+ }
+ MachineInstrBuilder PopMIB2 =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
+ for (int R = 0; R < 4; ++R)
+ PopMIB2.addReg(ARM::R4 + R, RegState::Define);
+ } else { // pop Lo and Hi registers with a single instruction
+ MachineInstrBuilder PopMIB =
+ BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
+ .addReg(ARM::SP)
+ .add(predOps(ARMCC::AL));
+ for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
+ PopMIB.addReg(Reg, RegState::Define);
+ }
+}
bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
@@ -1220,6 +1899,99 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MBBI = NewMI;
return true;
}
+ case ARM::tBXNS_RET: {
+ MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI);
+
+ if (STI->hasV8_1MMainlineOps()) {
+ // Restore the non-secure floating point context.
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
+ TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(4)
+ .add(predOps(ARMCC::AL));
+ }
+
+ // Clear all GPR that are not a use of the return instruction.
+ assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) {
+ return !Op.isReg() || Op.getReg() != ARM::R12;
+ }));
+ SmallVector<unsigned, 5> ClearRegs;
+ determineGPRegsToClear(
+ *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs);
+ CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs,
+ ARM::LR);
+
+ MachineInstrBuilder NewMI =
+ BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(),
+ TII->get(ARM::tBXNS))
+ .addReg(ARM::LR)
+ .add(predOps(ARMCC::AL));
+ for (const MachineOperand &Op : MI.operands())
+ NewMI->addOperand(Op);
+ MI.eraseFromParent();
+ return true;
+ }
+ case ARM::tBLXNS_CALL: {
+ DebugLoc DL = MBBI->getDebugLoc();
+ unsigned JumpReg = MBBI->getOperand(0).getReg();
+ CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg,
+ AFI->isThumb1OnlyFunction());
+
+ SmallVector<unsigned, 16> ClearRegs;
+ determineGPRegsToClear(*MBBI,
+ {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
+ ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9,
+ ARM::R10, ARM::R11, ARM::R12},
+ ClearRegs);
+ auto OriginalClearRegs = ClearRegs;
+
+ // Get the first cleared register as a scratch (to use later with tBIC).
+ // We need to use the first so we can ensure it is a low register.
+ unsigned ScratchReg = ClearRegs.front();
+
+ // Clear LSB of JumpReg
+ if (AFI->isThumb2Function()) {
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg)
+ .addReg(JumpReg)
+ .addImm(1)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp());
+ } else {
+ // We need to use an extra register to cope with 8M Baseline,
+ // since we have saved all of the registers we are ok to trash a non
+ // argument register here.
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg)
+ .add(condCodeOp())
+ .addImm(1)
+ .add(predOps(ARMCC::AL));
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg)
+ .addReg(ARM::CPSR, RegState::Define)
+ .addReg(JumpReg)
+ .addReg(ScratchReg)
+ .add(predOps(ARMCC::AL));
+ }
+
+ CMSESaveClearFPRegs(MBB, MBBI, DL,
+ ClearRegs); // save+clear FP regs with ClearRegs
+ CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);
+
+ const MachineInstrBuilder NewCall =
+ BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr))
+ .add(predOps(ARMCC::AL))
+ .addReg(JumpReg, RegState::Kill);
+
+ for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
+ NewCall->addOperand(MI.getOperand(I));
+ if (MI.isCandidateForCallSiteEntry())
+ MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr());
+
+ CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers
+
+ CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction());
+
+ MI.eraseFromParent();
+ return true;
+ }
case ARM::VMOVHcc:
case ARM::VMOVScc:
case ARM::VMOVDcc: {
diff --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index d5400178ff5a..f47e9ff9f3b6 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -2077,6 +2077,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<Register> &UsedRegs,
bool ARMFastISel::SelectRet(const Instruction *I) {
const ReturnInst *Ret = cast<ReturnInst>(I);
const Function &F = *I->getParent()->getParent();
+ const bool IsCmseNSEntry = F.hasFnAttribute("cmse_nonsecure_entry");
if (!FuncInfo.CanLowerReturn)
return false;
@@ -2153,8 +2154,17 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
RetRegs.push_back(VA.getLocReg());
}
+ unsigned RetOpc;
+ if (IsCmseNSEntry)
+ if (isThumb2)
+ RetOpc = ARM::tBXNS_RET;
+ else
+ llvm_unreachable("CMSE not valid for non-Thumb targets");
+ else
+ RetOpc = Subtarget->getReturnOpcode();
+
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(Subtarget->getReturnOpcode()));
+ TII.get(RetOpc));
AddOptionalDefs(MIB);
for (unsigned R : RetRegs)
MIB.addReg(R, RegState::Implicit);
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 00dad53ca19d..00b310abb547 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -322,14 +322,15 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
-static int getMaxFPOffset(const Function &F, const ARMFunctionInfo &AFI) {
+static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) {
// For Thumb1, push.w isn't available, so the first push will always push
// r7 and lr onto the stack first.
if (AFI.isThumb1OnlyFunction())
return -AFI.getArgRegsSaveSize() - (2 * 4);
// This is a conservative estimation: Assume the frame pointer being r7 and
// pc("r15") up to r8 getting spilled before (= 8 registers).
- return -AFI.getArgRegsSaveSize() - (8 * 4);
+ int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
+ return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4);
}
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
@@ -350,6 +351,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
unsigned NumBytes = MFI.getStackSize();
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ int FPCXTSaveSize = 0;
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -418,6 +420,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
FramePtrSpillFI = FI;
GPRCS1Size += 4;
break;
+ case ARM::FPCXTNS:
+ FPCXTSaveSize = 4;
+ break;
default:
// This is a DPR. Exclude the aligned DPRCS2 spills.
if (Reg == ARM::D8)
@@ -427,26 +432,35 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
}
}
- // Move past area 1.
+ // Move past FPCXT area.
MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
+ if (FPCXTSaveSize > 0) {
+ LastPush = MBBI++;
+ DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
+ }
+
+ // Move past area 1.
if (GPRCS1Size > 0) {
GPRCS1Push = LastPush = MBBI++;
DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
}
// Determine starting offsets of spill areas.
- unsigned GPRCS1Offset = NumBytes - ArgRegsSaveSize - GPRCS1Size;
+ unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
+ unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
unsigned DPRGapSize =
- (GPRCS1Size + GPRCS2Size + ArgRegsSaveSize) % DPRAlign.value();
+ (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) %
+ DPRAlign.value();
+
unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
int FramePtrOffsetInPush = 0;
if (HasFP) {
int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
- assert(getMaxFPOffset(MF.getFunction(), *AFI) <= FPOffset &&
+ assert(getMaxFPOffset(STI, *AFI) <= FPOffset &&
"Max FP estimation is wrong");
- FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize;
+ FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
@@ -581,7 +595,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
if (FramePtrOffsetInPush + PushSize != 0) {
unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true),
- -(ArgRegsSaveSize - FramePtrOffsetInPush)));
+ -(FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush)));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -687,6 +701,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
AFI->getFramePtrSpillOffset());
+ AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
AFI->setDPRCalleeSavedGapSize(DPRGapSize);
@@ -788,6 +803,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// Move SP to start of FP callee save spill area.
NumBytes -= (ArgRegsSaveSize +
+ AFI->getFPCXTSaveAreaSize() +
AFI->getGPRCalleeSavedArea1Size() +
AFI->getGPRCalleeSavedArea2Size() +
AFI->getDPRCalleeSavedGapSize() +
@@ -855,6 +871,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
if (AFI->getGPRCalleeSavedArea2Size()) MBBI++;
if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
+ if (AFI->getFPCXTSaveAreaSize()) MBBI++;
}
if (ArgRegsSaveSize)
@@ -1045,6 +1062,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool isTailCall = false;
bool isInterrupt = false;
bool isTrap = false;
+ bool isCmseEntry = false;
if (MBB.end() != MI) {
DL = MI->getDebugLoc();
unsigned RetOpcode = MI->getOpcode();
@@ -1054,6 +1072,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
isTrap =
RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
RetOpcode == ARM::tTRAP;
+ isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
}
SmallVector<unsigned, 4> Regs;
@@ -1071,7 +1090,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
continue;
if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
- !isTrap && STI.hasV5TOps()) {
+ !isCmseEntry && !isTrap && STI.hasV5TOps()) {
if (MBB.succ_empty()) {
Reg = ARM::PC;
// Fold the return instruction into the LDM.
@@ -1423,6 +1442,16 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
ARM::t2STR_PRE : ARM::STR_PRE_IMM;
unsigned FltOpc = ARM::VSTMDDB_UPD;
unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
+ // Save the non-secure floating point context.
+ if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
+ return C.getReg() == ARM::FPCXTNS;
+ })) {
+ BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
+ ARM::SP)
+ .addReg(ARM::SP)
+ .addImm(-4)
+ .add(predOps(ARMCC::AL));
+ }
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0,
MachineInstr::FrameSetup);
emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0,
@@ -1615,6 +1644,16 @@ checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
SavedRegs.set(ARM::R4);
}
+bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+ // For CMSE entry functions, we want to save the FPCXT_NS immediately
+ // upon function entry (resp. restore it immediately before return)
+ if (STI.hasV8_1MMainlineOps() &&
+ MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
+ return false;
+
+ return true;
+}
+
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
@@ -1684,6 +1723,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (RegInfo->hasBasePointer(MF))
SavedRegs.set(RegInfo->getBaseRegister());
+ // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
+ if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
+ CanEliminateFrame = false;
+
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is modified.
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
@@ -1842,7 +1885,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
//
// We could do slightly better on Thumb1; in some cases, an sp-relative
// offset would be legal even though an fp-relative offset is not.
- int MaxFPOffset = getMaxFPOffset(MF.getFunction(), *AFI);
+ int MaxFPOffset = getMaxFPOffset(STI, *AFI);
bool HasLargeArgumentList =
HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
@@ -2124,6 +2167,27 @@ void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
SavedRegs.set(ARM::R0);
}
+bool ARMFrameLowering::assignCalleeSavedSpillSlots(
+ MachineFunction &MF, const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const {
+ // For CMSE entry functions, handle floating-point context as if it was a
+ // callee-saved register.
+ if (STI.hasV8_1MMainlineOps() &&
+ MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
+ CSI.emplace_back(ARM::FPCXTNS);
+ CSI.back().setRestored(false);
+ }
+
+ return false;
+}
+
+const TargetFrameLowering::SpillSlot *
+ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
+ static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
+ NumEntries = array_lengthof(FixedSpillOffsets);
+ return FixedSpillOffsets;
+}
+
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h
index e46a873f480d..dd4c0caf1c95 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -63,15 +63,22 @@ class ARMFrameLowering : public TargetFrameLowering {
MachineBasicBlock &MBB) const override;
/// Returns true if the target will correctly handle shrink wrapping.
- bool enableShrinkWrapping(const MachineFunction &MF) const override {
- return true;
- }
+ bool enableShrinkWrapping(const MachineFunction &MF) const override;
+
bool isProfitableForNoCSROpt(const Function &F) const override {
// The no-CSR optimisation is bad for code size on ARM, because we can save
// many registers with a single PUSH/POP pair.
return false;
}
+ bool
+ assignCalleeSavedSpillSlots(MachineFunction &MF,
+ const TargetRegisterInfo *TRI,
+ std::vector<CalleeSavedInfo> &CSI) const override;
+
+ const SpillSlot *
+ getCalleeSavedSpillSlots(unsigned &NumEntries) const override;
+
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI, unsigned StmOpc,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 2c6d124d4715..80336f7d41ad 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1562,10 +1562,12 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CALL: return "ARMISD::CALL";
case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
+ case ARMISD::tSECALL: return "ARMISD::tSECALL";
case ARMISD::BRCOND: return "ARMISD::BRCOND";
case ARMISD::BR_JT: return "ARMISD::BR_JT";
case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
+ case ARMISD::SERET_FLAG: return "ARMISD::SERET_FLAG";
case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
case ARMISD::CMP: return "ARMISD::CMP";
@@ -2129,15 +2131,27 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineFunction::CallSiteInfo CSInfo;
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
+ bool isCmseNSCall = false;
bool PreferIndirect = false;
+ // Determine whether this is a non-secure function call.
+ if (CLI.CB && CLI.CB->getAttributes().hasFnAttribute("cmse_nonsecure_call"))
+ isCmseNSCall = true;
+
// Disable tail calls if they're not supported.
if (!Subtarget->supportsTailCall())
isTailCall = false;
+ // For both the non-secure calls and the returns from a CMSE entry function,
+ // the function needs to do some extra work after the call, or before the
+ // return, respectively, thus it cannot end with a tail call
+ if (isCmseNSCall || AFI->isCmseNSEntryFunction())
+ isTailCall = false;
+
if (isa<GlobalAddressSDNode>(Callee)) {
// If we're optimizing for minimum size and the function is called three or
// more times in this block, we can improve codesize by calling indirectly
@@ -2343,7 +2357,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
bool isLocalARMFunc = false;
- ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
auto PtrVt = getPointerTy(DAG.getDataLayout());
if (Subtarget->genLongCalls()) {
@@ -2437,10 +2450,31 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
+ if (isCmseNSCall) {
+ assert(!isARMFunc && !isDirect &&
+ "Cannot handle call to ARM function or direct call");
+ if (NumBytes > 0) {
+ DiagnosticInfoUnsupported Diag(DAG.getMachineFunction().getFunction(),
+ "call to non-secure function would "
+ "require passing arguments on stack",
+ dl.getDebugLoc());
+ DAG.getContext()->diagnose(Diag);
+ }
+ if (isStructRet) {
+ DiagnosticInfoUnsupported Diag(
+ DAG.getMachineFunction().getFunction(),
+ "call to non-secure function would return value through pointer",
+ dl.getDebugLoc());
+ DAG.getContext()->diagnose(Diag);
+ }
+ }
+
// FIXME: handle tail calls differently.
unsigned CallOpc;
if (Subtarget->isThumb()) {
- if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
+ if (isCmseNSCall)
+ CallOpc = ARMISD::tSECALL;
+ else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
CallOpc = ARMISD::CALL_NOLINK;
else
CallOpc = ARMISD::CALL;
@@ -2811,6 +2845,17 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
AFI->setReturnRegsCount(RVLocs.size());
+ // Report error if cmse entry function returns structure through first ptr arg.
+ if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
+ // Note: using an empty SDLoc(), as the first line of the function is a
+ // better place to report than the last line.
+ DiagnosticInfoUnsupported Diag(
+ DAG.getMachineFunction().getFunction(),
+ "secure entry function would return value through pointer",
+ SDLoc().getDebugLoc());
+ DAG.getContext()->diagnose(Diag);
+ }
+
// Copy the result values into the output registers.
for (unsigned i = 0, realRVLocIdx = 0;
i != RVLocs.size();
@@ -2932,7 +2977,9 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
return LowerInterruptReturn(RetOps, dl, DAG);
}
- return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
+ ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_FLAG :
+ ARMISD::RET_FLAG;
+ return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
}
bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index 5e9b077278a0..c5f7183684f6 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -68,10 +68,12 @@ class VectorType;
CALL, // Function call.
CALL_PRED, // Function call that's predicable.
CALL_NOLINK, // Function call with branch not branch-and-link.
+ tSECALL, // CMSE non-secure function call.
BRCOND, // Conditional branch.
BR_JT, // Jumptable branch.
BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump).
RET_FLAG, // Return with a flag operand.
+ SERET_FLAG, // CMSE Entry function return with a flag operand.
INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand.
PIC_ADD, // Add with a PC operand and a PIC label.
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index 8f8853802e6e..6b990a59ed0e 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -159,6 +159,8 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def ARMseretflag : SDNode<"ARMISD::SERET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov,
diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td
index 26164222136f..7fae32117243 100644
--- a/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -14,6 +14,10 @@
// Thumb specific DAG Nodes.
//
+def ARMtsecall : SDNode<"ARMISD::tSECALL", SDT_ARMcall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
+
def imm_sr_XFORM: SDNodeXForm<imm, [{
unsigned Imm = N->getZExtValue();
return CurDAG->getTargetConstant((Imm == 32 ? 0 : Imm), SDLoc(N), MVT::i32);
@@ -499,6 +503,10 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
[(ARMretflag)], (tBX LR, pred:$p)>, Sched<[WriteBr]>;
+ // alternative return for CMSE entry functions
+ def tBXNS_RET : tPseudoInst<(outs), (ins), 2, IIC_Br,
+ [(ARMseretflag)]>, Sched<[WriteBr]>;
+
// Alternative return instruction used by vararg functions.
def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
2, IIC_Br, [],
@@ -560,6 +568,10 @@ let isCall = 1,
let Unpredictable{1-0} = 0b11;
}
+ def tBLXNS_CALL : PseudoInst<(outs), (ins GPRnopc:$func), IIC_Br,
+ [(ARMtsecall GPRnopc:$func)]>,
+ Requires<[IsThumb, Has8MSecExt]>, Sched<[WriteBr]>;
+
// ARMv4T
def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func),
4, IIC_Br,
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index 3b676ca4c883..507c3e69b3a4 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -15,4 +15,6 @@ void ARMFunctionInfo::anchor() {}
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
: isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
- hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()) {}
+ hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
+ IsCmseNSEntry(MF.getFunction().hasFnAttribute("cmse_nonsecure_entry")),
+ IsCmseNSCall(MF.getFunction().hasFnAttribute("cmse_nonsecure_call")) {}
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index 85c6837b72ce..298c8a238987 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -83,6 +83,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// GPRCS1Size, GPRCS2Size, DPRCSSize - Sizes of callee saved register spills
/// areas.
+ unsigned FPCXTSaveSize = 0;
unsigned GPRCS1Size = 0;
unsigned GPRCS2Size = 0;
unsigned DPRCSAlignGapSize = 0;
@@ -105,6 +106,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// HasITBlocks - True if IT blocks have been inserted.
bool HasITBlocks = false;
+ // Security Extensions
+ bool IsCmseNSEntry;
+ bool IsCmseNSCall;
+
/// CPEClones - Track constant pool entries clones created by Constant Island
/// pass.
DenseMap<unsigned, unsigned> CPEClones;
@@ -140,6 +145,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; }
bool isThumb2Function() const { return isThumb && hasThumb2; }
+ bool isCmseNSEntryFunction() const { return IsCmseNSEntry; }
+ bool isCmseNSCallFunction() const { return IsCmseNSCall; }
+
unsigned getStoredByValParamsPadding() const { return StByValParamsPadding; }
void setStoredByValParamsPadding(unsigned p) { StByValParamsPadding = p; }
@@ -172,11 +180,13 @@ class ARMFunctionInfo : public MachineFunctionInfo {
void setGPRCalleeSavedArea2Offset(unsigned o) { GPRCS2Offset = o; }
void setDPRCalleeSavedAreaOffset(unsigned o) { DPRCSOffset = o; }
+ unsigned getFPCXTSaveAreaSize() const { return FPCXTSaveSize; }
unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; }
unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; }
+ void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; }
void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; }
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td
index 3b260f95a98d..39cdb685c492 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -588,3 +588,6 @@ def Tuples4DSpc : RegisterTuples<[dsub_0, dsub_2, dsub_4, dsub_6],
// Spaced quads of D registers.
def DQuadSpc : RegisterClass<"ARM", [v4i64], 64, (add Tuples3DSpc)>;
+
+// FP context payload
+def FPCXTRegs : RegisterClass<"ARM", [i32], 32, (add FPCXTNS)>;
diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index 5676c4f411a2..8222ebce7d07 100644
--- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -1047,6 +1047,10 @@ bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
if (!STI.hasV5TOps())
continue;
+ // CMSE entry functions must return via BXNS, see emitEpilogue.
+ if (AFI->isCmseNSEntryFunction())
+ continue;
+
// Pop LR into PC.
Reg = ARM::PC;
(*MIB).setDesc(TII.get(ARM::tPOP_RET));
diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir b/llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir
new file mode 100644
index 000000000000..5c743d5b5a3a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-clear-float-bigend.mir
@@ -0,0 +1,101 @@
+# RUN: llc -mcpu=cortex-m33 -run-pass=arm-pseudo %s -o - | FileCheck %s
+--- |
+ target datalayout = "E-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbebv8m.main-arm-none-eabi"
+
+ ; Function Attrs: cmse_nonsecure_entry nounwind
+ define hidden arm_aapcs_vfpcc void @secure_foo(void (double, double, double, double, double, double, double, double)* %fptr) local_unnamed_addr #0 {
+ entry:
+ %0 = ptrtoint void (double, double, double, double, double, double, double, double)* %fptr to i32
+ %and = and i32 %0, -2
+ %1 = inttoptr i32 %and to void (double, double, double, double, double, double, double, double)*
+ call arm_aapcs_vfpcc void %1(double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00, double 5.000000e+00, double 6.000000e+00, double 7.000000e+00) #2
+ ret void
+ }
+
+ ; Function Attrs: nounwind
+ declare void @llvm.stackprotector(i8*, i8**) #1
+
+ attributes #0 = { "cmse_nonsecure_entry" nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denormal-fp-math"="preserve-sign" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+8msecext,+armv8-m.main,-d32,-fp64,+fp-armv8,+hwdiv,+thumb-mode,-crypto,-fullfp16,-neon" "unsafe-fp-math"="false" "use-soft-float"="false" }
+ attributes #1 = { nounwind }
+ attributes #2 = { "cmse_nonsecure_call" nounwind }
+
+ !llvm.module.flags = !{!0, !1, !2, !3}
+
+ !0 = !{i32 1, !"wchar_size", i32 4}
+ !1 = !{i32 1, !"static_rwdata", i32 1}
+ !2 = !{i32 1, !"enumsize_buildattr", i32 2}
+ !3 = !{i32 1, !"armlib_unavailable", i32 0}
+
+...
+---
+name: secure_foo
+alignment: 2
+tracksRegLiveness: true
+liveins:
+ - { reg: '$r0' }
+frameInfo:
+ stackSize: 8
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+stack:
+ - { id: 0, type: spill-slot, offset: -4, size: 4, alignment: 4, stack-id: default,
+ callee-saved-register: '$lr' }
+ - { id: 1, type: spill-slot, offset: -8, size: 4, alignment: 4, stack-id: default,
+ callee-saved-register: '$r7' }
+constants:
+ - id: 0
+ value: 'double 0.000000e+00'
+ alignment: 8
+ - id: 1
+ value: 'double 1.000000e+00'
+ alignment: 8
+ - id: 2
+ value: 'double 2.000000e+00'
+ alignment: 8
+ - id: 3
+ value: 'double 3.000000e+00'
+ alignment: 8
+ - id: 4
+ value: 'double 4.000000e+00'
+ alignment: 8
+ - id: 5
+ value: 'double 5.000000e+00'
+ alignment: 8
+ - id: 6
+ value: 'double 6.000000e+00'
+ alignment: 8
+ - id: 7
+ value: 'double 7.000000e+00'
+ alignment: 8
+body: |
+ bb.0.entry:
+ liveins: $r0, $r7, $lr
+
+ $sp = frame-setup t2STMDB_UPD $sp, 14, $noreg, killed $r7, killed $lr
+ frame-setup CFI_INSTRUCTION def_cfa_offset 8
+ frame-setup CFI_INSTRUCTION offset $lr, -4
+ frame-setup CFI_INSTRUCTION offset $r7, -8
+ renamable $d0 = VLDRD %const.0, 0, 14, $noreg :: (load 8 from constant-pool)
+ renamable $d1 = VLDRD %const.1, 0, 14, $noreg :: (load 8 from constant-pool)
+ renamable $d2 = VLDRD %const.2, 0, 14, $noreg :: (load 8 from constant-pool)
+ renamable $d3 = VLDRD %const.3, 0, 14, $noreg :: (load 8 from constant-pool)
+ renamable $d4 = VLDRD %const.4, 0, 14, $noreg :: (load 8 from constant-pool)
+ renamable $d5 = VLDRD %const.5, 0, 14, $noreg :: (load 8 from constant-pool)
+ renamable $d6 = VLDRD %const.6, 0, 14, $noreg :: (load 8 from constant-pool)
+ renamable $d7 = VLDRD %const.7, 0, 14, $noreg :: (load 8 from constant-pool)
+ renamable $r0 = t2BICri killed renamable $r0, 1, 14, $noreg, $noreg
+ tBLXNS_CALL killed renamable $r0, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit $d0, implicit $d1, implicit $d2, implicit $d3, implicit $d4, implicit $d5, implicit $d6, implicit $d7, implicit-def $sp
+ $sp = t2LDMIA_UPD $sp, 14, $noreg, def $r7, def $lr
+ tBXNS_RET
+
+...
+
+# CHECK: VLSTM
+# CHECK-DAG: $s12 = VLDRS $sp, 12, 14 /* CC::al */, $noreg
+# CHECK-DAG: $s13 = VLDRS $sp, 13, 14 /* CC::al */, $noreg
+# CHECK-DAG: $s14 = VLDRS $sp, 14, 14 /* CC::al */, $noreg
+# CHECK-DAG: $s15 = VLDRS $sp, 15, 14 /* CC::al */, $noreg
+# CHECK: tBLXNSr
diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll b/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll
new file mode 100644
index 000000000000..1975b8f18310
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-clear-float-hard.ll
@@ -0,0 +1,811 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \
+; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-LE
+; RUN: llc %s -o - -mtriple=thumbebv8m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \
+; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-BE
+; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-LE
+; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-BE
+; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+mve.fp -float-abi=hard | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-LE
+; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+mve.fp -float-abi=hard | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-BE
+
+define float @f1(float (float)* nocapture %fptr) #0 {
+; CHECK-8M-LABEL: f1:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: vmov.f32 s0, #1.000000e+01
+; CHECK-8M-NEXT: blx r0
+; CHECK-8M-NEXT: pop.w {r7, lr}
+; CHECK-8M-NEXT: mrs r12, control
+; CHECK-8M-NEXT: tst.w r12, #8
+; CHECK-8M-NEXT: beq .LBB0_2
+; CHECK-8M-NEXT: @ %bb.1: @ %entry
+; CHECK-8M-NEXT: vmrs r12, fpscr
+; CHECK-8M-NEXT: vmov s1, lr
+; CHECK-8M-NEXT: vmov d1, lr, lr
+; CHECK-8M-NEXT: vmov d2, lr, lr
+; CHECK-8M-NEXT: vmov d3, lr, lr
+; CHECK-8M-NEXT: vmov d4, lr, lr
+; CHECK-8M-NEXT: vmov d5, lr, lr
+; CHECK-8M-NEXT: vmov d6, lr, lr
+; CHECK-8M-NEXT: vmov d7, lr, lr
+; CHECK-8M-NEXT: bic r12, r12, #159
+; CHECK-8M-NEXT: bic r12, r12, #4026531840
+; CHECK-8M-NEXT: vmsr fpscr, r12
+; CHECK-8M-NEXT: .LBB0_2: @ %entry
+; CHECK-8M-NEXT: mov r0, lr
+; CHECK-8M-NEXT: mov r1, lr
+; CHECK-8M-NEXT: mov r2, lr
+; CHECK-8M-NEXT: mov r3, lr
+; CHECK-8M-NEXT: mov r12, lr
+; CHECK-8M-NEXT: msr apsr_nzcvqg, lr
+; CHECK-8M-NEXT: bxns lr
+;
+; CHECK-81M-LABEL: f1:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: sub sp, #4
+; CHECK-81M-NEXT: vmov.f32 s0, #1.000000e+01
+; CHECK-81M-NEXT: blx r0
+; CHECK-81M-NEXT: add sp, #4
+; CHECK-81M-NEXT: pop.w {r7, lr}
+; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
+; CHECK-81M-NEXT: bxns lr
+entry:
+ %call = call float %fptr(float 10.0) #1
+ ret float %call
+}
+
+attributes #0 = { "cmse_nonsecure_entry" nounwind }
+attributes #1 = { nounwind }
+
+define double @d1(double (double)* nocapture %fptr) #0 {
+; CHECK-8M-LE-LABEL: d1:
+; CHECK-8M-LE: @ %bb.0: @ %entry
+; CHECK-8M-LE-NEXT: push {r7, lr}
+; CHECK-8M-LE-NEXT: vldr d0, .LCPI1_0
+; CHECK-8M-LE-NEXT: blx r0
+; CHECK-8M-LE-NEXT: pop.w {r7, lr}
+; CHECK-8M-LE-NEXT: mrs r12, control
+; CHECK-8M-LE-NEXT: tst.w r12, #8
+; CHECK-8M-LE-NEXT: beq .LBB1_2
+; CHECK-8M-LE-NEXT: @ %bb.1: @ %entry
+; CHECK-8M-LE-NEXT: vmrs r12, fpscr
+; CHECK-8M-LE-NEXT: vmov d1, lr, lr
+; CHECK-8M-LE-NEXT: vmov d2, lr, lr
+; CHECK-8M-LE-NEXT: vmov d3, lr, lr
+; CHECK-8M-LE-NEXT: vmov d4, lr, lr
+; CHECK-8M-LE-NEXT: vmov d5, lr, lr
+; CHECK-8M-LE-NEXT: vmov d6, lr, lr
+; CHECK-8M-LE-NEXT: vmov d7, lr, lr
+; CHECK-8M-LE-NEXT: bic r12, r12, #159
+; CHECK-8M-LE-NEXT: bic r12, r12, #4026531840
+; CHECK-8M-LE-NEXT: vmsr fpscr, r12
+; CHECK-8M-LE-NEXT: .LBB1_2: @ %entry
+; CHECK-8M-LE-NEXT: mov r0, lr
+; CHECK-8M-LE-NEXT: mov r1, lr
+; CHECK-8M-LE-NEXT: mov r2, lr
+; CHECK-8M-LE-NEXT: mov r3, lr
+; CHECK-8M-LE-NEXT: mov r12, lr
+; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, lr
+; CHECK-8M-LE-NEXT: bxns lr
+; CHECK-8M-LE-NEXT: .p2align 3
+; CHECK-8M-LE-NEXT: @ %bb.3:
+; CHECK-8M-LE-NEXT: .LCPI1_0:
+; CHECK-8M-LE-NEXT: .long 0 @ double 10
+; CHECK-8M-LE-NEXT: .long 1076101120
+;
+; CHECK-8M-BE-LABEL: d1:
+; CHECK-8M-BE: @ %bb.0: @ %entry
+; CHECK-8M-BE-NEXT: push {r7, lr}
+; CHECK-8M-BE-NEXT: vldr d0, .LCPI1_0
+; CHECK-8M-BE-NEXT: blx r0
+; CHECK-8M-BE-NEXT: pop.w {r7, lr}
+; CHECK-8M-BE-NEXT: mrs r12, control
+; CHECK-8M-BE-NEXT: tst.w r12, #8
+; CHECK-8M-BE-NEXT: beq .LBB1_2
+; CHECK-8M-BE-NEXT: @ %bb.1: @ %entry
+; CHECK-8M-BE-NEXT: vmrs r12, fpscr
+; CHECK-8M-BE-NEXT: vmov d1, lr, lr
+; CHECK-8M-BE-NEXT: vmov d2, lr, lr
+; CHECK-8M-BE-NEXT: vmov d3, lr, lr
+; CHECK-8M-BE-NEXT: vmov d4, lr, lr
+; CHECK-8M-BE-NEXT: vmov d5, lr, lr
+; CHECK-8M-BE-NEXT: vmov d6, lr, lr
+; CHECK-8M-BE-NEXT: vmov d7, lr, lr
+; CHECK-8M-BE-NEXT: bic r12, r12, #159
+; CHECK-8M-BE-NEXT: bic r12, r12, #4026531840
+; CHECK-8M-BE-NEXT: vmsr fpscr, r12
+; CHECK-8M-BE-NEXT: .LBB1_2: @ %entry
+; CHECK-8M-BE-NEXT: mov r0, lr
+; CHECK-8M-BE-NEXT: mov r1, lr
+; CHECK-8M-BE-NEXT: mov r2, lr
+; CHECK-8M-BE-NEXT: mov r3, lr
+; CHECK-8M-BE-NEXT: mov r12, lr
+; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, lr
+; CHECK-8M-BE-NEXT: bxns lr
+; CHECK-8M-BE-NEXT: .p2align 3
+; CHECK-8M-BE-NEXT: @ %bb.3:
+; CHECK-8M-BE-NEXT: .LCPI1_0:
+; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-8M-BE-NEXT: .long 0
+;
+; CHECK-81M-LE-LABEL: d1:
+; CHECK-81M-LE: @ %bb.0: @ %entry
+; CHECK-81M-LE-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-LE-NEXT: push {r7, lr}
+; CHECK-81M-LE-NEXT: sub sp, #4
+; CHECK-81M-LE-NEXT: vldr d0, .LCPI1_0
+; CHECK-81M-LE-NEXT: blx r0
+; CHECK-81M-LE-NEXT: add sp, #4
+; CHECK-81M-LE-NEXT: pop.w {r7, lr}
+; CHECK-81M-LE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-LE-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-LE-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
+; CHECK-81M-LE-NEXT: bxns lr
+; CHECK-81M-LE-NEXT: .p2align 3
+; CHECK-81M-LE-NEXT: @ %bb.1:
+; CHECK-81M-LE-NEXT: .LCPI1_0:
+; CHECK-81M-LE-NEXT: .long 0 @ double 10
+; CHECK-81M-LE-NEXT: .long 1076101120
+;
+; CHECK-81M-BE-LABEL: d1:
+; CHECK-81M-BE: @ %bb.0: @ %entry
+; CHECK-81M-BE-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-BE-NEXT: push {r7, lr}
+; CHECK-81M-BE-NEXT: sub sp, #4
+; CHECK-81M-BE-NEXT: vldr d0, .LCPI1_0
+; CHECK-81M-BE-NEXT: blx r0
+; CHECK-81M-BE-NEXT: add sp, #4
+; CHECK-81M-BE-NEXT: pop.w {r7, lr}
+; CHECK-81M-BE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-BE-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-BE-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
+; CHECK-81M-BE-NEXT: bxns lr
+; CHECK-81M-BE-NEXT: .p2align 3
+; CHECK-81M-BE-NEXT: @ %bb.1:
+; CHECK-81M-BE-NEXT: .LCPI1_0:
+; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-81M-BE-NEXT: .long 0
+entry:
+ %call = call double %fptr(double 10.0) #1
+ ret double %call
+}
+
+define float @f2(float (float)* nocapture %fptr) #2 {
+; CHECK-8M-LABEL: f2:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: vmov.f32 s0, #1.000000e+01
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r0, r0, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vmov r12, s0
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: vmov s0, r12
+; CHECK-8M-NEXT: ldr r1, [sp, #64]
+; CHECK-8M-NEXT: bic r1, r1, #159
+; CHECK-8M-NEXT: bic r1, r1, #4026531840
+; CHECK-8M-NEXT: vmsr fpscr, r1
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: mov r2, r0
+; CHECK-8M-NEXT: mov r3, r0
+; CHECK-8M-NEXT: mov r4, r0
+; CHECK-8M-NEXT: mov r5, r0
+; CHECK-8M-NEXT: mov r6, r0
+; CHECK-8M-NEXT: mov r7, r0
+; CHECK-8M-NEXT: mov r8, r0
+; CHECK-8M-NEXT: mov r9, r0
+; CHECK-8M-NEXT: mov r10, r0
+; CHECK-8M-NEXT: mov r11, r0
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT: blxns r0
+; CHECK-8M-NEXT: vmov r12, s0
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: vmov s0, r12
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: f2:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: vmov.f32 s0, #1.000000e+01
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r0, r0, #1
+; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r0
+; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ %call = call float %fptr(float 10.0) #3
+ ret float %call
+}
+
+attributes #2 = { nounwind }
+attributes #3 = { "cmse_nonsecure_call" nounwind }
+
+define double @d2(double (double)* nocapture %fptr) #2 {
+; CHECK-8M-LE-LABEL: d2:
+; CHECK-8M-LE: @ %bb.0: @ %entry
+; CHECK-8M-LE-NEXT: push {r7, lr}
+; CHECK-8M-LE-NEXT: vldr d0, .LCPI3_0
+; CHECK-8M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-LE-NEXT: bic r0, r0, #1
+; CHECK-8M-LE-NEXT: sub sp, #136
+; CHECK-8M-LE-NEXT: vmov r11, r12, d0
+; CHECK-8M-LE-NEXT: vlstm sp
+; CHECK-8M-LE-NEXT: vmov d0, r11, r12
+; CHECK-8M-LE-NEXT: ldr r1, [sp, #64]
+; CHECK-8M-LE-NEXT: bic r1, r1, #159
+; CHECK-8M-LE-NEXT: bic r1, r1, #4026531840
+; CHECK-8M-LE-NEXT: vmsr fpscr, r1
+; CHECK-8M-LE-NEXT: mov r1, r0
+; CHECK-8M-LE-NEXT: mov r2, r0
+; CHECK-8M-LE-NEXT: mov r3, r0
+; CHECK-8M-LE-NEXT: mov r4, r0
+; CHECK-8M-LE-NEXT: mov r5, r0
+; CHECK-8M-LE-NEXT: mov r6, r0
+; CHECK-8M-LE-NEXT: mov r7, r0
+; CHECK-8M-LE-NEXT: mov r8, r0
+; CHECK-8M-LE-NEXT: mov r9, r0
+; CHECK-8M-LE-NEXT: mov r10, r0
+; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-LE-NEXT: blxns r0
+; CHECK-8M-LE-NEXT: vmov r11, r12, d0
+; CHECK-8M-LE-NEXT: vlldm sp
+; CHECK-8M-LE-NEXT: vmov d0, r11, r12
+; CHECK-8M-LE-NEXT: add sp, #136
+; CHECK-8M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-LE-NEXT: pop {r7, pc}
+; CHECK-8M-LE-NEXT: .p2align 3
+; CHECK-8M-LE-NEXT: @ %bb.1:
+; CHECK-8M-LE-NEXT: .LCPI3_0:
+; CHECK-8M-LE-NEXT: .long 0 @ double 10
+; CHECK-8M-LE-NEXT: .long 1076101120
+;
+; CHECK-8M-BE-LABEL: d2:
+; CHECK-8M-BE: @ %bb.0: @ %entry
+; CHECK-8M-BE-NEXT: push {r7, lr}
+; CHECK-8M-BE-NEXT: vldr d0, .LCPI3_0
+; CHECK-8M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-BE-NEXT: bic r0, r0, #1
+; CHECK-8M-BE-NEXT: sub sp, #136
+; CHECK-8M-BE-NEXT: vmov r11, r12, d0
+; CHECK-8M-BE-NEXT: vlstm sp
+; CHECK-8M-BE-NEXT: vmov d0, r11, r12
+; CHECK-8M-BE-NEXT: ldr r1, [sp, #64]
+; CHECK-8M-BE-NEXT: bic r1, r1, #159
+; CHECK-8M-BE-NEXT: bic r1, r1, #4026531840
+; CHECK-8M-BE-NEXT: vmsr fpscr, r1
+; CHECK-8M-BE-NEXT: mov r1, r0
+; CHECK-8M-BE-NEXT: mov r2, r0
+; CHECK-8M-BE-NEXT: mov r3, r0
+; CHECK-8M-BE-NEXT: mov r4, r0
+; CHECK-8M-BE-NEXT: mov r5, r0
+; CHECK-8M-BE-NEXT: mov r6, r0
+; CHECK-8M-BE-NEXT: mov r7, r0
+; CHECK-8M-BE-NEXT: mov r8, r0
+; CHECK-8M-BE-NEXT: mov r9, r0
+; CHECK-8M-BE-NEXT: mov r10, r0
+; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-BE-NEXT: blxns r0
+; CHECK-8M-BE-NEXT: vmov r11, r12, d0
+; CHECK-8M-BE-NEXT: vlldm sp
+; CHECK-8M-BE-NEXT: vmov d0, r11, r12
+; CHECK-8M-BE-NEXT: add sp, #136
+; CHECK-8M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-BE-NEXT: pop {r7, pc}
+; CHECK-8M-BE-NEXT: .p2align 3
+; CHECK-8M-BE-NEXT: @ %bb.1:
+; CHECK-8M-BE-NEXT: .LCPI3_0:
+; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-8M-BE-NEXT: .long 0
+;
+; CHECK-81M-LE-LABEL: d2:
+; CHECK-81M-LE: @ %bb.0: @ %entry
+; CHECK-81M-LE-NEXT: push {r7, lr}
+; CHECK-81M-LE-NEXT: vldr d0, .LCPI3_0
+; CHECK-81M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-LE-NEXT: bic r0, r0, #1
+; CHECK-81M-LE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-LE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-LE-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-LE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-LE-NEXT: blxns r0
+; CHECK-81M-LE-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-LE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-LE-NEXT: pop {r7, pc}
+; CHECK-81M-LE-NEXT: .p2align 3
+; CHECK-81M-LE-NEXT: @ %bb.1:
+; CHECK-81M-LE-NEXT: .LCPI3_0:
+; CHECK-81M-LE-NEXT: .long 0 @ double 10
+; CHECK-81M-LE-NEXT: .long 1076101120
+;
+; CHECK-81M-BE-LABEL: d2:
+; CHECK-81M-BE: @ %bb.0: @ %entry
+; CHECK-81M-BE-NEXT: push {r7, lr}
+; CHECK-81M-BE-NEXT: vldr d0, .LCPI3_0
+; CHECK-81M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-BE-NEXT: bic r0, r0, #1
+; CHECK-81M-BE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-BE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-BE-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-BE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-BE-NEXT: blxns r0
+; CHECK-81M-BE-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-BE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-BE-NEXT: pop {r7, pc}
+; CHECK-81M-BE-NEXT: .p2align 3
+; CHECK-81M-BE-NEXT: @ %bb.1:
+; CHECK-81M-BE-NEXT: .LCPI3_0:
+; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-81M-BE-NEXT: .long 0
+entry:
+ %call = call double %fptr(double 10.0) #3
+ ret double %call
+}
+
+define float @f3(float (float)* nocapture %fptr) #4 {
+; CHECK-8M-LABEL: f3:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: vmov.f32 s0, #1.000000e+01
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r0, r0, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vmov r12, s0
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: vmov s0, r12
+; CHECK-8M-NEXT: ldr r1, [sp, #64]
+; CHECK-8M-NEXT: bic r1, r1, #159
+; CHECK-8M-NEXT: bic r1, r1, #4026531840
+; CHECK-8M-NEXT: vmsr fpscr, r1
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: mov r2, r0
+; CHECK-8M-NEXT: mov r3, r0
+; CHECK-8M-NEXT: mov r4, r0
+; CHECK-8M-NEXT: mov r5, r0
+; CHECK-8M-NEXT: mov r6, r0
+; CHECK-8M-NEXT: mov r7, r0
+; CHECK-8M-NEXT: mov r8, r0
+; CHECK-8M-NEXT: mov r9, r0
+; CHECK-8M-NEXT: mov r10, r0
+; CHECK-8M-NEXT: mov r11, r0
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT: blxns r0
+; CHECK-8M-NEXT: vmov r12, s0
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: vmov s0, r12
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: f3:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: vmov.f32 s0, #1.000000e+01
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r0, r0, #1
+; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: vscclrm {s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r0
+; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ %call = tail call float %fptr(float 10.0) #5
+ ret float %call
+}
+
+attributes #4 = { nounwind }
+attributes #5 = { "cmse_nonsecure_call" nounwind }
+
+define double @d3(double (double)* nocapture %fptr) #4 {
+; CHECK-8M-LE-LABEL: d3:
+; CHECK-8M-LE: @ %bb.0: @ %entry
+; CHECK-8M-LE-NEXT: push {r7, lr}
+; CHECK-8M-LE-NEXT: vldr d0, .LCPI5_0
+; CHECK-8M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-LE-NEXT: bic r0, r0, #1
+; CHECK-8M-LE-NEXT: sub sp, #136
+; CHECK-8M-LE-NEXT: vmov r11, r12, d0
+; CHECK-8M-LE-NEXT: vlstm sp
+; CHECK-8M-LE-NEXT: vmov d0, r11, r12
+; CHECK-8M-LE-NEXT: ldr r1, [sp, #64]
+; CHECK-8M-LE-NEXT: bic r1, r1, #159
+; CHECK-8M-LE-NEXT: bic r1, r1, #4026531840
+; CHECK-8M-LE-NEXT: vmsr fpscr, r1
+; CHECK-8M-LE-NEXT: mov r1, r0
+; CHECK-8M-LE-NEXT: mov r2, r0
+; CHECK-8M-LE-NEXT: mov r3, r0
+; CHECK-8M-LE-NEXT: mov r4, r0
+; CHECK-8M-LE-NEXT: mov r5, r0
+; CHECK-8M-LE-NEXT: mov r6, r0
+; CHECK-8M-LE-NEXT: mov r7, r0
+; CHECK-8M-LE-NEXT: mov r8, r0
+; CHECK-8M-LE-NEXT: mov r9, r0
+; CHECK-8M-LE-NEXT: mov r10, r0
+; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-LE-NEXT: blxns r0
+; CHECK-8M-LE-NEXT: vmov r11, r12, d0
+; CHECK-8M-LE-NEXT: vlldm sp
+; CHECK-8M-LE-NEXT: vmov d0, r11, r12
+; CHECK-8M-LE-NEXT: add sp, #136
+; CHECK-8M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-LE-NEXT: pop {r7, pc}
+; CHECK-8M-LE-NEXT: .p2align 3
+; CHECK-8M-LE-NEXT: @ %bb.1:
+; CHECK-8M-LE-NEXT: .LCPI5_0:
+; CHECK-8M-LE-NEXT: .long 0 @ double 10
+; CHECK-8M-LE-NEXT: .long 1076101120
+;
+; CHECK-8M-BE-LABEL: d3:
+; CHECK-8M-BE: @ %bb.0: @ %entry
+; CHECK-8M-BE-NEXT: push {r7, lr}
+; CHECK-8M-BE-NEXT: vldr d0, .LCPI5_0
+; CHECK-8M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-BE-NEXT: bic r0, r0, #1
+; CHECK-8M-BE-NEXT: sub sp, #136
+; CHECK-8M-BE-NEXT: vmov r11, r12, d0
+; CHECK-8M-BE-NEXT: vlstm sp
+; CHECK-8M-BE-NEXT: vmov d0, r11, r12
+; CHECK-8M-BE-NEXT: ldr r1, [sp, #64]
+; CHECK-8M-BE-NEXT: bic r1, r1, #159
+; CHECK-8M-BE-NEXT: bic r1, r1, #4026531840
+; CHECK-8M-BE-NEXT: vmsr fpscr, r1
+; CHECK-8M-BE-NEXT: mov r1, r0
+; CHECK-8M-BE-NEXT: mov r2, r0
+; CHECK-8M-BE-NEXT: mov r3, r0
+; CHECK-8M-BE-NEXT: mov r4, r0
+; CHECK-8M-BE-NEXT: mov r5, r0
+; CHECK-8M-BE-NEXT: mov r6, r0
+; CHECK-8M-BE-NEXT: mov r7, r0
+; CHECK-8M-BE-NEXT: mov r8, r0
+; CHECK-8M-BE-NEXT: mov r9, r0
+; CHECK-8M-BE-NEXT: mov r10, r0
+; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-BE-NEXT: blxns r0
+; CHECK-8M-BE-NEXT: vmov r11, r12, d0
+; CHECK-8M-BE-NEXT: vlldm sp
+; CHECK-8M-BE-NEXT: vmov d0, r11, r12
+; CHECK-8M-BE-NEXT: add sp, #136
+; CHECK-8M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-BE-NEXT: pop {r7, pc}
+; CHECK-8M-BE-NEXT: .p2align 3
+; CHECK-8M-BE-NEXT: @ %bb.1:
+; CHECK-8M-BE-NEXT: .LCPI5_0:
+; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-8M-BE-NEXT: .long 0
+;
+; CHECK-81M-LE-LABEL: d3:
+; CHECK-81M-LE: @ %bb.0: @ %entry
+; CHECK-81M-LE-NEXT: push {r7, lr}
+; CHECK-81M-LE-NEXT: vldr d0, .LCPI5_0
+; CHECK-81M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-LE-NEXT: bic r0, r0, #1
+; CHECK-81M-LE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-LE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-LE-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-LE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-LE-NEXT: blxns r0
+; CHECK-81M-LE-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-LE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-LE-NEXT: pop {r7, pc}
+; CHECK-81M-LE-NEXT: .p2align 3
+; CHECK-81M-LE-NEXT: @ %bb.1:
+; CHECK-81M-LE-NEXT: .LCPI5_0:
+; CHECK-81M-LE-NEXT: .long 0 @ double 10
+; CHECK-81M-LE-NEXT: .long 1076101120
+;
+; CHECK-81M-BE-LABEL: d3:
+; CHECK-81M-BE: @ %bb.0: @ %entry
+; CHECK-81M-BE-NEXT: push {r7, lr}
+; CHECK-81M-BE-NEXT: vldr d0, .LCPI5_0
+; CHECK-81M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-BE-NEXT: bic r0, r0, #1
+; CHECK-81M-BE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-BE-NEXT: vscclrm {s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-BE-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-BE-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-BE-NEXT: blxns r0
+; CHECK-81M-BE-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-BE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-BE-NEXT: pop {r7, pc}
+; CHECK-81M-BE-NEXT: .p2align 3
+; CHECK-81M-BE-NEXT: @ %bb.1:
+; CHECK-81M-BE-NEXT: .LCPI5_0:
+; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-81M-BE-NEXT: .long 0
+entry:
+ %call = tail call double %fptr(double 10.0) #5
+ ret double %call
+}
+
+define float @f4(float ()* nocapture %fptr) #6 {
+; CHECK-8M-LABEL: f4:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r0, r0, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: mov r2, r0
+; CHECK-8M-NEXT: mov r3, r0
+; CHECK-8M-NEXT: mov r4, r0
+; CHECK-8M-NEXT: mov r5, r0
+; CHECK-8M-NEXT: mov r6, r0
+; CHECK-8M-NEXT: mov r7, r0
+; CHECK-8M-NEXT: mov r8, r0
+; CHECK-8M-NEXT: mov r9, r0
+; CHECK-8M-NEXT: mov r10, r0
+; CHECK-8M-NEXT: mov r11, r0
+; CHECK-8M-NEXT: mov r12, r0
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT: blxns r0
+; CHECK-8M-NEXT: vmov r12, s0
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: vmov s0, r12
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: f4:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r0, r0, #1
+; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r0
+; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ %call = call float %fptr() #7
+ ret float %call
+}
+
+attributes #6 = { nounwind }
+attributes #7 = { "cmse_nonsecure_call" nounwind }
+
+define double @d4(double ()* nocapture %fptr) #6 {
+; CHECK-8M-LABEL: d4:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r0, r0, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: mov r2, r0
+; CHECK-8M-NEXT: mov r3, r0
+; CHECK-8M-NEXT: mov r4, r0
+; CHECK-8M-NEXT: mov r5, r0
+; CHECK-8M-NEXT: mov r6, r0
+; CHECK-8M-NEXT: mov r7, r0
+; CHECK-8M-NEXT: mov r8, r0
+; CHECK-8M-NEXT: mov r9, r0
+; CHECK-8M-NEXT: mov r10, r0
+; CHECK-8M-NEXT: mov r11, r0
+; CHECK-8M-NEXT: mov r12, r0
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT: blxns r0
+; CHECK-8M-NEXT: vmov r11, r12, d0
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: vmov d0, r11, r12
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: d4:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r0, r0, #1
+; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r0
+; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ %call = call double %fptr() #7
+ ret double %call
+}
+
+define void @fd(void (float, double)* %f, float %a, double %b) #8 {
+; CHECK-8M-LABEL: fd:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r0, r0, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vmov r12, s0
+; CHECK-8M-NEXT: mov r2, r0
+; CHECK-8M-NEXT: vmov r10, r11, d1
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: vmov s0, r12
+; CHECK-8M-NEXT: vmov d1, r10, r11
+; CHECK-8M-NEXT: ldr r1, [sp, #64]
+; CHECK-8M-NEXT: bic r1, r1, #159
+; CHECK-8M-NEXT: bic r1, r1, #4026531840
+; CHECK-8M-NEXT: vmsr fpscr, r1
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: mov r3, r0
+; CHECK-8M-NEXT: mov r4, r0
+; CHECK-8M-NEXT: mov r5, r0
+; CHECK-8M-NEXT: mov r6, r0
+; CHECK-8M-NEXT: mov r7, r0
+; CHECK-8M-NEXT: mov r8, r0
+; CHECK-8M-NEXT: mov r9, r0
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT: blxns r0
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: fd:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r0, r0, #1
+; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: vscclrm {s1, vpr}
+; CHECK-81M-NEXT: vscclrm {s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r0
+; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ call void %f(float %a, double %b) #9
+ ret void
+}
+
+attributes #8 = { nounwind }
+attributes #9 = { "cmse_nonsecure_call" nounwind }
+
+define void @fdff(void (float, double, float, float)* %f, float %a, double %b, float %c, float %d) #8 {
+; CHECK-8M-LABEL: fdff:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r0, r0, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vmov r12, s0
+; CHECK-8M-NEXT: mov r2, r0
+; CHECK-8M-NEXT: vmov r10, r11, d1
+; CHECK-8M-NEXT: mov r3, r0
+; CHECK-8M-NEXT: vmov r9, s1
+; CHECK-8M-NEXT: mov r4, r0
+; CHECK-8M-NEXT: vmov r8, s4
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: vmov s0, r12
+; CHECK-8M-NEXT: vmov d1, r10, r11
+; CHECK-8M-NEXT: vmov s1, r9
+; CHECK-8M-NEXT: vmov s4, r8
+; CHECK-8M-NEXT: ldr r1, [sp, #64]
+; CHECK-8M-NEXT: bic r1, r1, #159
+; CHECK-8M-NEXT: bic r1, r1, #4026531840
+; CHECK-8M-NEXT: vmsr fpscr, r1
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: mov r5, r0
+; CHECK-8M-NEXT: mov r6, r0
+; CHECK-8M-NEXT: mov r7, r0
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT: blxns r0
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: fdff:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r0, r0, #1
+; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: vscclrm {s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r0
+; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ call void %f(float %a, double %b, float %c, float %d) #9
+ ret void
+}
+
+define void @fidififid(void (float, i32, double, i32, float, i32, float, i32, double)* %fu, float %a, i32 %b, double %c, i32 %d, float %e, i32 %f, float %g, i32 %h, double %i) #8 {
+; CHECK-8M-LABEL: fidififid:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: mov lr, r3
+; CHECK-8M-NEXT: mov r12, r0
+; CHECK-8M-NEXT: mov r0, r1
+; CHECK-8M-NEXT: mov r1, r2
+; CHECK-8M-NEXT: ldr r3, [sp, #8]
+; CHECK-8M-NEXT: mov r2, lr
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r12, r12, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vmov r11, s0
+; CHECK-8M-NEXT: vmov r9, r10, d1
+; CHECK-8M-NEXT: vmov r8, s1
+; CHECK-8M-NEXT: vmov r7, s4
+; CHECK-8M-NEXT: vmov r5, r6, d3
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: vmov s0, r11
+; CHECK-8M-NEXT: vmov d1, r9, r10
+; CHECK-8M-NEXT: vmov s1, r8
+; CHECK-8M-NEXT: vmov s4, r7
+; CHECK-8M-NEXT: vmov d3, r5, r6
+; CHECK-8M-NEXT: ldr r4, [sp, #64]
+; CHECK-8M-NEXT: bic r4, r4, #159
+; CHECK-8M-NEXT: bic r4, r4, #4026531840
+; CHECK-8M-NEXT: vmsr fpscr, r4
+; CHECK-8M-NEXT: mov r4, r12
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r12
+; CHECK-8M-NEXT: blxns r12
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: fidififid:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: mov lr, r3
+; CHECK-81M-NEXT: mov r12, r0
+; CHECK-81M-NEXT: mov r0, r1
+; CHECK-81M-NEXT: mov r1, r2
+; CHECK-81M-NEXT: ldr r3, [sp, #8]
+; CHECK-81M-NEXT: mov r2, lr
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r12, r12, #1
+; CHECK-81M-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: vscclrm {s5, vpr}
+; CHECK-81M-NEXT: vscclrm {s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-81M-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-81M-NEXT: clrm {r4, r5, r6, r7, r8, r9, r10, r11, apsr}
+; CHECK-81M-NEXT: blxns r12
+; CHECK-81M-NEXT: vldr fpcxts, [sp], #8
+; CHECK-81M-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ call void %fu(float %a, i32 %b, double %c, i32 %d, float %e, i32 %f, float %g, i32 %h, double %i) #9
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-hard2.ll b/llvm/test/CodeGen/ARM/cmse-clear-float-hard2.ll
new file mode 100644
index 000000000000..715ef0bd58d5
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-clear-float-hard2.ll
@@ -0,0 +1,144 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \
+; RUN: FileCheck %s --check-prefix=CHECK-V8-LE
+; RUN: llc %s -o - -mtriple=thumbebv8m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \
+; RUN: FileCheck %s --check-prefix=CHECK-V8-BE
+
+; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \
+; RUN: FileCheck %s --check-prefix=CHECK-V81-LE
+; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+fp-armv8d16sp,+dsp -float-abi=hard | \
+; RUN: FileCheck %s --check-prefix=CHECK-V81-BE
+
+attributes #0 = { nounwind }
+attributes #1 = { "cmse_nonsecure_call" nounwind }
+
+define void @fidififiddddff(void (float, i32, double, i32, float, i32, float, i32, double, double, double, double, float, float)* %fu, float %a, i32 %b, double %c, i32 %d, float %e, i32 %f, float %g, i32 %h, double %i, double %j, double %k, double %l, float %m, float %n) #0 {
+; CHECK-V8-LE-LABEL: fidififiddddff:
+; CHECK-V8-LE: @ %bb.0: @ %entry
+; CHECK-V8-LE-NEXT: push {r7, lr}
+; CHECK-V8-LE-NEXT: mov lr, r3
+; CHECK-V8-LE-NEXT: mov r12, r0
+; CHECK-V8-LE-NEXT: mov r0, r1
+; CHECK-V8-LE-NEXT: mov r1, r2
+; CHECK-V8-LE-NEXT: ldr r3, [sp, #8]
+; CHECK-V8-LE-NEXT: mov r2, lr
+; CHECK-V8-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-V8-LE-NEXT: bic r12, r12, #1
+; CHECK-V8-LE-NEXT: sub sp, #136
+; CHECK-V8-LE-NEXT: vmov r4, s5
+; CHECK-V8-LE-NEXT: vmov r11, s0
+; CHECK-V8-LE-NEXT: vmov r9, r10, d1
+; CHECK-V8-LE-NEXT: vmov r8, s1
+; CHECK-V8-LE-NEXT: vmov r7, s4
+; CHECK-V8-LE-NEXT: vmov r5, r6, d3
+; CHECK-V8-LE-NEXT: vlstm sp
+; CHECK-V8-LE-NEXT: vmov s0, r11
+; CHECK-V8-LE-NEXT: vmov d1, r9, r10
+; CHECK-V8-LE-NEXT: vmov s1, r8
+; CHECK-V8-LE-NEXT: vmov s4, r7
+; CHECK-V8-LE-NEXT: vmov d3, r5, r6
+; CHECK-V8-LE-NEXT: vmov s5, r4
+; CHECK-V8-LE-NEXT: vldr d4, [sp, #32]
+; CHECK-V8-LE-NEXT: vldr d5, [sp, #40]
+; CHECK-V8-LE-NEXT: vldr d6, [sp, #48]
+; CHECK-V8-LE-NEXT: vldr s14, [sp, #56]
+; CHECK-V8-LE-NEXT: ldr r4, [sp, #64]
+; CHECK-V8-LE-NEXT: bic r4, r4, #159
+; CHECK-V8-LE-NEXT: bic r4, r4, #4026531840
+; CHECK-V8-LE-NEXT: vmsr fpscr, r4
+; CHECK-V8-LE-NEXT: msr apsr_nzcvqg, r12
+; CHECK-V8-LE-NEXT: blxns r12
+; CHECK-V8-LE-NEXT: vlldm sp
+; CHECK-V8-LE-NEXT: add sp, #136
+; CHECK-V8-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-V8-LE-NEXT: pop {r7, pc}
+;
+; CHECK-V8-BE-LABEL: fidififiddddff:
+; CHECK-V8-BE: @ %bb.0: @ %entry
+; CHECK-V8-BE-NEXT: push {r7, lr}
+; CHECK-V8-BE-NEXT: mov lr, r3
+; CHECK-V8-BE-NEXT: mov r12, r0
+; CHECK-V8-BE-NEXT: mov r0, r1
+; CHECK-V8-BE-NEXT: mov r1, r2
+; CHECK-V8-BE-NEXT: ldr r3, [sp, #8]
+; CHECK-V8-BE-NEXT: mov r2, lr
+; CHECK-V8-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-V8-BE-NEXT: bic r12, r12, #1
+; CHECK-V8-BE-NEXT: sub sp, #136
+; CHECK-V8-BE-NEXT: vmov r4, s5
+; CHECK-V8-BE-NEXT: vmov r11, s0
+; CHECK-V8-BE-NEXT: vmov r9, r10, d1
+; CHECK-V8-BE-NEXT: vmov r8, s1
+; CHECK-V8-BE-NEXT: vmov r7, s4
+; CHECK-V8-BE-NEXT: vmov r5, r6, d3
+; CHECK-V8-BE-NEXT: vlstm sp
+; CHECK-V8-BE-NEXT: vmov s0, r11
+; CHECK-V8-BE-NEXT: vmov d1, r9, r10
+; CHECK-V8-BE-NEXT: vmov s1, r8
+; CHECK-V8-BE-NEXT: vmov s4, r7
+; CHECK-V8-BE-NEXT: vmov d3, r5, r6
+; CHECK-V8-BE-NEXT: vmov s5, r4
+; CHECK-V8-BE-NEXT: vldr s8, [sp, #32]
+; CHECK-V8-BE-NEXT: vldr s9, [sp, #36]
+; CHECK-V8-BE-NEXT: vldr s10, [sp, #40]
+; CHECK-V8-BE-NEXT: vldr s11, [sp, #44]
+; CHECK-V8-BE-NEXT: vldr s12, [sp, #48]
+; CHECK-V8-BE-NEXT: vldr s13, [sp, #52]
+; CHECK-V8-BE-NEXT: vldr s14, [sp, #56]
+; CHECK-V8-BE-NEXT: ldr r4, [sp, #64]
+; CHECK-V8-BE-NEXT: bic r4, r4, #159
+; CHECK-V8-BE-NEXT: bic r4, r4, #4026531840
+; CHECK-V8-BE-NEXT: vmsr fpscr, r4
+; CHECK-V8-BE-NEXT: msr apsr_nzcvqg, r12
+; CHECK-V8-BE-NEXT: blxns r12
+; CHECK-V8-BE-NEXT: vlldm sp
+; CHECK-V8-BE-NEXT: add sp, #136
+; CHECK-V8-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-V8-BE-NEXT: pop {r7, pc}
+;
+; CHECK-V81-LE-LABEL: fidififiddddff:
+; CHECK-V81-LE: @ %bb.0: @ %entry
+; CHECK-V81-LE-NEXT: push {r7, lr}
+; CHECK-V81-LE-NEXT: mov lr, r3
+; CHECK-V81-LE-NEXT: mov r12, r0
+; CHECK-V81-LE-NEXT: mov r0, r1
+; CHECK-V81-LE-NEXT: mov r1, r2
+; CHECK-V81-LE-NEXT: ldr r3, [sp, #8]
+; CHECK-V81-LE-NEXT: mov r2, lr
+; CHECK-V81-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-V81-LE-NEXT: bic r12, r12, #1
+; CHECK-V81-LE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-V81-LE-NEXT: vscclrm {s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-V81-LE-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-V81-LE-NEXT: clrm {r4, r5, r6, r7, r8, r9, r10, r11, apsr}
+; CHECK-V81-LE-NEXT: blxns r12
+; CHECK-V81-LE-NEXT: vldr fpcxts, [sp], #8
+; CHECK-V81-LE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-V81-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-V81-LE-NEXT: pop {r7, pc}
+;
+; CHECK-V81-BE-LABEL: fidififiddddff:
+; CHECK-V81-BE: @ %bb.0: @ %entry
+; CHECK-V81-BE-NEXT: push {r7, lr}
+; CHECK-V81-BE-NEXT: mov lr, r3
+; CHECK-V81-BE-NEXT: mov r12, r0
+; CHECK-V81-BE-NEXT: mov r0, r1
+; CHECK-V81-BE-NEXT: mov r1, r2
+; CHECK-V81-BE-NEXT: ldr r3, [sp, #8]
+; CHECK-V81-BE-NEXT: mov r2, lr
+; CHECK-V81-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-V81-BE-NEXT: bic r12, r12, #1
+; CHECK-V81-BE-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-V81-BE-NEXT: vscclrm {s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-V81-BE-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-V81-BE-NEXT: clrm {r4, r5, r6, r7, r8, r9, r10, r11, apsr}
+; CHECK-V81-BE-NEXT: blxns r12
+; CHECK-V81-BE-NEXT: vldr fpcxts, [sp], #8
+; CHECK-V81-BE-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-V81-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-V81-BE-NEXT: pop {r7, pc}
+entry:
+ call void %fu(float %a, i32 %b, double %c, i32 %d, float %e, i32 %f, float %g, i32 %h, double %i, double %j, double %k, double %l, float %m, float %n) #1
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float-mve.ll b/llvm/test/CodeGen/ARM/cmse-clear-float-mve.ll
new file mode 100644
index 000000000000..0da8080e9446
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-clear-float-mve.ll
@@ -0,0 +1,172 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-eabi -mattr=+8msecext,+mve.fp %s -o - | FileCheck %s --check-prefix=CHECK-SOFTFP
+; RUN: llc -mtriple=thumbebv8.1m.main-eabi -mattr=+8msecext,+mve.fp %s -o - | FileCheck %s --check-prefix=CHECK-SOFTFP
+; RUN: llc -mtriple=thumbv8.1m.main-eabi -mattr=+8msecext,+mve.fp --float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK-HARD
+; RUN: llc -mtriple=thumbebv8.1m.main-eabi -mattr=+8msecext,+mve.fp --float-abi=hard %s -o - | FileCheck %s --check-prefix=CHECK-HARD
+
+declare <8 x i16> @g0(...) #0
+declare <4 x float> @g1(...) #0
+
+;;
+;; Test clearing before return to nonsecure state
+;;
+
+define <8 x i16> @f0() #1 {
+; CHECK-SOFTFP-LABEL: f0:
+; CHECK-SOFTFP: @ %bb.0: @ %entry
+; CHECK-SOFTFP-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-SOFTFP-NEXT: .save {r7, lr}
+; CHECK-SOFTFP-NEXT: push {r7, lr}
+; CHECK-SOFTFP-NEXT: .pad #4
+; CHECK-SOFTFP-NEXT: sub sp, #4
+; CHECK-SOFTFP-NEXT: bl g0
+; CHECK-SOFTFP-NEXT: add sp, #4
+; CHECK-SOFTFP-NEXT: pop.w {r7, lr}
+; CHECK-SOFTFP-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-SOFTFP-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-SOFTFP-NEXT: clrm {r12, apsr}
+; CHECK-SOFTFP-NEXT: bxns lr
+;
+; CHECK-HARD-LABEL: f0:
+; CHECK-HARD: @ %bb.0: @ %entry
+; CHECK-HARD-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-HARD-NEXT: .save {r7, lr}
+; CHECK-HARD-NEXT: push {r7, lr}
+; CHECK-HARD-NEXT: .pad #4
+; CHECK-HARD-NEXT: sub sp, #4
+; CHECK-HARD-NEXT: bl g0
+; CHECK-HARD-NEXT: add sp, #4
+; CHECK-HARD-NEXT: pop.w {r7, lr}
+; CHECK-HARD-NEXT: vscclrm {s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-HARD-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-HARD-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
+; CHECK-HARD-NEXT: bxns lr
+entry:
+ %call = call <8 x i16> bitcast (<8 x i16> (...)* @g0 to <8 x i16> ()*)() #0
+ ret <8 x i16> %call
+}
+
+define <4 x float> @f1() #1 {
+; CHECK-SOFTFP-LABEL: f1:
+; CHECK-SOFTFP: @ %bb.0: @ %entry
+; CHECK-SOFTFP-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-SOFTFP-NEXT: .save {r7, lr}
+; CHECK-SOFTFP-NEXT: push {r7, lr}
+; CHECK-SOFTFP-NEXT: .pad #4
+; CHECK-SOFTFP-NEXT: sub sp, #4
+; CHECK-SOFTFP-NEXT: bl g1
+; CHECK-SOFTFP-NEXT: add sp, #4
+; CHECK-SOFTFP-NEXT: pop.w {r7, lr}
+; CHECK-SOFTFP-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-SOFTFP-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-SOFTFP-NEXT: clrm {r12, apsr}
+; CHECK-SOFTFP-NEXT: bxns lr
+;
+; CHECK-HARD-LABEL: f1:
+; CHECK-HARD: @ %bb.0: @ %entry
+; CHECK-HARD-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-HARD-NEXT: .save {r7, lr}
+; CHECK-HARD-NEXT: push {r7, lr}
+; CHECK-HARD-NEXT: .pad #4
+; CHECK-HARD-NEXT: sub sp, #4
+; CHECK-HARD-NEXT: bl g1
+; CHECK-HARD-NEXT: add sp, #4
+; CHECK-HARD-NEXT: pop.w {r7, lr}
+; CHECK-HARD-NEXT: vscclrm {s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-HARD-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-HARD-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
+; CHECK-HARD-NEXT: bxns lr
+entry:
+ %call = call nnan ninf nsz <4 x float> bitcast (<4 x float> (...)* @g1 to <4 x float> ()*)() #0
+ ret <4 x float> %call
+}
+
+;;
+;; Test clearing around nonsecure calls
+;;
+
+define void @f2(void (<8 x i16>)* nocapture %cb) #0 {
+; CHECK-SOFTFP-LABEL: f2:
+; CHECK-SOFTFP: @ %bb.0: @ %entry
+; CHECK-SOFTFP-NEXT: .save {r4, lr}
+; CHECK-SOFTFP-NEXT: push {r4, lr}
+; CHECK-SOFTFP-NEXT: mov r4, r0
+; CHECK-SOFTFP-NEXT: bl g0
+; CHECK-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-SOFTFP-NEXT: bic r4, r4, #1
+; CHECK-SOFTFP-NEXT: sub sp, #136
+; CHECK-SOFTFP-NEXT: vlstm sp
+; CHECK-SOFTFP-NEXT: clrm {r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-SOFTFP-NEXT: blxns r4
+; CHECK-SOFTFP-NEXT: vlldm sp
+; CHECK-SOFTFP-NEXT: add sp, #136
+; CHECK-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-SOFTFP-NEXT: pop {r4, pc}
+;
+; CHECK-HARD-LABEL: f2:
+; CHECK-HARD: @ %bb.0: @ %entry
+; CHECK-HARD-NEXT: .save {r4, lr}
+; CHECK-HARD-NEXT: push {r4, lr}
+; CHECK-HARD-NEXT: mov r4, r0
+; CHECK-HARD-NEXT: bl g0
+; CHECK-HARD-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-HARD-NEXT: bic r4, r4, #1
+; CHECK-HARD-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-HARD-NEXT: vscclrm {s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-HARD-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-HARD-NEXT: clrm {r0, r1, r2, r3, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-HARD-NEXT: blxns r4
+; CHECK-HARD-NEXT: vldr fpcxts, [sp], #8
+; CHECK-HARD-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-HARD-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-HARD-NEXT: pop {r4, pc}
+entry:
+ %call = tail call <8 x i16> bitcast (<8 x i16> (...)* @g0 to <8 x i16> ()*)() #0
+ tail call void %cb(<8 x i16> %call) #2
+ ret void
+}
+
+define void @f3(void (<4 x float>)* nocapture %cb) #0 {
+; CHECK-SOFTFP-LABEL: f3:
+; CHECK-SOFTFP: @ %bb.0: @ %entry
+; CHECK-SOFTFP-NEXT: .save {r4, lr}
+; CHECK-SOFTFP-NEXT: push {r4, lr}
+; CHECK-SOFTFP-NEXT: mov r4, r0
+; CHECK-SOFTFP-NEXT: bl g1
+; CHECK-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-SOFTFP-NEXT: bic r4, r4, #1
+; CHECK-SOFTFP-NEXT: sub sp, #136
+; CHECK-SOFTFP-NEXT: vlstm sp
+; CHECK-SOFTFP-NEXT: clrm {r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-SOFTFP-NEXT: blxns r4
+; CHECK-SOFTFP-NEXT: vlldm sp
+; CHECK-SOFTFP-NEXT: add sp, #136
+; CHECK-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-SOFTFP-NEXT: pop {r4, pc}
+;
+; CHECK-HARD-LABEL: f3:
+; CHECK-HARD: @ %bb.0: @ %entry
+; CHECK-HARD-NEXT: .save {r4, lr}
+; CHECK-HARD-NEXT: push {r4, lr}
+; CHECK-HARD-NEXT: mov r4, r0
+; CHECK-HARD-NEXT: bl g1
+; CHECK-HARD-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-HARD-NEXT: bic r4, r4, #1
+; CHECK-HARD-NEXT: vpush {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-HARD-NEXT: vscclrm {s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31, vpr}
+; CHECK-HARD-NEXT: vstr fpcxts, [sp, #-8]!
+; CHECK-HARD-NEXT: clrm {r0, r1, r2, r3, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-HARD-NEXT: blxns r4
+; CHECK-HARD-NEXT: vldr fpcxts, [sp], #8
+; CHECK-HARD-NEXT: vpop {s16, s17, s18, s19, s20, s21, s22, s23, s24, s25, s26, s27, s28, s29, s30, s31}
+; CHECK-HARD-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-HARD-NEXT: pop {r4, pc}
+entry:
+ %call = tail call nnan ninf nsz <4 x float> bitcast (<4 x float> (...)* @g1 to <4 x float> ()*)() #0
+ tail call void %cb(<4 x float> %call) #2
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind "cmse_nonsecure_entry" }
+attributes #2 = { nounwind "cmse_nonsecure_call" }
diff --git a/llvm/test/CodeGen/ARM/cmse-clear-float.ll b/llvm/test/CodeGen/ARM/cmse-clear-float.ll
new file mode 100644
index 000000000000..356d13c16da3
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-clear-float.ll
@@ -0,0 +1,718 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+fp-armv8d16sp,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-LE
+; RUN: llc %s -o - -mtriple=thumbebv8m.main -mattr=+fp-armv8d16sp,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-BE
+
+; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+fp-armv8d16sp,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-LE
+; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+fp-armv8d16sp,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-BE
+; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+mve | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-LE
+; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+mve | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-BE
+
+define float @f1(float (float)* nocapture %fptr) #0 {
+; CHECK-8M-LABEL: f1:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: movs r0, #0
+; CHECK-8M-NEXT: movt r0, #16672
+; CHECK-8M-NEXT: blx r1
+; CHECK-8M-NEXT: pop.w {r7, lr}
+; CHECK-8M-NEXT: mrs r12, control
+; CHECK-8M-NEXT: tst.w r12, #8
+; CHECK-8M-NEXT: beq .LBB0_2
+; CHECK-8M-NEXT: @ %bb.1: @ %entry
+; CHECK-8M-NEXT: vmrs r12, fpscr
+; CHECK-8M-NEXT: vmov d0, lr, lr
+; CHECK-8M-NEXT: vmov d1, lr, lr
+; CHECK-8M-NEXT: vmov d2, lr, lr
+; CHECK-8M-NEXT: vmov d3, lr, lr
+; CHECK-8M-NEXT: vmov d4, lr, lr
+; CHECK-8M-NEXT: vmov d5, lr, lr
+; CHECK-8M-NEXT: vmov d6, lr, lr
+; CHECK-8M-NEXT: vmov d7, lr, lr
+; CHECK-8M-NEXT: bic r12, r12, #159
+; CHECK-8M-NEXT: bic r12, r12, #4026531840
+; CHECK-8M-NEXT: vmsr fpscr, r12
+; CHECK-8M-NEXT: .LBB0_2: @ %entry
+; CHECK-8M-NEXT: mov r1, lr
+; CHECK-8M-NEXT: mov r2, lr
+; CHECK-8M-NEXT: mov r3, lr
+; CHECK-8M-NEXT: mov r12, lr
+; CHECK-8M-NEXT: msr apsr_nzcvqg, lr
+; CHECK-8M-NEXT: bxns lr
+;
+; CHECK-81M-LABEL: f1:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: sub sp, #4
+; CHECK-81M-NEXT: mov r1, r0
+; CHECK-81M-NEXT: movs r0, #0
+; CHECK-81M-NEXT: movt r0, #16672
+; CHECK-81M-NEXT: blx r1
+; CHECK-81M-NEXT: add sp, #4
+; CHECK-81M-NEXT: pop.w {r7, lr}
+; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r12, apsr}
+; CHECK-81M-NEXT: bxns lr
+entry:
+ %call = call float %fptr(float 10.0) #1
+ ret float %call
+}
+
+attributes #0 = { "cmse_nonsecure_entry" nounwind }
+attributes #1 = { nounwind }
+
+define double @d1(double (double)* nocapture %fptr) #0 {
+; CHECK-8M-LE-LABEL: d1:
+; CHECK-8M-LE: @ %bb.0: @ %entry
+; CHECK-8M-LE-NEXT: push {r7, lr}
+; CHECK-8M-LE-NEXT: vldr d0, .LCPI1_0
+; CHECK-8M-LE-NEXT: mov r2, r0
+; CHECK-8M-LE-NEXT: vmov r0, r1, d0
+; CHECK-8M-LE-NEXT: blx r2
+; CHECK-8M-LE-NEXT: pop.w {r7, lr}
+; CHECK-8M-LE-NEXT: mrs r12, control
+; CHECK-8M-LE-NEXT: tst.w r12, #8
+; CHECK-8M-LE-NEXT: beq .LBB1_2
+; CHECK-8M-LE-NEXT: @ %bb.1: @ %entry
+; CHECK-8M-LE-NEXT: vmrs r12, fpscr
+; CHECK-8M-LE-NEXT: vmov d0, lr, lr
+; CHECK-8M-LE-NEXT: vmov d1, lr, lr
+; CHECK-8M-LE-NEXT: vmov d2, lr, lr
+; CHECK-8M-LE-NEXT: vmov d3, lr, lr
+; CHECK-8M-LE-NEXT: vmov d4, lr, lr
+; CHECK-8M-LE-NEXT: vmov d5, lr, lr
+; CHECK-8M-LE-NEXT: vmov d6, lr, lr
+; CHECK-8M-LE-NEXT: vmov d7, lr, lr
+; CHECK-8M-LE-NEXT: bic r12, r12, #159
+; CHECK-8M-LE-NEXT: bic r12, r12, #4026531840
+; CHECK-8M-LE-NEXT: vmsr fpscr, r12
+; CHECK-8M-LE-NEXT: .LBB1_2: @ %entry
+; CHECK-8M-LE-NEXT: mov r2, lr
+; CHECK-8M-LE-NEXT: mov r3, lr
+; CHECK-8M-LE-NEXT: mov r12, lr
+; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, lr
+; CHECK-8M-LE-NEXT: bxns lr
+; CHECK-8M-LE-NEXT: .p2align 3
+; CHECK-8M-LE-NEXT: @ %bb.3:
+; CHECK-8M-LE-NEXT: .LCPI1_0:
+; CHECK-8M-LE-NEXT: .long 0 @ double 10
+; CHECK-8M-LE-NEXT: .long 1076101120
+;
+; CHECK-8M-BE-LABEL: d1:
+; CHECK-8M-BE: @ %bb.0: @ %entry
+; CHECK-8M-BE-NEXT: push {r7, lr}
+; CHECK-8M-BE-NEXT: vldr d0, .LCPI1_0
+; CHECK-8M-BE-NEXT: mov r2, r0
+; CHECK-8M-BE-NEXT: vmov r1, r0, d0
+; CHECK-8M-BE-NEXT: blx r2
+; CHECK-8M-BE-NEXT: pop.w {r7, lr}
+; CHECK-8M-BE-NEXT: mrs r12, control
+; CHECK-8M-BE-NEXT: tst.w r12, #8
+; CHECK-8M-BE-NEXT: beq .LBB1_2
+; CHECK-8M-BE-NEXT: @ %bb.1: @ %entry
+; CHECK-8M-BE-NEXT: vmrs r12, fpscr
+; CHECK-8M-BE-NEXT: vmov d0, lr, lr
+; CHECK-8M-BE-NEXT: vmov d1, lr, lr
+; CHECK-8M-BE-NEXT: vmov d2, lr, lr
+; CHECK-8M-BE-NEXT: vmov d3, lr, lr
+; CHECK-8M-BE-NEXT: vmov d4, lr, lr
+; CHECK-8M-BE-NEXT: vmov d5, lr, lr
+; CHECK-8M-BE-NEXT: vmov d6, lr, lr
+; CHECK-8M-BE-NEXT: vmov d7, lr, lr
+; CHECK-8M-BE-NEXT: bic r12, r12, #159
+; CHECK-8M-BE-NEXT: bic r12, r12, #4026531840
+; CHECK-8M-BE-NEXT: vmsr fpscr, r12
+; CHECK-8M-BE-NEXT: .LBB1_2: @ %entry
+; CHECK-8M-BE-NEXT: mov r2, lr
+; CHECK-8M-BE-NEXT: mov r3, lr
+; CHECK-8M-BE-NEXT: mov r12, lr
+; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, lr
+; CHECK-8M-BE-NEXT: bxns lr
+; CHECK-8M-BE-NEXT: .p2align 3
+; CHECK-8M-BE-NEXT: @ %bb.3:
+; CHECK-8M-BE-NEXT: .LCPI1_0:
+; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-8M-BE-NEXT: .long 0
+;
+; CHECK-81M-LE-LABEL: d1:
+; CHECK-81M-LE: @ %bb.0: @ %entry
+; CHECK-81M-LE-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-LE-NEXT: push {r7, lr}
+; CHECK-81M-LE-NEXT: sub sp, #4
+; CHECK-81M-LE-NEXT: vldr d0, .LCPI1_0
+; CHECK-81M-LE-NEXT: mov r2, r0
+; CHECK-81M-LE-NEXT: vmov r0, r1, d0
+; CHECK-81M-LE-NEXT: blx r2
+; CHECK-81M-LE-NEXT: add sp, #4
+; CHECK-81M-LE-NEXT: pop.w {r7, lr}
+; CHECK-81M-LE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-LE-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-LE-NEXT: clrm {r2, r3, r12, apsr}
+; CHECK-81M-LE-NEXT: bxns lr
+; CHECK-81M-LE-NEXT: .p2align 3
+; CHECK-81M-LE-NEXT: @ %bb.1:
+; CHECK-81M-LE-NEXT: .LCPI1_0:
+; CHECK-81M-LE-NEXT: .long 0 @ double 10
+; CHECK-81M-LE-NEXT: .long 1076101120
+;
+; CHECK-81M-BE-LABEL: d1:
+; CHECK-81M-BE: @ %bb.0: @ %entry
+; CHECK-81M-BE-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-BE-NEXT: push {r7, lr}
+; CHECK-81M-BE-NEXT: sub sp, #4
+; CHECK-81M-BE-NEXT: vldr d0, .LCPI1_0
+; CHECK-81M-BE-NEXT: mov r2, r0
+; CHECK-81M-BE-NEXT: vmov r1, r0, d0
+; CHECK-81M-BE-NEXT: blx r2
+; CHECK-81M-BE-NEXT: add sp, #4
+; CHECK-81M-BE-NEXT: pop.w {r7, lr}
+; CHECK-81M-BE-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-BE-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-BE-NEXT: clrm {r2, r3, r12, apsr}
+; CHECK-81M-BE-NEXT: bxns lr
+; CHECK-81M-BE-NEXT: .p2align 3
+; CHECK-81M-BE-NEXT: @ %bb.1:
+; CHECK-81M-BE-NEXT: .LCPI1_0:
+; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-81M-BE-NEXT: .long 0
+entry:
+ %call = call double %fptr(double 10.0) #1
+ ret double %call
+}
+
+define float @f2(float (float)* nocapture %fptr) #2 {
+; CHECK-8M-LABEL: f2:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: movs r0, #0
+; CHECK-8M-NEXT: movt r0, #16672
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r1, r1, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: mov r2, r1
+; CHECK-8M-NEXT: mov r3, r1
+; CHECK-8M-NEXT: mov r4, r1
+; CHECK-8M-NEXT: mov r5, r1
+; CHECK-8M-NEXT: mov r6, r1
+; CHECK-8M-NEXT: mov r7, r1
+; CHECK-8M-NEXT: mov r8, r1
+; CHECK-8M-NEXT: mov r9, r1
+; CHECK-8M-NEXT: mov r10, r1
+; CHECK-8M-NEXT: mov r11, r1
+; CHECK-8M-NEXT: mov r12, r1
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r1
+; CHECK-8M-NEXT: blxns r1
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: f2:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: mov r1, r0
+; CHECK-81M-NEXT: movs r0, #0
+; CHECK-81M-NEXT: movt r0, #16672
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r1, r1, #1
+; CHECK-81M-NEXT: sub sp, #136
+; CHECK-81M-NEXT: vlstm sp
+; CHECK-81M-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r1
+; CHECK-81M-NEXT: vlldm sp
+; CHECK-81M-NEXT: add sp, #136
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ %call = call float %fptr(float 10.0) #3
+ ret float %call
+}
+
+attributes #2 = { nounwind }
+attributes #3 = { "cmse_nonsecure_call" nounwind }
+
+define double @d2(double (double)* nocapture %fptr) #2 {
+; CHECK-8M-LE-LABEL: d2:
+; CHECK-8M-LE: @ %bb.0: @ %entry
+; CHECK-8M-LE-NEXT: push {r7, lr}
+; CHECK-8M-LE-NEXT: vldr d0, .LCPI3_0
+; CHECK-8M-LE-NEXT: mov r2, r0
+; CHECK-8M-LE-NEXT: vmov r0, r1, d0
+; CHECK-8M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-LE-NEXT: bic r2, r2, #1
+; CHECK-8M-LE-NEXT: sub sp, #136
+; CHECK-8M-LE-NEXT: vlstm sp
+; CHECK-8M-LE-NEXT: mov r3, r2
+; CHECK-8M-LE-NEXT: mov r4, r2
+; CHECK-8M-LE-NEXT: mov r5, r2
+; CHECK-8M-LE-NEXT: mov r6, r2
+; CHECK-8M-LE-NEXT: mov r7, r2
+; CHECK-8M-LE-NEXT: mov r8, r2
+; CHECK-8M-LE-NEXT: mov r9, r2
+; CHECK-8M-LE-NEXT: mov r10, r2
+; CHECK-8M-LE-NEXT: mov r11, r2
+; CHECK-8M-LE-NEXT: mov r12, r2
+; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, r2
+; CHECK-8M-LE-NEXT: blxns r2
+; CHECK-8M-LE-NEXT: vlldm sp
+; CHECK-8M-LE-NEXT: add sp, #136
+; CHECK-8M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-LE-NEXT: pop {r7, pc}
+; CHECK-8M-LE-NEXT: .p2align 3
+; CHECK-8M-LE-NEXT: @ %bb.1:
+; CHECK-8M-LE-NEXT: .LCPI3_0:
+; CHECK-8M-LE-NEXT: .long 0 @ double 10
+; CHECK-8M-LE-NEXT: .long 1076101120
+;
+; CHECK-8M-BE-LABEL: d2:
+; CHECK-8M-BE: @ %bb.0: @ %entry
+; CHECK-8M-BE-NEXT: push {r7, lr}
+; CHECK-8M-BE-NEXT: vldr d0, .LCPI3_0
+; CHECK-8M-BE-NEXT: mov r2, r0
+; CHECK-8M-BE-NEXT: vmov r1, r0, d0
+; CHECK-8M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-BE-NEXT: bic r2, r2, #1
+; CHECK-8M-BE-NEXT: sub sp, #136
+; CHECK-8M-BE-NEXT: vlstm sp
+; CHECK-8M-BE-NEXT: mov r3, r2
+; CHECK-8M-BE-NEXT: mov r4, r2
+; CHECK-8M-BE-NEXT: mov r5, r2
+; CHECK-8M-BE-NEXT: mov r6, r2
+; CHECK-8M-BE-NEXT: mov r7, r2
+; CHECK-8M-BE-NEXT: mov r8, r2
+; CHECK-8M-BE-NEXT: mov r9, r2
+; CHECK-8M-BE-NEXT: mov r10, r2
+; CHECK-8M-BE-NEXT: mov r11, r2
+; CHECK-8M-BE-NEXT: mov r12, r2
+; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, r2
+; CHECK-8M-BE-NEXT: blxns r2
+; CHECK-8M-BE-NEXT: vlldm sp
+; CHECK-8M-BE-NEXT: add sp, #136
+; CHECK-8M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-BE-NEXT: pop {r7, pc}
+; CHECK-8M-BE-NEXT: .p2align 3
+; CHECK-8M-BE-NEXT: @ %bb.1:
+; CHECK-8M-BE-NEXT: .LCPI3_0:
+; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-8M-BE-NEXT: .long 0
+;
+; CHECK-81M-LE-LABEL: d2:
+; CHECK-81M-LE: @ %bb.0: @ %entry
+; CHECK-81M-LE-NEXT: push {r7, lr}
+; CHECK-81M-LE-NEXT: vldr d0, .LCPI3_0
+; CHECK-81M-LE-NEXT: mov r2, r0
+; CHECK-81M-LE-NEXT: vmov r0, r1, d0
+; CHECK-81M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-LE-NEXT: bic r2, r2, #1
+; CHECK-81M-LE-NEXT: sub sp, #136
+; CHECK-81M-LE-NEXT: vlstm sp
+; CHECK-81M-LE-NEXT: clrm {r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-LE-NEXT: blxns r2
+; CHECK-81M-LE-NEXT: vlldm sp
+; CHECK-81M-LE-NEXT: add sp, #136
+; CHECK-81M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-LE-NEXT: pop {r7, pc}
+; CHECK-81M-LE-NEXT: .p2align 3
+; CHECK-81M-LE-NEXT: @ %bb.1:
+; CHECK-81M-LE-NEXT: .LCPI3_0:
+; CHECK-81M-LE-NEXT: .long 0 @ double 10
+; CHECK-81M-LE-NEXT: .long 1076101120
+;
+; CHECK-81M-BE-LABEL: d2:
+; CHECK-81M-BE: @ %bb.0: @ %entry
+; CHECK-81M-BE-NEXT: push {r7, lr}
+; CHECK-81M-BE-NEXT: vldr d0, .LCPI3_0
+; CHECK-81M-BE-NEXT: mov r2, r0
+; CHECK-81M-BE-NEXT: vmov r1, r0, d0
+; CHECK-81M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-BE-NEXT: bic r2, r2, #1
+; CHECK-81M-BE-NEXT: sub sp, #136
+; CHECK-81M-BE-NEXT: vlstm sp
+; CHECK-81M-BE-NEXT: clrm {r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-BE-NEXT: blxns r2
+; CHECK-81M-BE-NEXT: vlldm sp
+; CHECK-81M-BE-NEXT: add sp, #136
+; CHECK-81M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-BE-NEXT: pop {r7, pc}
+; CHECK-81M-BE-NEXT: .p2align 3
+; CHECK-81M-BE-NEXT: @ %bb.1:
+; CHECK-81M-BE-NEXT: .LCPI3_0:
+; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-81M-BE-NEXT: .long 0
+entry:
+ %call = call double %fptr(double 10.0) #3
+ ret double %call
+}
+
+define float @f3(float (float)* nocapture %fptr) #4 {
+; CHECK-8M-LABEL: f3:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: movs r0, #0
+; CHECK-8M-NEXT: movt r0, #16672
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r1, r1, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: mov r2, r1
+; CHECK-8M-NEXT: mov r3, r1
+; CHECK-8M-NEXT: mov r4, r1
+; CHECK-8M-NEXT: mov r5, r1
+; CHECK-8M-NEXT: mov r6, r1
+; CHECK-8M-NEXT: mov r7, r1
+; CHECK-8M-NEXT: mov r8, r1
+; CHECK-8M-NEXT: mov r9, r1
+; CHECK-8M-NEXT: mov r10, r1
+; CHECK-8M-NEXT: mov r11, r1
+; CHECK-8M-NEXT: mov r12, r1
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r1
+; CHECK-8M-NEXT: blxns r1
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: f3:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: mov r1, r0
+; CHECK-81M-NEXT: movs r0, #0
+; CHECK-81M-NEXT: movt r0, #16672
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r1, r1, #1
+; CHECK-81M-NEXT: sub sp, #136
+; CHECK-81M-NEXT: vlstm sp
+; CHECK-81M-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r1
+; CHECK-81M-NEXT: vlldm sp
+; CHECK-81M-NEXT: add sp, #136
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ %call = tail call float %fptr(float 10.0) #5
+ ret float %call
+}
+
+attributes #4 = { nounwind }
+attributes #5 = { "cmse_nonsecure_call" nounwind }
+
+define double @d3(double (double)* nocapture %fptr) #4 {
+; CHECK-8M-LE-LABEL: d3:
+; CHECK-8M-LE: @ %bb.0: @ %entry
+; CHECK-8M-LE-NEXT: push {r7, lr}
+; CHECK-8M-LE-NEXT: vldr d0, .LCPI5_0
+; CHECK-8M-LE-NEXT: mov r2, r0
+; CHECK-8M-LE-NEXT: vmov r0, r1, d0
+; CHECK-8M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-LE-NEXT: bic r2, r2, #1
+; CHECK-8M-LE-NEXT: sub sp, #136
+; CHECK-8M-LE-NEXT: vlstm sp
+; CHECK-8M-LE-NEXT: mov r3, r2
+; CHECK-8M-LE-NEXT: mov r4, r2
+; CHECK-8M-LE-NEXT: mov r5, r2
+; CHECK-8M-LE-NEXT: mov r6, r2
+; CHECK-8M-LE-NEXT: mov r7, r2
+; CHECK-8M-LE-NEXT: mov r8, r2
+; CHECK-8M-LE-NEXT: mov r9, r2
+; CHECK-8M-LE-NEXT: mov r10, r2
+; CHECK-8M-LE-NEXT: mov r11, r2
+; CHECK-8M-LE-NEXT: mov r12, r2
+; CHECK-8M-LE-NEXT: msr apsr_nzcvqg, r2
+; CHECK-8M-LE-NEXT: blxns r2
+; CHECK-8M-LE-NEXT: vlldm sp
+; CHECK-8M-LE-NEXT: add sp, #136
+; CHECK-8M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-LE-NEXT: pop {r7, pc}
+; CHECK-8M-LE-NEXT: .p2align 3
+; CHECK-8M-LE-NEXT: @ %bb.1:
+; CHECK-8M-LE-NEXT: .LCPI5_0:
+; CHECK-8M-LE-NEXT: .long 0 @ double 10
+; CHECK-8M-LE-NEXT: .long 1076101120
+;
+; CHECK-8M-BE-LABEL: d3:
+; CHECK-8M-BE: @ %bb.0: @ %entry
+; CHECK-8M-BE-NEXT: push {r7, lr}
+; CHECK-8M-BE-NEXT: vldr d0, .LCPI5_0
+; CHECK-8M-BE-NEXT: mov r2, r0
+; CHECK-8M-BE-NEXT: vmov r1, r0, d0
+; CHECK-8M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-BE-NEXT: bic r2, r2, #1
+; CHECK-8M-BE-NEXT: sub sp, #136
+; CHECK-8M-BE-NEXT: vlstm sp
+; CHECK-8M-BE-NEXT: mov r3, r2
+; CHECK-8M-BE-NEXT: mov r4, r2
+; CHECK-8M-BE-NEXT: mov r5, r2
+; CHECK-8M-BE-NEXT: mov r6, r2
+; CHECK-8M-BE-NEXT: mov r7, r2
+; CHECK-8M-BE-NEXT: mov r8, r2
+; CHECK-8M-BE-NEXT: mov r9, r2
+; CHECK-8M-BE-NEXT: mov r10, r2
+; CHECK-8M-BE-NEXT: mov r11, r2
+; CHECK-8M-BE-NEXT: mov r12, r2
+; CHECK-8M-BE-NEXT: msr apsr_nzcvqg, r2
+; CHECK-8M-BE-NEXT: blxns r2
+; CHECK-8M-BE-NEXT: vlldm sp
+; CHECK-8M-BE-NEXT: add sp, #136
+; CHECK-8M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-BE-NEXT: pop {r7, pc}
+; CHECK-8M-BE-NEXT: .p2align 3
+; CHECK-8M-BE-NEXT: @ %bb.1:
+; CHECK-8M-BE-NEXT: .LCPI5_0:
+; CHECK-8M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-8M-BE-NEXT: .long 0
+;
+; CHECK-81M-LE-LABEL: d3:
+; CHECK-81M-LE: @ %bb.0: @ %entry
+; CHECK-81M-LE-NEXT: push {r7, lr}
+; CHECK-81M-LE-NEXT: vldr d0, .LCPI5_0
+; CHECK-81M-LE-NEXT: mov r2, r0
+; CHECK-81M-LE-NEXT: vmov r0, r1, d0
+; CHECK-81M-LE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-LE-NEXT: bic r2, r2, #1
+; CHECK-81M-LE-NEXT: sub sp, #136
+; CHECK-81M-LE-NEXT: vlstm sp
+; CHECK-81M-LE-NEXT: clrm {r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-LE-NEXT: blxns r2
+; CHECK-81M-LE-NEXT: vlldm sp
+; CHECK-81M-LE-NEXT: add sp, #136
+; CHECK-81M-LE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-LE-NEXT: pop {r7, pc}
+; CHECK-81M-LE-NEXT: .p2align 3
+; CHECK-81M-LE-NEXT: @ %bb.1:
+; CHECK-81M-LE-NEXT: .LCPI5_0:
+; CHECK-81M-LE-NEXT: .long 0 @ double 10
+; CHECK-81M-LE-NEXT: .long 1076101120
+;
+; CHECK-81M-BE-LABEL: d3:
+; CHECK-81M-BE: @ %bb.0: @ %entry
+; CHECK-81M-BE-NEXT: push {r7, lr}
+; CHECK-81M-BE-NEXT: vldr d0, .LCPI5_0
+; CHECK-81M-BE-NEXT: mov r2, r0
+; CHECK-81M-BE-NEXT: vmov r1, r0, d0
+; CHECK-81M-BE-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-BE-NEXT: bic r2, r2, #1
+; CHECK-81M-BE-NEXT: sub sp, #136
+; CHECK-81M-BE-NEXT: vlstm sp
+; CHECK-81M-BE-NEXT: clrm {r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-BE-NEXT: blxns r2
+; CHECK-81M-BE-NEXT: vlldm sp
+; CHECK-81M-BE-NEXT: add sp, #136
+; CHECK-81M-BE-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-BE-NEXT: pop {r7, pc}
+; CHECK-81M-BE-NEXT: .p2align 3
+; CHECK-81M-BE-NEXT: @ %bb.1:
+; CHECK-81M-BE-NEXT: .LCPI5_0:
+; CHECK-81M-BE-NEXT: .long 1076101120 @ double 10
+; CHECK-81M-BE-NEXT: .long 0
+entry:
+ %call = tail call double %fptr(double 10.0) #5
+ ret double %call
+}
+
+define float @f4(float ()* nocapture %fptr) #6 {
+; CHECK-8M-LABEL: f4:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r0, r0, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: mov r2, r0
+; CHECK-8M-NEXT: mov r3, r0
+; CHECK-8M-NEXT: mov r4, r0
+; CHECK-8M-NEXT: mov r5, r0
+; CHECK-8M-NEXT: mov r6, r0
+; CHECK-8M-NEXT: mov r7, r0
+; CHECK-8M-NEXT: mov r8, r0
+; CHECK-8M-NEXT: mov r9, r0
+; CHECK-8M-NEXT: mov r10, r0
+; CHECK-8M-NEXT: mov r11, r0
+; CHECK-8M-NEXT: mov r12, r0
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT: blxns r0
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: f4:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r0, r0, #1
+; CHECK-81M-NEXT: sub sp, #136
+; CHECK-81M-NEXT: vlstm sp
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r0
+; CHECK-81M-NEXT: vlldm sp
+; CHECK-81M-NEXT: add sp, #136
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ %call = call float %fptr() #7
+ ret float %call
+}
+
+attributes #6 = { nounwind }
+attributes #7 = { "cmse_nonsecure_call" nounwind }
+
+define double @d4(double ()* nocapture %fptr) #6 {
+; CHECK-8M-LABEL: d4:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r0, r0, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: mov r2, r0
+; CHECK-8M-NEXT: mov r3, r0
+; CHECK-8M-NEXT: mov r4, r0
+; CHECK-8M-NEXT: mov r5, r0
+; CHECK-8M-NEXT: mov r6, r0
+; CHECK-8M-NEXT: mov r7, r0
+; CHECK-8M-NEXT: mov r8, r0
+; CHECK-8M-NEXT: mov r9, r0
+; CHECK-8M-NEXT: mov r10, r0
+; CHECK-8M-NEXT: mov r11, r0
+; CHECK-8M-NEXT: mov r12, r0
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r0
+; CHECK-8M-NEXT: blxns r0
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: d4:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r0, r0, #1
+; CHECK-81M-NEXT: sub sp, #136
+; CHECK-81M-NEXT: vlstm sp
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r0
+; CHECK-81M-NEXT: vlldm sp
+; CHECK-81M-NEXT: add sp, #136
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ %call = call double %fptr() #7
+ ret double %call
+}
+
+define void @fd(void (float, double)* %f, float %a, double %b) #8 {
+; CHECK-8M-LABEL: fd:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: mov r12, r0
+; CHECK-8M-NEXT: mov r0, r1
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r12, r12, #1
+; CHECK-8M-NEXT: sub sp, #136
+; CHECK-8M-NEXT: vlstm sp
+; CHECK-8M-NEXT: mov r1, r12
+; CHECK-8M-NEXT: mov r4, r12
+; CHECK-8M-NEXT: mov r5, r12
+; CHECK-8M-NEXT: mov r6, r12
+; CHECK-8M-NEXT: mov r7, r12
+; CHECK-8M-NEXT: mov r8, r12
+; CHECK-8M-NEXT: mov r9, r12
+; CHECK-8M-NEXT: mov r10, r12
+; CHECK-8M-NEXT: mov r11, r12
+; CHECK-8M-NEXT: msr apsr_nzcvqg, r12
+; CHECK-8M-NEXT: blxns r12
+; CHECK-8M-NEXT: vlldm sp
+; CHECK-8M-NEXT: add sp, #136
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: fd:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: mov r12, r0
+; CHECK-81M-NEXT: mov r0, r1
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r12, r12, #1
+; CHECK-81M-NEXT: sub sp, #136
+; CHECK-81M-NEXT: vlstm sp
+; CHECK-81M-NEXT: clrm {r1, r4, r5, r6, r7, r8, r9, r10, r11, apsr}
+; CHECK-81M-NEXT: blxns r12
+; CHECK-81M-NEXT: vlldm sp
+; CHECK-81M-NEXT: add sp, #136
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ call void %f(float %a, double %b) #9
+ ret void
+}
+
+attributes #8 = { nounwind }
+attributes #9 = { "cmse_nonsecure_call" nounwind }
+
+define float @f1_minsize(float (float)* nocapture %fptr) #10 {
+; CHECK-8M-LABEL: f1_minsize:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: ldr r0, .LCPI9_0
+; CHECK-8M-NEXT: blx r1
+; CHECK-8M-NEXT: pop.w {r7, lr}
+; CHECK-8M-NEXT: vmrs r12, fpscr
+; CHECK-8M-NEXT: vmov d0, lr, lr
+; CHECK-8M-NEXT: vmov d1, lr, lr
+; CHECK-8M-NEXT: mov r1, lr
+; CHECK-8M-NEXT: vmov d2, lr, lr
+; CHECK-8M-NEXT: mov r2, lr
+; CHECK-8M-NEXT: vmov d3, lr, lr
+; CHECK-8M-NEXT: mov r3, lr
+; CHECK-8M-NEXT: vmov d4, lr, lr
+; CHECK-8M-NEXT: vmov d5, lr, lr
+; CHECK-8M-NEXT: vmov d6, lr, lr
+; CHECK-8M-NEXT: vmov d7, lr, lr
+; CHECK-8M-NEXT: bic r12, r12, #159
+; CHECK-8M-NEXT: bic r12, r12, #4026531840
+; CHECK-8M-NEXT: vmsr fpscr, r12
+; CHECK-8M-NEXT: mov r12, lr
+; CHECK-8M-NEXT: msr apsr_nzcvqg, lr
+; CHECK-8M-NEXT: bxns lr
+; CHECK-8M-NEXT: .p2align 2
+; CHECK-8M-NEXT: @ %bb.1:
+; CHECK-8M-NEXT: .LCPI9_0:
+; CHECK-8M-NEXT: .long 1092616192 @ 0x41200000
+;
+; CHECK-81M-LABEL: f1_minsize:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-NEXT: push {r6, r7, lr}
+; CHECK-81M-NEXT: mov r1, r0
+; CHECK-81M-NEXT: ldr r0, .LCPI9_0
+; CHECK-81M-NEXT: blx r1
+; CHECK-81M-NEXT: pop.w {r3, r7, lr}
+; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r12, apsr}
+; CHECK-81M-NEXT: bxns lr
+; CHECK-81M-NEXT: .p2align 2
+; CHECK-81M-NEXT: @ %bb.1:
+; CHECK-81M-NEXT: .LCPI9_0:
+; CHECK-81M-NEXT: .long 1092616192 @ 0x41200000
+entry:
+ %call = call float %fptr(float 10.0) #11
+ ret float %call
+}
+
+attributes #10 = { "cmse_nonsecure_entry" minsize nounwind }
+attributes #11 = { nounwind }
diff --git a/llvm/test/CodeGen/ARM/cmse-clear.ll b/llvm/test/CodeGen/ARM/cmse-clear.ll
new file mode 100644
index 000000000000..5c5a8674bdf2
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-clear.ll
@@ -0,0 +1,634 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -mtriple=thumbv8m.base | \
+; RUN: FileCheck %s --check-prefix=CHECK-8B
+; RUN: llc %s -o - -mtriple=thumbebv8m.base | \
+; RUN: FileCheck %s --check-prefix=CHECK-8B
+; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=-fpregs,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-8M-SOFT
+; RUN: llc %s -o - -mtriple=thumbebv8m.main -mattr=-fpregs,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-8M-SOFT
+; RUN: llc %s -o - -mtriple=thumbv8m.main -mattr=+fp-armv8d16sp,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-8M-SOFTFP
+; RUN: llc %s -o - -mtriple=thumbebv8m.main -mattr=+fp-armv8d16sp,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-8M-SOFTFP
+; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=-fpregs,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFT
+; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=-fpregs,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFT
+; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+fp-armv8d16sp,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFTFP
+; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=+fp-armv8d16sp,+dsp | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFTFP
+; RUN: llc %s -o - -mtriple=thumbv8.1m.main -mattr=+mve | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFTFP
+; RUN: llc %s -o - -mtriple=thumbebv8.1m.main -mattr=mve | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M-SOFTFP
+
+define i32 @ns_entry(i32 (i32)* nocapture %fptr) #0 {
+; CHECK-8B-LABEL: ns_entry:
+; CHECK-8B: @ %bb.0: @ %entry
+; CHECK-8B-NEXT: push {r7, lr}
+; CHECK-8B-NEXT: mov r1, r0
+; CHECK-8B-NEXT: movs r0, #10
+; CHECK-8B-NEXT: blx r1
+; CHECK-8B-NEXT: pop {r7}
+; CHECK-8B-NEXT: pop {r1}
+; CHECK-8B-NEXT: mov lr, r1
+; CHECK-8B-NEXT: mov r1, lr
+; CHECK-8B-NEXT: mov r2, lr
+; CHECK-8B-NEXT: mov r3, lr
+; CHECK-8B-NEXT: mov r12, lr
+; CHECK-8B-NEXT: msr apsr, lr
+; CHECK-8B-NEXT: bxns lr
+;
+; CHECK-8M-SOFT-LABEL: ns_entry:
+; CHECK-8M-SOFT: @ %bb.0: @ %entry
+; CHECK-8M-SOFT-NEXT: push {r7, lr}
+; CHECK-8M-SOFT-NEXT: mov r1, r0
+; CHECK-8M-SOFT-NEXT: movs r0, #10
+; CHECK-8M-SOFT-NEXT: blx r1
+; CHECK-8M-SOFT-NEXT: pop.w {r7, lr}
+; CHECK-8M-SOFT-NEXT: mov r1, lr
+; CHECK-8M-SOFT-NEXT: mov r2, lr
+; CHECK-8M-SOFT-NEXT: mov r3, lr
+; CHECK-8M-SOFT-NEXT: mov r12, lr
+; CHECK-8M-SOFT-NEXT: msr apsr_nzcvqg, lr
+; CHECK-8M-SOFT-NEXT: bxns lr
+;
+; CHECK-8M-SOFTFP-LABEL: ns_entry:
+; CHECK-8M-SOFTFP: @ %bb.0: @ %entry
+; CHECK-8M-SOFTFP-NEXT: push {r7, lr}
+; CHECK-8M-SOFTFP-NEXT: mov r1, r0
+; CHECK-8M-SOFTFP-NEXT: movs r0, #10
+; CHECK-8M-SOFTFP-NEXT: blx r1
+; CHECK-8M-SOFTFP-NEXT: pop.w {r7, lr}
+; CHECK-8M-SOFTFP-NEXT: mrs r12, control
+; CHECK-8M-SOFTFP-NEXT: tst.w r12, #8
+; CHECK-8M-SOFTFP-NEXT: beq .LBB0_2
+; CHECK-8M-SOFTFP-NEXT: @ %bb.1: @ %entry
+; CHECK-8M-SOFTFP-NEXT: vmrs r12, fpscr
+; CHECK-8M-SOFTFP-NEXT: vmov d0, lr, lr
+; CHECK-8M-SOFTFP-NEXT: vmov d1, lr, lr
+; CHECK-8M-SOFTFP-NEXT: vmov d2, lr, lr
+; CHECK-8M-SOFTFP-NEXT: vmov d3, lr, lr
+; CHECK-8M-SOFTFP-NEXT: vmov d4, lr, lr
+; CHECK-8M-SOFTFP-NEXT: vmov d5, lr, lr
+; CHECK-8M-SOFTFP-NEXT: vmov d6, lr, lr
+; CHECK-8M-SOFTFP-NEXT: vmov d7, lr, lr
+; CHECK-8M-SOFTFP-NEXT: bic r12, r12, #159
+; CHECK-8M-SOFTFP-NEXT: bic r12, r12, #4026531840
+; CHECK-8M-SOFTFP-NEXT: vmsr fpscr, r12
+; CHECK-8M-SOFTFP-NEXT: .LBB0_2: @ %entry
+; CHECK-8M-SOFTFP-NEXT: mov r1, lr
+; CHECK-8M-SOFTFP-NEXT: mov r2, lr
+; CHECK-8M-SOFTFP-NEXT: mov r3, lr
+; CHECK-8M-SOFTFP-NEXT: mov r12, lr
+; CHECK-8M-SOFTFP-NEXT: msr apsr_nzcvqg, lr
+; CHECK-8M-SOFTFP-NEXT: bxns lr
+;
+; CHECK-81M-SOFT-LABEL: ns_entry:
+; CHECK-81M-SOFT: @ %bb.0: @ %entry
+; CHECK-81M-SOFT-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-SOFT-NEXT: push {r7, lr}
+; CHECK-81M-SOFT-NEXT: sub sp, #4
+; CHECK-81M-SOFT-NEXT: mov r1, r0
+; CHECK-81M-SOFT-NEXT: movs r0, #10
+; CHECK-81M-SOFT-NEXT: blx r1
+; CHECK-81M-SOFT-NEXT: add sp, #4
+; CHECK-81M-SOFT-NEXT: pop.w {r7, lr}
+; CHECK-81M-SOFT-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-SOFT-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-SOFT-NEXT: clrm {r1, r2, r3, r12, apsr}
+; CHECK-81M-SOFT-NEXT: bxns lr
+;
+; CHECK-81M-SOFTFP-LABEL: ns_entry:
+; CHECK-81M-SOFTFP: @ %bb.0: @ %entry
+; CHECK-81M-SOFTFP-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-SOFTFP-NEXT: push {r7, lr}
+; CHECK-81M-SOFTFP-NEXT: sub sp, #4
+; CHECK-81M-SOFTFP-NEXT: mov r1, r0
+; CHECK-81M-SOFTFP-NEXT: movs r0, #10
+; CHECK-81M-SOFTFP-NEXT: blx r1
+; CHECK-81M-SOFTFP-NEXT: add sp, #4
+; CHECK-81M-SOFTFP-NEXT: pop.w {r7, lr}
+; CHECK-81M-SOFTFP-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-SOFTFP-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-SOFTFP-NEXT: clrm {r1, r2, r3, r12, apsr}
+; CHECK-81M-SOFTFP-NEXT: bxns lr
+entry:
+ %call = call i32 %fptr(i32 10) #1
+ ret i32 %call
+}
+
+attributes #0 = { "cmse_nonsecure_entry" nounwind }
+attributes #1 = { nounwind }
+
+
+define i32 @ns_call(i32 (i32)* nocapture %fptr) #2 {
+; CHECK-8B-LABEL: ns_call:
+; CHECK-8B: @ %bb.0: @ %entry
+; CHECK-8B-NEXT: push {r7, lr}
+; CHECK-8B-NEXT: mov r1, r0
+; CHECK-8B-NEXT: movs r0, #10
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r7, r11
+; CHECK-8B-NEXT: mov r6, r10
+; CHECK-8B-NEXT: mov r5, r9
+; CHECK-8B-NEXT: mov r4, r8
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r2, #1
+; CHECK-8B-NEXT: bics r1, r2
+; CHECK-8B-NEXT: mov r2, r1
+; CHECK-8B-NEXT: mov r3, r1
+; CHECK-8B-NEXT: mov r4, r1
+; CHECK-8B-NEXT: mov r5, r1
+; CHECK-8B-NEXT: mov r6, r1
+; CHECK-8B-NEXT: mov r7, r1
+; CHECK-8B-NEXT: mov r8, r1
+; CHECK-8B-NEXT: mov r9, r1
+; CHECK-8B-NEXT: mov r10, r1
+; CHECK-8B-NEXT: mov r11, r1
+; CHECK-8B-NEXT: mov r12, r1
+; CHECK-8B-NEXT: msr apsr, r1
+; CHECK-8B-NEXT: blxns r1
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r8, r4
+; CHECK-8B-NEXT: mov r9, r5
+; CHECK-8B-NEXT: mov r10, r6
+; CHECK-8B-NEXT: mov r11, r7
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: pop {r7, pc}
+;
+; CHECK-8M-SOFT-LABEL: ns_call:
+; CHECK-8M-SOFT: @ %bb.0: @ %entry
+; CHECK-8M-SOFT-NEXT: push {r7, lr}
+; CHECK-8M-SOFT-NEXT: mov r1, r0
+; CHECK-8M-SOFT-NEXT: movs r0, #10
+; CHECK-8M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFT-NEXT: bic r1, r1, #1
+; CHECK-8M-SOFT-NEXT: msr apsr_nzcvqg, r1
+; CHECK-8M-SOFT-NEXT: mov r2, r1
+; CHECK-8M-SOFT-NEXT: mov r3, r1
+; CHECK-8M-SOFT-NEXT: mov r4, r1
+; CHECK-8M-SOFT-NEXT: mov r5, r1
+; CHECK-8M-SOFT-NEXT: mov r6, r1
+; CHECK-8M-SOFT-NEXT: mov r7, r1
+; CHECK-8M-SOFT-NEXT: mov r8, r1
+; CHECK-8M-SOFT-NEXT: mov r9, r1
+; CHECK-8M-SOFT-NEXT: mov r10, r1
+; CHECK-8M-SOFT-NEXT: mov r11, r1
+; CHECK-8M-SOFT-NEXT: mov r12, r1
+; CHECK-8M-SOFT-NEXT: blxns r1
+; CHECK-8M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFT-NEXT: pop {r7, pc}
+;
+; CHECK-8M-SOFTFP-LABEL: ns_call:
+; CHECK-8M-SOFTFP: @ %bb.0: @ %entry
+; CHECK-8M-SOFTFP-NEXT: push {r7, lr}
+; CHECK-8M-SOFTFP-NEXT: mov r1, r0
+; CHECK-8M-SOFTFP-NEXT: movs r0, #10
+; CHECK-8M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFTFP-NEXT: bic r1, r1, #1
+; CHECK-8M-SOFTFP-NEXT: sub sp, #136
+; CHECK-8M-SOFTFP-NEXT: vlstm sp
+; CHECK-8M-SOFTFP-NEXT: mov r2, r1
+; CHECK-8M-SOFTFP-NEXT: mov r3, r1
+; CHECK-8M-SOFTFP-NEXT: mov r4, r1
+; CHECK-8M-SOFTFP-NEXT: mov r5, r1
+; CHECK-8M-SOFTFP-NEXT: mov r6, r1
+; CHECK-8M-SOFTFP-NEXT: mov r7, r1
+; CHECK-8M-SOFTFP-NEXT: mov r8, r1
+; CHECK-8M-SOFTFP-NEXT: mov r9, r1
+; CHECK-8M-SOFTFP-NEXT: mov r10, r1
+; CHECK-8M-SOFTFP-NEXT: mov r11, r1
+; CHECK-8M-SOFTFP-NEXT: mov r12, r1
+; CHECK-8M-SOFTFP-NEXT: msr apsr_nzcvqg, r1
+; CHECK-8M-SOFTFP-NEXT: blxns r1
+; CHECK-8M-SOFTFP-NEXT: vlldm sp
+; CHECK-8M-SOFTFP-NEXT: add sp, #136
+; CHECK-8M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFTFP-NEXT: pop {r7, pc}
+;
+; CHECK-81M-SOFT-LABEL: ns_call:
+; CHECK-81M-SOFT: @ %bb.0: @ %entry
+; CHECK-81M-SOFT-NEXT: push {r7, lr}
+; CHECK-81M-SOFT-NEXT: mov r1, r0
+; CHECK-81M-SOFT-NEXT: movs r0, #10
+; CHECK-81M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFT-NEXT: bic r1, r1, #1
+; CHECK-81M-SOFT-NEXT: sub sp, #136
+; CHECK-81M-SOFT-NEXT: vlstm sp
+; CHECK-81M-SOFT-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-SOFT-NEXT: blxns r1
+; CHECK-81M-SOFT-NEXT: vlldm sp
+; CHECK-81M-SOFT-NEXT: add sp, #136
+; CHECK-81M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFT-NEXT: pop {r7, pc}
+;
+; CHECK-81M-SOFTFP-LABEL: ns_call:
+; CHECK-81M-SOFTFP: @ %bb.0: @ %entry
+; CHECK-81M-SOFTFP-NEXT: push {r7, lr}
+; CHECK-81M-SOFTFP-NEXT: mov r1, r0
+; CHECK-81M-SOFTFP-NEXT: movs r0, #10
+; CHECK-81M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFTFP-NEXT: bic r1, r1, #1
+; CHECK-81M-SOFTFP-NEXT: sub sp, #136
+; CHECK-81M-SOFTFP-NEXT: vlstm sp
+; CHECK-81M-SOFTFP-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-SOFTFP-NEXT: blxns r1
+; CHECK-81M-SOFTFP-NEXT: vlldm sp
+; CHECK-81M-SOFTFP-NEXT: add sp, #136
+; CHECK-81M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFTFP-NEXT: pop {r7, pc}
+entry:
+ %call = call i32 %fptr(i32 10) #3
+ ret i32 %call
+}
+
+attributes #2 = { nounwind }
+attributes #3 = { "cmse_nonsecure_call" nounwind }
+
+
+define i32 @ns_tail_call(i32 (i32)* nocapture %fptr) #4 {
+; CHECK-8B-LABEL: ns_tail_call:
+; CHECK-8B: @ %bb.0: @ %entry
+; CHECK-8B-NEXT: push {r7, lr}
+; CHECK-8B-NEXT: mov r1, r0
+; CHECK-8B-NEXT: movs r0, #10
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r7, r11
+; CHECK-8B-NEXT: mov r6, r10
+; CHECK-8B-NEXT: mov r5, r9
+; CHECK-8B-NEXT: mov r4, r8
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r2, #1
+; CHECK-8B-NEXT: bics r1, r2
+; CHECK-8B-NEXT: mov r2, r1
+; CHECK-8B-NEXT: mov r3, r1
+; CHECK-8B-NEXT: mov r4, r1
+; CHECK-8B-NEXT: mov r5, r1
+; CHECK-8B-NEXT: mov r6, r1
+; CHECK-8B-NEXT: mov r7, r1
+; CHECK-8B-NEXT: mov r8, r1
+; CHECK-8B-NEXT: mov r9, r1
+; CHECK-8B-NEXT: mov r10, r1
+; CHECK-8B-NEXT: mov r11, r1
+; CHECK-8B-NEXT: mov r12, r1
+; CHECK-8B-NEXT: msr apsr, r1
+; CHECK-8B-NEXT: blxns r1
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r8, r4
+; CHECK-8B-NEXT: mov r9, r5
+; CHECK-8B-NEXT: mov r10, r6
+; CHECK-8B-NEXT: mov r11, r7
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: pop {r7, pc}
+;
+; CHECK-8M-SOFT-LABEL: ns_tail_call:
+; CHECK-8M-SOFT: @ %bb.0: @ %entry
+; CHECK-8M-SOFT-NEXT: push {r7, lr}
+; CHECK-8M-SOFT-NEXT: mov r1, r0
+; CHECK-8M-SOFT-NEXT: movs r0, #10
+; CHECK-8M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFT-NEXT: bic r1, r1, #1
+; CHECK-8M-SOFT-NEXT: msr apsr_nzcvqg, r1
+; CHECK-8M-SOFT-NEXT: mov r2, r1
+; CHECK-8M-SOFT-NEXT: mov r3, r1
+; CHECK-8M-SOFT-NEXT: mov r4, r1
+; CHECK-8M-SOFT-NEXT: mov r5, r1
+; CHECK-8M-SOFT-NEXT: mov r6, r1
+; CHECK-8M-SOFT-NEXT: mov r7, r1
+; CHECK-8M-SOFT-NEXT: mov r8, r1
+; CHECK-8M-SOFT-NEXT: mov r9, r1
+; CHECK-8M-SOFT-NEXT: mov r10, r1
+; CHECK-8M-SOFT-NEXT: mov r11, r1
+; CHECK-8M-SOFT-NEXT: mov r12, r1
+; CHECK-8M-SOFT-NEXT: blxns r1
+; CHECK-8M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFT-NEXT: pop {r7, pc}
+;
+; CHECK-8M-SOFTFP-LABEL: ns_tail_call:
+; CHECK-8M-SOFTFP: @ %bb.0: @ %entry
+; CHECK-8M-SOFTFP-NEXT: push {r7, lr}
+; CHECK-8M-SOFTFP-NEXT: mov r1, r0
+; CHECK-8M-SOFTFP-NEXT: movs r0, #10
+; CHECK-8M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFTFP-NEXT: bic r1, r1, #1
+; CHECK-8M-SOFTFP-NEXT: sub sp, #136
+; CHECK-8M-SOFTFP-NEXT: vlstm sp
+; CHECK-8M-SOFTFP-NEXT: mov r2, r1
+; CHECK-8M-SOFTFP-NEXT: mov r3, r1
+; CHECK-8M-SOFTFP-NEXT: mov r4, r1
+; CHECK-8M-SOFTFP-NEXT: mov r5, r1
+; CHECK-8M-SOFTFP-NEXT: mov r6, r1
+; CHECK-8M-SOFTFP-NEXT: mov r7, r1
+; CHECK-8M-SOFTFP-NEXT: mov r8, r1
+; CHECK-8M-SOFTFP-NEXT: mov r9, r1
+; CHECK-8M-SOFTFP-NEXT: mov r10, r1
+; CHECK-8M-SOFTFP-NEXT: mov r11, r1
+; CHECK-8M-SOFTFP-NEXT: mov r12, r1
+; CHECK-8M-SOFTFP-NEXT: msr apsr_nzcvqg, r1
+; CHECK-8M-SOFTFP-NEXT: blxns r1
+; CHECK-8M-SOFTFP-NEXT: vlldm sp
+; CHECK-8M-SOFTFP-NEXT: add sp, #136
+; CHECK-8M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFTFP-NEXT: pop {r7, pc}
+;
+; CHECK-81M-SOFT-LABEL: ns_tail_call:
+; CHECK-81M-SOFT: @ %bb.0: @ %entry
+; CHECK-81M-SOFT-NEXT: push {r7, lr}
+; CHECK-81M-SOFT-NEXT: mov r1, r0
+; CHECK-81M-SOFT-NEXT: movs r0, #10
+; CHECK-81M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFT-NEXT: bic r1, r1, #1
+; CHECK-81M-SOFT-NEXT: sub sp, #136
+; CHECK-81M-SOFT-NEXT: vlstm sp
+; CHECK-81M-SOFT-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-SOFT-NEXT: blxns r1
+; CHECK-81M-SOFT-NEXT: vlldm sp
+; CHECK-81M-SOFT-NEXT: add sp, #136
+; CHECK-81M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFT-NEXT: pop {r7, pc}
+;
+; CHECK-81M-SOFTFP-LABEL: ns_tail_call:
+; CHECK-81M-SOFTFP: @ %bb.0: @ %entry
+; CHECK-81M-SOFTFP-NEXT: push {r7, lr}
+; CHECK-81M-SOFTFP-NEXT: mov r1, r0
+; CHECK-81M-SOFTFP-NEXT: movs r0, #10
+; CHECK-81M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFTFP-NEXT: bic r1, r1, #1
+; CHECK-81M-SOFTFP-NEXT: sub sp, #136
+; CHECK-81M-SOFTFP-NEXT: vlstm sp
+; CHECK-81M-SOFTFP-NEXT: clrm {r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-SOFTFP-NEXT: blxns r1
+; CHECK-81M-SOFTFP-NEXT: vlldm sp
+; CHECK-81M-SOFTFP-NEXT: add sp, #136
+; CHECK-81M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFTFP-NEXT: pop {r7, pc}
+entry:
+ %call = tail call i32 %fptr(i32 10) #5
+ ret i32 %call
+}
+
+attributes #4 = { nounwind }
+attributes #5 = { "cmse_nonsecure_call" nounwind }
+
+
+define void (i32, i32, i32, i32)* @ns_tail_call_many_args(void (i32, i32, i32, i32)* %f, i32 %a, i32 %b, i32 %c, i32 %d) #6 {
+; CHECK-8B-LABEL: ns_tail_call_many_args:
+; CHECK-8B: @ %bb.0:
+; CHECK-8B-NEXT: push {r4, r5, r7, lr}
+; CHECK-8B-NEXT: mov r5, r3
+; CHECK-8B-NEXT: mov r4, r0
+; CHECK-8B-NEXT: ldr r3, [sp, #16]
+; CHECK-8B-NEXT: mov r0, r1
+; CHECK-8B-NEXT: mov r1, r2
+; CHECK-8B-NEXT: mov r2, r5
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r7, r11
+; CHECK-8B-NEXT: mov r6, r10
+; CHECK-8B-NEXT: mov r5, r9
+; CHECK-8B-NEXT: push {r5, r6, r7}
+; CHECK-8B-NEXT: mov r5, r8
+; CHECK-8B-NEXT: push {r5}
+; CHECK-8B-NEXT: mov r5, #1
+; CHECK-8B-NEXT: bics r4, r5
+; CHECK-8B-NEXT: mov r5, r4
+; CHECK-8B-NEXT: mov r6, r4
+; CHECK-8B-NEXT: mov r7, r4
+; CHECK-8B-NEXT: mov r8, r4
+; CHECK-8B-NEXT: mov r9, r4
+; CHECK-8B-NEXT: mov r10, r4
+; CHECK-8B-NEXT: mov r11, r4
+; CHECK-8B-NEXT: mov r12, r4
+; CHECK-8B-NEXT: msr apsr, r4
+; CHECK-8B-NEXT: blxns r4
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r8, r4
+; CHECK-8B-NEXT: mov r9, r5
+; CHECK-8B-NEXT: mov r10, r6
+; CHECK-8B-NEXT: mov r11, r7
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r0, r4
+; CHECK-8B-NEXT: pop {r4, r5, r7, pc}
+;
+; CHECK-8M-SOFT-LABEL: ns_tail_call_many_args:
+; CHECK-8M-SOFT: @ %bb.0:
+; CHECK-8M-SOFT-NEXT: push {r4, lr}
+; CHECK-8M-SOFT-NEXT: mov r4, r0
+; CHECK-8M-SOFT-NEXT: mov r12, r3
+; CHECK-8M-SOFT-NEXT: mov r0, r1
+; CHECK-8M-SOFT-NEXT: mov r1, r2
+; CHECK-8M-SOFT-NEXT: ldr r3, [sp, #8]
+; CHECK-8M-SOFT-NEXT: mov r2, r12
+; CHECK-8M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFT-NEXT: bic r4, r4, #1
+; CHECK-8M-SOFT-NEXT: msr apsr_nzcvqg, r4
+; CHECK-8M-SOFT-NEXT: mov r5, r4
+; CHECK-8M-SOFT-NEXT: mov r6, r4
+; CHECK-8M-SOFT-NEXT: mov r7, r4
+; CHECK-8M-SOFT-NEXT: mov r8, r4
+; CHECK-8M-SOFT-NEXT: mov r9, r4
+; CHECK-8M-SOFT-NEXT: mov r10, r4
+; CHECK-8M-SOFT-NEXT: mov r11, r4
+; CHECK-8M-SOFT-NEXT: mov r12, r4
+; CHECK-8M-SOFT-NEXT: blxns r4
+; CHECK-8M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFT-NEXT: mov r0, r4
+; CHECK-8M-SOFT-NEXT: pop {r4, pc}
+;
+; CHECK-8M-SOFTFP-LABEL: ns_tail_call_many_args:
+; CHECK-8M-SOFTFP: @ %bb.0:
+; CHECK-8M-SOFTFP-NEXT: push {r4, lr}
+; CHECK-8M-SOFTFP-NEXT: mov r4, r0
+; CHECK-8M-SOFTFP-NEXT: mov r12, r3
+; CHECK-8M-SOFTFP-NEXT: mov r0, r1
+; CHECK-8M-SOFTFP-NEXT: mov r1, r2
+; CHECK-8M-SOFTFP-NEXT: ldr r3, [sp, #8]
+; CHECK-8M-SOFTFP-NEXT: mov r2, r12
+; CHECK-8M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFTFP-NEXT: bic r4, r4, #1
+; CHECK-8M-SOFTFP-NEXT: sub sp, #136
+; CHECK-8M-SOFTFP-NEXT: vlstm sp
+; CHECK-8M-SOFTFP-NEXT: mov r5, r4
+; CHECK-8M-SOFTFP-NEXT: mov r6, r4
+; CHECK-8M-SOFTFP-NEXT: mov r7, r4
+; CHECK-8M-SOFTFP-NEXT: mov r8, r4
+; CHECK-8M-SOFTFP-NEXT: mov r9, r4
+; CHECK-8M-SOFTFP-NEXT: mov r10, r4
+; CHECK-8M-SOFTFP-NEXT: mov r11, r4
+; CHECK-8M-SOFTFP-NEXT: mov r12, r4
+; CHECK-8M-SOFTFP-NEXT: msr apsr_nzcvqg, r4
+; CHECK-8M-SOFTFP-NEXT: blxns r4
+; CHECK-8M-SOFTFP-NEXT: vlldm sp
+; CHECK-8M-SOFTFP-NEXT: add sp, #136
+; CHECK-8M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFTFP-NEXT: mov r0, r4
+; CHECK-8M-SOFTFP-NEXT: pop {r4, pc}
+;
+; CHECK-81M-SOFT-LABEL: ns_tail_call_many_args:
+; CHECK-81M-SOFT: @ %bb.0:
+; CHECK-81M-SOFT-NEXT: push {r4, lr}
+; CHECK-81M-SOFT-NEXT: mov r4, r0
+; CHECK-81M-SOFT-NEXT: mov r12, r3
+; CHECK-81M-SOFT-NEXT: mov r0, r1
+; CHECK-81M-SOFT-NEXT: mov r1, r2
+; CHECK-81M-SOFT-NEXT: ldr r3, [sp, #8]
+; CHECK-81M-SOFT-NEXT: mov r2, r12
+; CHECK-81M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFT-NEXT: bic r4, r4, #1
+; CHECK-81M-SOFT-NEXT: sub sp, #136
+; CHECK-81M-SOFT-NEXT: vlstm sp
+; CHECK-81M-SOFT-NEXT: clrm {r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-SOFT-NEXT: blxns r4
+; CHECK-81M-SOFT-NEXT: vlldm sp
+; CHECK-81M-SOFT-NEXT: add sp, #136
+; CHECK-81M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFT-NEXT: mov r0, r4
+; CHECK-81M-SOFT-NEXT: pop {r4, pc}
+;
+; CHECK-81M-SOFTFP-LABEL: ns_tail_call_many_args:
+; CHECK-81M-SOFTFP: @ %bb.0:
+; CHECK-81M-SOFTFP-NEXT: push {r4, lr}
+; CHECK-81M-SOFTFP-NEXT: mov r4, r0
+; CHECK-81M-SOFTFP-NEXT: mov r12, r3
+; CHECK-81M-SOFTFP-NEXT: mov r0, r1
+; CHECK-81M-SOFTFP-NEXT: mov r1, r2
+; CHECK-81M-SOFTFP-NEXT: ldr r3, [sp, #8]
+; CHECK-81M-SOFTFP-NEXT: mov r2, r12
+; CHECK-81M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFTFP-NEXT: bic r4, r4, #1
+; CHECK-81M-SOFTFP-NEXT: sub sp, #136
+; CHECK-81M-SOFTFP-NEXT: vlstm sp
+; CHECK-81M-SOFTFP-NEXT: clrm {r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-SOFTFP-NEXT: blxns r4
+; CHECK-81M-SOFTFP-NEXT: vlldm sp
+; CHECK-81M-SOFTFP-NEXT: add sp, #136
+; CHECK-81M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFTFP-NEXT: mov r0, r4
+; CHECK-81M-SOFTFP-NEXT: pop {r4, pc}
+ tail call void %f(i32 %a, i32 %b, i32 %c, i32 %d) #7
+ ret void (i32, i32, i32, i32)* %f
+}
+
+attributes #6 = { nounwind }
+attributes #7 = { "cmse_nonsecure_call" nounwind }
+
+
+define i32 @ns_call_void(i32 %reg0, i32 ()* nocapture %fptr) #8 {
+; CHECK-8B-LABEL: ns_call_void:
+; CHECK-8B: @ %bb.0: @ %entry
+; CHECK-8B-NEXT: push {r7, lr}
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r7, r11
+; CHECK-8B-NEXT: mov r6, r10
+; CHECK-8B-NEXT: mov r5, r9
+; CHECK-8B-NEXT: mov r4, r8
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r0, #1
+; CHECK-8B-NEXT: bics r1, r0
+; CHECK-8B-NEXT: mov r0, r1
+; CHECK-8B-NEXT: mov r2, r1
+; CHECK-8B-NEXT: mov r3, r1
+; CHECK-8B-NEXT: mov r4, r1
+; CHECK-8B-NEXT: mov r5, r1
+; CHECK-8B-NEXT: mov r6, r1
+; CHECK-8B-NEXT: mov r7, r1
+; CHECK-8B-NEXT: mov r8, r1
+; CHECK-8B-NEXT: mov r9, r1
+; CHECK-8B-NEXT: mov r10, r1
+; CHECK-8B-NEXT: mov r11, r1
+; CHECK-8B-NEXT: mov r12, r1
+; CHECK-8B-NEXT: msr apsr, r1
+; CHECK-8B-NEXT: blxns r1
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r8, r4
+; CHECK-8B-NEXT: mov r9, r5
+; CHECK-8B-NEXT: mov r10, r6
+; CHECK-8B-NEXT: mov r11, r7
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: pop {r7, pc}
+;
+; CHECK-8M-SOFT-LABEL: ns_call_void:
+; CHECK-8M-SOFT: @ %bb.0: @ %entry
+; CHECK-8M-SOFT-NEXT: push {r7, lr}
+; CHECK-8M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFT-NEXT: bic r1, r1, #1
+; CHECK-8M-SOFT-NEXT: msr apsr_nzcvqg, r1
+; CHECK-8M-SOFT-NEXT: mov r0, r1
+; CHECK-8M-SOFT-NEXT: mov r2, r1
+; CHECK-8M-SOFT-NEXT: mov r3, r1
+; CHECK-8M-SOFT-NEXT: mov r4, r1
+; CHECK-8M-SOFT-NEXT: mov r5, r1
+; CHECK-8M-SOFT-NEXT: mov r6, r1
+; CHECK-8M-SOFT-NEXT: mov r7, r1
+; CHECK-8M-SOFT-NEXT: mov r8, r1
+; CHECK-8M-SOFT-NEXT: mov r9, r1
+; CHECK-8M-SOFT-NEXT: mov r10, r1
+; CHECK-8M-SOFT-NEXT: mov r11, r1
+; CHECK-8M-SOFT-NEXT: mov r12, r1
+; CHECK-8M-SOFT-NEXT: blxns r1
+; CHECK-8M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFT-NEXT: pop {r7, pc}
+;
+; CHECK-8M-SOFTFP-LABEL: ns_call_void:
+; CHECK-8M-SOFTFP: @ %bb.0: @ %entry
+; CHECK-8M-SOFTFP-NEXT: push {r7, lr}
+; CHECK-8M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFTFP-NEXT: bic r1, r1, #1
+; CHECK-8M-SOFTFP-NEXT: sub sp, #136
+; CHECK-8M-SOFTFP-NEXT: vlstm sp
+; CHECK-8M-SOFTFP-NEXT: mov r0, r1
+; CHECK-8M-SOFTFP-NEXT: mov r2, r1
+; CHECK-8M-SOFTFP-NEXT: mov r3, r1
+; CHECK-8M-SOFTFP-NEXT: mov r4, r1
+; CHECK-8M-SOFTFP-NEXT: mov r5, r1
+; CHECK-8M-SOFTFP-NEXT: mov r6, r1
+; CHECK-8M-SOFTFP-NEXT: mov r7, r1
+; CHECK-8M-SOFTFP-NEXT: mov r8, r1
+; CHECK-8M-SOFTFP-NEXT: mov r9, r1
+; CHECK-8M-SOFTFP-NEXT: mov r10, r1
+; CHECK-8M-SOFTFP-NEXT: mov r11, r1
+; CHECK-8M-SOFTFP-NEXT: mov r12, r1
+; CHECK-8M-SOFTFP-NEXT: msr apsr_nzcvqg, r1
+; CHECK-8M-SOFTFP-NEXT: blxns r1
+; CHECK-8M-SOFTFP-NEXT: vlldm sp
+; CHECK-8M-SOFTFP-NEXT: add sp, #136
+; CHECK-8M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-SOFTFP-NEXT: pop {r7, pc}
+;
+; CHECK-81M-SOFT-LABEL: ns_call_void:
+; CHECK-81M-SOFT: @ %bb.0: @ %entry
+; CHECK-81M-SOFT-NEXT: push {r7, lr}
+; CHECK-81M-SOFT-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFT-NEXT: bic r1, r1, #1
+; CHECK-81M-SOFT-NEXT: sub sp, #136
+; CHECK-81M-SOFT-NEXT: vlstm sp
+; CHECK-81M-SOFT-NEXT: clrm {r0, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-SOFT-NEXT: blxns r1
+; CHECK-81M-SOFT-NEXT: vlldm sp
+; CHECK-81M-SOFT-NEXT: add sp, #136
+; CHECK-81M-SOFT-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFT-NEXT: pop {r7, pc}
+;
+; CHECK-81M-SOFTFP-LABEL: ns_call_void:
+; CHECK-81M-SOFTFP: @ %bb.0: @ %entry
+; CHECK-81M-SOFTFP-NEXT: push {r7, lr}
+; CHECK-81M-SOFTFP-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFTFP-NEXT: bic r1, r1, #1
+; CHECK-81M-SOFTFP-NEXT: sub sp, #136
+; CHECK-81M-SOFTFP-NEXT: vlstm sp
+; CHECK-81M-SOFTFP-NEXT: clrm {r0, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-SOFTFP-NEXT: blxns r1
+; CHECK-81M-SOFTFP-NEXT: vlldm sp
+; CHECK-81M-SOFTFP-NEXT: add sp, #136
+; CHECK-81M-SOFTFP-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-SOFTFP-NEXT: pop {r7, pc}
+entry:
+ %call = call i32 %fptr() #9
+ ret i32 %call
+}
+
+attributes #8 = { nounwind }
+attributes #9 = { "cmse_nonsecure_call" nounwind }
+
diff --git a/llvm/test/CodeGen/ARM/cmse-clrm-it-block.ll b/llvm/test/CodeGen/ARM/cmse-clrm-it-block.ll
new file mode 100644
index 000000000000..377e5609d93f
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-clrm-it-block.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+8msecext %s -o - | FileCheck %s
+
+define hidden i32 @f(i32 %0, i32 (i32)* nocapture %1) local_unnamed_addr #0 {
+ %3 = call i32 %1(i32 %0) #2
+ %4 = icmp eq i32 %3, 1
+ br i1 %4, label %6, label %5
+
+5: ; preds = %2
+ call void bitcast (void (...)* @g to void ()*)() #3
+ unreachable
+
+6: ; preds = %2
+ ret i32 1
+}
+; CHECK-NOT: clrm eq
+; CHECK: clrm {r1, r2, r3, r12, apsr}
+; CHECK: bl g
+
+declare dso_local void @g(...) local_unnamed_addr #1
+
+attributes #0 = { nounwind "cmse_nonsecure_entry" }
+attributes #1 = { noreturn }
+attributes #2 = { nounwind "cmse_nonsecure_call" }
+attributes #3 = { noreturn nounwind }
diff --git a/llvm/test/CodeGen/ARM/cmse-expand-bxns-ret.mir b/llvm/test/CodeGen/ARM/cmse-expand-bxns-ret.mir
new file mode 100644
index 000000000000..af7453665a50
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse-expand-bxns-ret.mir
@@ -0,0 +1,26 @@
+# RUN: llc -mcpu=cortex-m33 -run-pass=arm-pseudo %s -o - | FileCheck %s
+--- |
+ target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+ target triple = "thumbv8m.main-arm-none-eabi"
+
+ define hidden void @foo() local_unnamed_addr #0 {
+ entry:
+ ret void
+ }
+...
+---
+name: foo
+alignment: 2
+tracksRegLiveness: true
+frameInfo:
+ maxCallFrameSize: 0
+body: |
+ bb.0.entry:
+ tBXNS_RET
+...
+
+# CHECK-LABEL: name: foo
+# CHECK: bb.0.entry:
+# CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+# CHECK: bb.1.entry:
+# CHECK-NEXT: successors: %bb.2(0x80000000)
diff --git a/llvm/test/CodeGen/ARM/cmse.ll b/llvm/test/CodeGen/ARM/cmse.ll
new file mode 100644
index 000000000000..2b540994d0b0
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/cmse.ll
@@ -0,0 +1,346 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc %s -o - -mtriple=thumbv8m.base | \
+; RUN: FileCheck %s --check-prefix=CHECK-8B --check-prefix=CHECK-8B-LE
+; RUN: llc %s -o - -mtriple=thumbebv8m.base | \
+; RUN: FileCheck %s --check-prefix=CHECK-8B --check-prefix=CHECK-8B-BE
+; RUN: llc %s -o - -mtriple=thumbv8m.main | \
+; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-LE
+; RUN: llc %s -o - -mtriple=thumbebv8m.main | \
+; RUN: FileCheck %s --check-prefix=CHECK-8M --check-prefix=CHECK-8M-BE
+
+; RUN: llc %s -o - -mtriple=thumbv8.1m.main | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-LE
+; RUN: llc %s -o - -mtriple=thumbebv8.1m.main | \
+; RUN: FileCheck %s --check-prefix=CHECK-81M --check-prefix=CHECK-81M-BE
+
+define void @func1(void ()* nocapture %fptr) #0 {
+; CHECK-8B-LABEL: func1:
+; CHECK-8B: @ %bb.0: @ %entry
+; CHECK-8B-NEXT: push {r7, lr}
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r7, r11
+; CHECK-8B-NEXT: mov r6, r10
+; CHECK-8B-NEXT: mov r5, r9
+; CHECK-8B-NEXT: mov r4, r8
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r1, #1
+; CHECK-8B-NEXT: bics r0, r1
+; CHECK-8B-NEXT: mov r1, r0
+; CHECK-8B-NEXT: mov r2, r0
+; CHECK-8B-NEXT: mov r3, r0
+; CHECK-8B-NEXT: mov r4, r0
+; CHECK-8B-NEXT: mov r5, r0
+; CHECK-8B-NEXT: mov r6, r0
+; CHECK-8B-NEXT: mov r7, r0
+; CHECK-8B-NEXT: mov r8, r0
+; CHECK-8B-NEXT: mov r9, r0
+; CHECK-8B-NEXT: mov r10, r0
+; CHECK-8B-NEXT: mov r11, r0
+; CHECK-8B-NEXT: mov r12, r0
+; CHECK-8B-NEXT: msr apsr, r0
+; CHECK-8B-NEXT: blxns r0
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r8, r4
+; CHECK-8B-NEXT: mov r9, r5
+; CHECK-8B-NEXT: mov r10, r6
+; CHECK-8B-NEXT: mov r11, r7
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: pop {r7}
+; CHECK-8B-NEXT: pop {r0}
+; CHECK-8B-NEXT: mov lr, r0
+; CHECK-8B-NEXT: mov r0, lr
+; CHECK-8B-NEXT: mov r1, lr
+; CHECK-8B-NEXT: mov r2, lr
+; CHECK-8B-NEXT: mov r3, lr
+; CHECK-8B-NEXT: mov r12, lr
+; CHECK-8B-NEXT: msr apsr, lr
+; CHECK-8B-NEXT: bxns lr
+;
+; CHECK-8M-LABEL: func1:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r0, r0, #1
+; CHECK-8M-NEXT: msr apsr_nzcvq, r0
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: mov r2, r0
+; CHECK-8M-NEXT: mov r3, r0
+; CHECK-8M-NEXT: mov r4, r0
+; CHECK-8M-NEXT: mov r5, r0
+; CHECK-8M-NEXT: mov r6, r0
+; CHECK-8M-NEXT: mov r7, r0
+; CHECK-8M-NEXT: mov r8, r0
+; CHECK-8M-NEXT: mov r9, r0
+; CHECK-8M-NEXT: mov r10, r0
+; CHECK-8M-NEXT: mov r11, r0
+; CHECK-8M-NEXT: mov r12, r0
+; CHECK-8M-NEXT: blxns r0
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop.w {r7, lr}
+; CHECK-8M-NEXT: mov r0, lr
+; CHECK-8M-NEXT: mov r1, lr
+; CHECK-8M-NEXT: mov r2, lr
+; CHECK-8M-NEXT: mov r3, lr
+; CHECK-8M-NEXT: mov r12, lr
+; CHECK-8M-NEXT: msr apsr_nzcvq, lr
+; CHECK-8M-NEXT: bxns lr
+;
+; CHECK-81M-LABEL: func1:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: sub sp, #4
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r0, r0, #1
+; CHECK-81M-NEXT: sub sp, #136
+; CHECK-81M-NEXT: vlstm sp
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r0
+; CHECK-81M-NEXT: vlldm sp
+; CHECK-81M-NEXT: add sp, #136
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: add sp, #4
+; CHECK-81M-NEXT: pop.w {r7, lr}
+; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
+; CHECK-81M-NEXT: bxns lr
+entry:
+ call void %fptr() #1
+ ret void
+}
+
+attributes #0 = { "cmse_nonsecure_entry" nounwind }
+attributes #1 = { "cmse_nonsecure_call" nounwind }
+
+define void @func2(void ()* nocapture %fptr) #2 {
+; CHECK-8B-LABEL: func2:
+; CHECK-8B: @ %bb.0: @ %entry
+; CHECK-8B-NEXT: push {r7, lr}
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r7, r11
+; CHECK-8B-NEXT: mov r6, r10
+; CHECK-8B-NEXT: mov r5, r9
+; CHECK-8B-NEXT: mov r4, r8
+; CHECK-8B-NEXT: push {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r1, #1
+; CHECK-8B-NEXT: bics r0, r1
+; CHECK-8B-NEXT: mov r1, r0
+; CHECK-8B-NEXT: mov r2, r0
+; CHECK-8B-NEXT: mov r3, r0
+; CHECK-8B-NEXT: mov r4, r0
+; CHECK-8B-NEXT: mov r5, r0
+; CHECK-8B-NEXT: mov r6, r0
+; CHECK-8B-NEXT: mov r7, r0
+; CHECK-8B-NEXT: mov r8, r0
+; CHECK-8B-NEXT: mov r9, r0
+; CHECK-8B-NEXT: mov r10, r0
+; CHECK-8B-NEXT: mov r11, r0
+; CHECK-8B-NEXT: mov r12, r0
+; CHECK-8B-NEXT: msr apsr, r0
+; CHECK-8B-NEXT: blxns r0
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: mov r8, r4
+; CHECK-8B-NEXT: mov r9, r5
+; CHECK-8B-NEXT: mov r10, r6
+; CHECK-8B-NEXT: mov r11, r7
+; CHECK-8B-NEXT: pop {r4, r5, r6, r7}
+; CHECK-8B-NEXT: pop {r7, pc}
+;
+; CHECK-8M-LABEL: func2:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: bic r0, r0, #1
+; CHECK-8M-NEXT: msr apsr_nzcvq, r0
+; CHECK-8M-NEXT: mov r1, r0
+; CHECK-8M-NEXT: mov r2, r0
+; CHECK-8M-NEXT: mov r3, r0
+; CHECK-8M-NEXT: mov r4, r0
+; CHECK-8M-NEXT: mov r5, r0
+; CHECK-8M-NEXT: mov r6, r0
+; CHECK-8M-NEXT: mov r7, r0
+; CHECK-8M-NEXT: mov r8, r0
+; CHECK-8M-NEXT: mov r9, r0
+; CHECK-8M-NEXT: mov r10, r0
+; CHECK-8M-NEXT: mov r11, r0
+; CHECK-8M-NEXT: mov r12, r0
+; CHECK-8M-NEXT: blxns r0
+; CHECK-8M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-8M-NEXT: pop {r7, pc}
+;
+; CHECK-81M-LABEL: func2:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: bic r0, r0, #1
+; CHECK-81M-NEXT: sub sp, #136
+; CHECK-81M-NEXT: vlstm sp
+; CHECK-81M-NEXT: clrm {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, apsr}
+; CHECK-81M-NEXT: blxns r0
+; CHECK-81M-NEXT: vlldm sp
+; CHECK-81M-NEXT: add sp, #136
+; CHECK-81M-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11}
+; CHECK-81M-NEXT: pop {r7, pc}
+entry:
+ tail call void %fptr() #3
+ ret void
+}
+
+attributes #2 = { nounwind }
+attributes #3 = { "cmse_nonsecure_call" nounwind }
+
+; func3: a CMSE non-secure entry function (attribute #4 = "cmse_nonsecure_entry")
+; taking no arguments and returning void. With an empty body, the generated
+; code is exactly the secure-return sequence that clears state not used for
+; returns before branching back to non-secure code:
+;  - v8-M Baseline (CHECK-8B): copy lr into r0-r3 and r12, clear the flags
+;    with "msr apsr, lr", then return with bxns.
+;  - v8-M Mainline (CHECK-8M): same, but Mainline writes "apsr_nzcvq".
+;  - v8.1-M Mainline (CHECK-81M): save the non-secure FP context with
+;    "vstr fpcxtns", zero s0-s15 and VPR with vscclrm, reload the context,
+;    clear r0-r3, r12 and APSR in one clrm, then bxns.
+define void @func3() #4 {
+; CHECK-8B-LABEL: func3:
+; CHECK-8B: @ %bb.0: @ %entry
+; CHECK-8B-NEXT: mov r0, lr
+; CHECK-8B-NEXT: mov r1, lr
+; CHECK-8B-NEXT: mov r2, lr
+; CHECK-8B-NEXT: mov r3, lr
+; CHECK-8B-NEXT: mov r12, lr
+; CHECK-8B-NEXT: msr apsr, lr
+; CHECK-8B-NEXT: bxns lr
+;
+; CHECK-8M-LABEL: func3:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: mov r0, lr
+; CHECK-8M-NEXT: mov r1, lr
+; CHECK-8M-NEXT: mov r2, lr
+; CHECK-8M-NEXT: mov r3, lr
+; CHECK-8M-NEXT: mov r12, lr
+; CHECK-8M-NEXT: msr apsr_nzcvq, lr
+; CHECK-8M-NEXT: bxns lr
+;
+; CHECK-81M-LABEL: func3:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
+; CHECK-81M-NEXT: bxns lr
+entry:
+  ret void
+}
+
+; func4: a non-secure entry function that (tail) calls another non-secure
+; entry function, func3. Note the IR "tail call" is NOT emitted as a tail
+; call: all three configurations use a plain "bl func3" followed by func4's
+; own entry-return (register-clearing + bxns) sequence.
+;  - On Baseline (CHECK-8B), the saved lr is popped into r0 and moved into lr
+;    (presumably because Thumb1 pop cannot write lr directly — confirm), then
+;    the usual clear-and-bxns sequence runs.
+;  - Mainline (CHECK-8M) pops lr directly with pop.w before clearing.
+;  - v8.1-M (CHECK-81M) wraps the call between the fpcxtns save in the
+;    prologue and the vscclrm/vldr/clrm epilogue; the extra "sub/add sp, #4"
+;    keeps the stack 8-byte aligned around {r7, lr}.
+define void @func4() #4 {
+; CHECK-8B-LABEL: func4:
+; CHECK-8B: @ %bb.0: @ %entry
+; CHECK-8B-NEXT: push {r7, lr}
+; CHECK-8B-NEXT: bl func3
+; CHECK-8B-NEXT: pop {r7}
+; CHECK-8B-NEXT: pop {r0}
+; CHECK-8B-NEXT: mov lr, r0
+; CHECK-8B-NEXT: mov r0, lr
+; CHECK-8B-NEXT: mov r1, lr
+; CHECK-8B-NEXT: mov r2, lr
+; CHECK-8B-NEXT: mov r3, lr
+; CHECK-8B-NEXT: mov r12, lr
+; CHECK-8B-NEXT: msr apsr, lr
+; CHECK-8B-NEXT: bxns lr
+;
+; CHECK-8M-LABEL: func4:
+; CHECK-8M: @ %bb.0: @ %entry
+; CHECK-8M-NEXT: push {r7, lr}
+; CHECK-8M-NEXT: bl func3
+; CHECK-8M-NEXT: pop.w {r7, lr}
+; CHECK-8M-NEXT: mov r0, lr
+; CHECK-8M-NEXT: mov r1, lr
+; CHECK-8M-NEXT: mov r2, lr
+; CHECK-8M-NEXT: mov r3, lr
+; CHECK-8M-NEXT: mov r12, lr
+; CHECK-8M-NEXT: msr apsr_nzcvq, lr
+; CHECK-8M-NEXT: bxns lr
+;
+; CHECK-81M-LABEL: func4:
+; CHECK-81M: @ %bb.0: @ %entry
+; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-NEXT: push {r7, lr}
+; CHECK-81M-NEXT: sub sp, #4
+; CHECK-81M-NEXT: bl func3
+; CHECK-81M-NEXT: add sp, #4
+; CHECK-81M-NEXT: pop.w {r7, lr}
+; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
+; CHECK-81M-NEXT: bxns lr
+entry:
+  tail call void @func3() #5
+  ret void
+}
+
+declare void @func51(i8 *);
+
+; func5: a non-secure entry function with an over-aligned (align 16) alloca,
+; whose address is passed to the ordinary (secure-side) callee func51. This
+; exercises dynamic stack realignment combined with the entry-return sequence:
+;  - Baseline (CHECK-8B) aligns sp by shifting (lsrs/lsls by 4) since bfc is
+;    unavailable, restores sp from the frame pointer r7 via two subs, and
+;    again recovers lr through r0 before the clear-and-bxns return.
+;  - Mainline (CHECK-8M) aligns sp with bfc and pops lr directly.
+;  - v8.1-M (CHECK-81M) additionally brackets the function with the
+;    vstr/vldr fpcxtns save/restore and vscclrm/clrm clearing before bxns.
+; The call to func51 itself is a plain bl — no clearing is done around calls
+; to same-security-state functions.
+define void @func5() #4 {
+; CHECK-8B-LABEL: func5:
+; CHECK-8B: @ %bb.0:
+; CHECK-8B-NEXT: push {r4, r6, r7, lr}
+; CHECK-8B-NEXT: add r7, sp, #8
+; CHECK-8B-NEXT: sub sp, #16
+; CHECK-8B-NEXT: mov r4, sp
+; CHECK-8B-NEXT: lsrs r4, r4, #4
+; CHECK-8B-NEXT: lsls r4, r4, #4
+; CHECK-8B-NEXT: mov sp, r4
+; CHECK-8B-NEXT: mov r0, sp
+; CHECK-8B-NEXT: bl func51
+; CHECK-8B-NEXT: subs r4, r7, #7
+; CHECK-8B-NEXT: subs r4, #1
+; CHECK-8B-NEXT: mov sp, r4
+; CHECK-8B-NEXT: pop {r4, r6, r7}
+; CHECK-8B-NEXT: pop {r0}
+; CHECK-8B-NEXT: mov lr, r0
+; CHECK-8B-NEXT: mov r0, lr
+; CHECK-8B-NEXT: mov r1, lr
+; CHECK-8B-NEXT: mov r2, lr
+; CHECK-8B-NEXT: mov r3, lr
+; CHECK-8B-NEXT: mov r12, lr
+; CHECK-8B-NEXT: msr apsr, lr
+; CHECK-8B-NEXT: bxns lr
+;
+; CHECK-8M-LABEL: func5:
+; CHECK-8M: @ %bb.0:
+; CHECK-8M-NEXT: push {r4, r6, r7, lr}
+; CHECK-8M-NEXT: add r7, sp, #8
+; CHECK-8M-NEXT: sub sp, #16
+; CHECK-8M-NEXT: mov r4, sp
+; CHECK-8M-NEXT: bfc r4, #0, #4
+; CHECK-8M-NEXT: mov sp, r4
+; CHECK-8M-NEXT: mov r0, sp
+; CHECK-8M-NEXT: bl func51
+; CHECK-8M-NEXT: sub.w r4, r7, #8
+; CHECK-8M-NEXT: mov sp, r4
+; CHECK-8M-NEXT: pop.w {r4, r6, r7, lr}
+; CHECK-8M-NEXT: mov r0, lr
+; CHECK-8M-NEXT: mov r1, lr
+; CHECK-8M-NEXT: mov r2, lr
+; CHECK-8M-NEXT: mov r3, lr
+; CHECK-8M-NEXT: mov r12, lr
+; CHECK-8M-NEXT: msr apsr_nzcvq, lr
+; CHECK-8M-NEXT: bxns lr
+;
+; CHECK-81M-LABEL: func5:
+; CHECK-81M: @ %bb.0:
+; CHECK-81M-NEXT: vstr fpcxtns, [sp, #-4]!
+; CHECK-81M-NEXT: push {r4, r6, r7, lr}
+; CHECK-81M-NEXT: add r7, sp, #8
+; CHECK-81M-NEXT: sub sp, #12
+; CHECK-81M-NEXT: mov r4, sp
+; CHECK-81M-NEXT: bfc r4, #0, #4
+; CHECK-81M-NEXT: mov sp, r4
+; CHECK-81M-NEXT: mov r0, sp
+; CHECK-81M-NEXT: bl func51
+; CHECK-81M-NEXT: sub.w r4, r7, #8
+; CHECK-81M-NEXT: mov sp, r4
+; CHECK-81M-NEXT: pop.w {r4, r6, r7, lr}
+; CHECK-81M-NEXT: vscclrm {s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, vpr}
+; CHECK-81M-NEXT: vldr fpcxtns, [sp], #4
+; CHECK-81M-NEXT: clrm {r0, r1, r2, r3, r12, apsr}
+; CHECK-81M-NEXT: bxns lr
+  %1 = alloca i8, align 16
+  call void @func51(i8* nonnull %1) #5
+  ret void
+}
+
+
+attributes #4 = { "cmse_nonsecure_entry" nounwind }
+attributes #5 = { nounwind }
+
More information about the llvm-commits
mailing list