[clang] [llvm] [ARM] Save floating point registers and status registers with save_fp function attribute (PR #89654)

Benson Chu via cfe-commits cfe-commits at lists.llvm.org
Wed Apr 9 08:52:50 PDT 2025


https://github.com/pestctrl updated https://github.com/llvm/llvm-project/pull/89654

>From 01c6d6dd84c6cae12f66378017876bee8f71f439 Mon Sep 17 00:00:00 2001
From: Benson Chu <bchu at localhost.localdomain>
Date: Mon, 10 Mar 2025 10:51:25 -0500
Subject: [PATCH] [ARM][Thumb] Save FPSCR + FPEXC for save-fp attribute

FPSCR and FPEXC will be stored in a new FPStatusRegs spill area, after
GPRCS2 has been saved. The resulting save order is:

- GPRCS1
- GPRCS2
- FPStatusRegs (new)
- DPRCS1
- GPRCS3
- DPRCS2

FPSCR is present on all targets with a VFP, but the FPEXC register is
not present on Cortex-M devices, so a different number of bytes is
pushed onto the stack depending on the target, which affects the
alignment of subsequent saves.

DPRCS1 will sum up all previous bytes that were saved, and will emit
extra instructions to ensure that its alignment is correct. My
assumption is that once DPRCS1 has been correctly aligned, all
subsequent saves will also be correctly aligned.

Avoid annotating the saving of FPSCR and FPEXC for functions marked
with the interrupt_save_fp attribute, even though this is done as part
of frame setup.  Since these are status registers, there really is no
viable way of annotating this. Since these aren't GPRs or DPRs, they
can't be used with .save or .vsave directives. Instead, just record
that the intermediate registers r4 and r5 are saved to the stack
again.

Co-authored-by: Jake Vossen <jake at vossen.dev>
Co-authored-by: Alan Phipps <a-phipps at ti.com>
---
 clang/include/clang/Basic/Attr.td             |  16 +
 clang/include/clang/Basic/AttrDocs.td         |  13 +
 .../clang/Basic/DiagnosticSemaKinds.td        |   8 +-
 clang/include/clang/Sema/SemaARM.h            |   1 +
 clang/lib/CodeGen/Targets/ARM.cpp             |   6 +
 clang/lib/Sema/SemaARM.cpp                    |  30 +-
 clang/lib/Sema/SemaDeclAttr.cpp               |   3 +
 .../arm-interrupt-save-fp-attr-status-regs.c  |  34 ++
 .../test/CodeGen/arm-interrupt-save-fp-attr.c |  39 +++
 clang/test/Sema/arm-interrupt-attr.c          |   2 +-
 clang/test/Sema/arm-interrupt-save-fp-attr.c  |  26 ++
 llvm/include/llvm/IR/IntrinsicsARM.td         |   2 +-
 llvm/lib/Target/ARM/ARMAsmPrinter.cpp         |  16 +
 llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp   |  19 ++
 llvm/lib/Target/ARM/ARMCallingConv.td         |  42 ++-
 llvm/lib/Target/ARM/ARMFrameLowering.cpp      | 164 +++++++++-
 llvm/lib/Target/ARM/ARMFrameLowering.h        |   9 +
 llvm/lib/Target/ARM/ARMInstrVFP.td            |   3 +
 llvm/lib/Target/ARM/ARMMachineFunctionInfo.h  |   3 +
 llvm/lib/Target/ARM/ARMRegisterInfo.td        |   7 +
 .../interrupt-save-fp-attr-status-regs.mir    | 279 ++++++++++++++++
 .../CodeGen/ARM/interrupt-save-fp-attr.ll     | 303 ++++++++++++++++++
 22 files changed, 999 insertions(+), 26 deletions(-)
 create mode 100644 clang/test/CodeGen/arm-interrupt-save-fp-attr-status-regs.c
 create mode 100644 clang/test/CodeGen/arm-interrupt-save-fp-attr.c
 create mode 100644 clang/test/Sema/arm-interrupt-save-fp-attr.c
 create mode 100644 llvm/test/CodeGen/ARM/interrupt-save-fp-attr-status-regs.mir
 create mode 100644 llvm/test/CodeGen/ARM/interrupt-save-fp-attr.ll

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index b7ad432738b29..4bd7a4e5f6bed 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -999,6 +999,22 @@ def ARMInterrupt : InheritableAttr, TargetSpecificAttr<TargetARM> {
   let Documentation = [ARMInterruptDocs];
 }
 
+def ARMInterruptSaveFP : InheritableAttr, TargetSpecificAttr<TargetARM> {
+  let Spellings = [GNU<"interrupt_save_fp">];
+  let Args = [EnumArgument<"Interrupt", "InterruptType", /*is_string=*/true,
+                           ["IRQ", "FIQ", "SWI", "ABORT", "UNDEF", ""],
+                           ["IRQ", "FIQ", "SWI", "ABORT", "UNDEF", "Generic"],
+                           1>];
+  let HasCustomParsing = 0;
+  let Documentation = [ARMInterruptSaveFPDocs];
+}
+
+def ARMSaveFP : InheritableAttr, TargetSpecificAttr<TargetARM> {
+  let Spellings = [];
+  let Subjects = SubjectList<[Function]>;
+  let Documentation = [InternalOnly];
+}
+
 def AVRInterrupt : InheritableAttr, TargetSpecificAttr<TargetAVR> {
   let Spellings = [GCC<"interrupt">];
   let Subjects = SubjectList<[Function]>;
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index c8b371280e35d..faefe70a2e63a 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2662,6 +2662,19 @@ The semantics are as follows:
   }];
 }
 
+def ARMInterruptSaveFPDocs : Documentation {
+    let Category = DocCatFunction;
+  let Heading = "interrupt_save_fp (ARM)";
+  let Content = [{
+Clang supports the GNU style ``__attribute__((interrupt_save_fp("TYPE")))``
+on ARM targets. This attribute behaves the same way as the ARM interrupt
+attribute, except the general purpose floating point registers are also saved,
+along with FPEXC and FPSCR. Note, even on M-class CPUs, where the floating
+point context can be automatically saved depending on the FPCCR, the general
+purpose floating point registers will be saved.
+  }];
+}
+
 def BPFPreserveAccessIndexDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index d17519d4c4155..17ce73abcee1f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -401,8 +401,14 @@ def warn_anyx86_excessive_regsave : Warning<
   InGroup<DiagGroup<"excessive-regsave">>;
 def warn_arm_interrupt_vfp_clobber : Warning<
   "interrupt service routine with vfp enabled may clobber the "
-  "interruptee's vfp state">,
+  "interruptee's vfp state; "
+  "consider using the `interrupt_save_fp` attribute to prevent this behavior">,
   InGroup<DiagGroup<"arm-interrupt-vfp-clobber">>;
+def warn_arm_interrupt_save_fp_without_vfp_unit : Warning<
+   "`interrupt_save_fp` only applies to targets that have a VFP unit enabled "
+   "for this compilation; this will be treated as a regular `interrupt` "
+   "attribute">,
+   InGroup<DiagGroup<"arm-interrupt-save-fp-no-vfp-unit">>;
 def err_arm_interrupt_called : Error<
   "interrupt service routine cannot be called directly">;
 def warn_interrupt_signal_attribute_invalid : Warning<
diff --git a/clang/include/clang/Sema/SemaARM.h b/clang/include/clang/Sema/SemaARM.h
index 7beb1906a122f..ce79e94ebdd9b 100644
--- a/clang/include/clang/Sema/SemaARM.h
+++ b/clang/include/clang/Sema/SemaARM.h
@@ -79,6 +79,7 @@ class SemaARM : public SemaBase {
   void handleNewAttr(Decl *D, const ParsedAttr &AL);
   void handleCmseNSEntryAttr(Decl *D, const ParsedAttr &AL);
   void handleInterruptAttr(Decl *D, const ParsedAttr &AL);
+  void handleInterruptSaveFPAttr(Decl *D, const ParsedAttr &AL);
 
   void CheckSMEFunctionDefAttributes(const FunctionDecl *FD);
 };
diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp
index a6d9a5549355c..dddf51a827159 100644
--- a/clang/lib/CodeGen/Targets/ARM.cpp
+++ b/clang/lib/CodeGen/Targets/ARM.cpp
@@ -190,6 +190,12 @@ class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
 
     Fn->addFnAttr("interrupt", Kind);
 
+    // Note: the ARMSaveFPAttr can only exist if we also have an interrupt
+    // attribute
+    const ARMSaveFPAttr *SaveFPAttr = FD->getAttr<ARMSaveFPAttr>();
+    if (SaveFPAttr)
+      Fn->addFnAttr("save-fp");
+
     ARMABIKind ABI = getABIInfo<ARMABIInfo>().getABIKind();
     if (ABI == ARMABIKind::APCS)
       return;
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index 3f53fb200a93d..2f9caaeb67088 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -1307,14 +1307,38 @@ void SemaARM::handleInterruptAttr(Decl *D, const ParsedAttr &AL) {
     return;
   }
 
-  const TargetInfo &TI = getASTContext().getTargetInfo();
-  if (TI.hasFeature("vfp"))
-    Diag(D->getLocation(), diag::warn_arm_interrupt_vfp_clobber);
+  if (!D->hasAttr<ARMSaveFPAttr>()) {
+    const TargetInfo &TI = getASTContext().getTargetInfo();
+    if (TI.hasFeature("vfp"))
+      Diag(D->getLocation(), diag::warn_arm_interrupt_vfp_clobber);
+  }
 
   D->addAttr(::new (getASTContext())
                  ARMInterruptAttr(getASTContext(), AL, Kind));
 }
 
+void SemaARM::handleInterruptSaveFPAttr(Decl *D, const ParsedAttr &AL) {
+  // Go ahead and add ARMSaveFPAttr because handleInterruptAttr() checks for
+  // it when deciding to issue a diagnostic about clobbering floating point
+  // registers, which ARMSaveFPAttr prevents.
+  D->addAttr(::new (SemaRef.Context) ARMSaveFPAttr(SemaRef.Context, AL));
+  SemaRef.ARM().handleInterruptAttr(D, AL);
+
+  // If ARM().handleInterruptAttr() failed, remove ARMSaveFPAttr.
+  if (!D->hasAttr<ARMInterruptAttr>()) {
+    D->dropAttr<ARMSaveFPAttr>();
+    return;
+  }
+
+  // If VFP not enabled, remove ARMSaveFPAttr but leave ARMInterruptAttr.
+  bool VFP = SemaRef.Context.getTargetInfo().hasFeature("vfp");
+
+  if (!VFP) {
+    SemaRef.Diag(D->getLocation(), diag::warn_arm_interrupt_save_fp_without_vfp_unit);
+    D->dropAttr<ARMSaveFPAttr>();
+  }
+}
+
 // Check if the function definition uses any AArch64 SME features without
 // having the '+sme' feature enabled and warn user if sme locally streaming
 // function returns or uses arguments with VL-based types.
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index d76afe9d6464d..82ed7bc859cb3 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -6985,6 +6985,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL,
   case ParsedAttr::AT_Interrupt:
     handleInterruptAttr(S, D, AL);
     break;
+  case ParsedAttr::AT_ARMInterruptSaveFP:
+    S.ARM().handleInterruptSaveFPAttr(D, AL);
+    break;
   case ParsedAttr::AT_X86ForceAlignArgPointer:
     S.X86().handleForceAlignArgPointerAttr(D, AL);
     break;
diff --git a/clang/test/CodeGen/arm-interrupt-save-fp-attr-status-regs.c b/clang/test/CodeGen/arm-interrupt-save-fp-attr-status-regs.c
new file mode 100644
index 0000000000000..4e7cafabdc6c2
--- /dev/null
+++ b/clang/test/CodeGen/arm-interrupt-save-fp-attr-status-regs.c
@@ -0,0 +1,34 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r5 -mfpu=vfpv3-d16 -marm -S -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK-R
+// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r5 -mfpu=vfpv3-d16 -mthumb -S -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK-R
+// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r4 -mfpu=vfpv3-d16 -marm -S -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK-R
+// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r4 -mfpu=vfpv3-d16 -mthumb -S -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK-R
+// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -S -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK-M
+// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-m33 -mfpu=fpv5-sp-d16 -S -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK-M
+
+void bar();
+
+__attribute__((interrupt_save_fp)) void test_generic_interrupt() {
+    // CHECK-R:      vmrs	r4, fpscr
+    // CHECK-R-NEXT: vmrs	r5, fpexc
+    // CHECK-R-NEXT: .save  {r4, r5}
+    // CHECK-R-NEXT: push	{r4, r5}
+    // .....
+    // CHECK-R:      pop	{r4, r5}
+    // CHECK-R-NEXT: vmsr	fpscr, r4
+    // CHECK-R-NEXT: vmsr	fpexc, r5
+
+    // CHECK-M:      vmrs	r4, fpscr
+    // CHECK-M-NEXT: .save  {r4}
+    // CHECK-M-NEXT: push	{r4}
+    // .....
+    // CHECK-M:      pop	{r4}
+    // CHECK-M-NEXT: vmsr	fpscr, r4
+    bar();
+}
diff --git a/clang/test/CodeGen/arm-interrupt-save-fp-attr.c b/clang/test/CodeGen/arm-interrupt-save-fp-attr.c
new file mode 100644
index 0000000000000..5db8b3daa7212
--- /dev/null
+++ b/clang/test/CodeGen/arm-interrupt-save-fp-attr.c
@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -triple thumb-apple-darwin -target-abi aapcs -target-feature +vfp4 -target-cpu cortex-m3 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple arm-apple-darwin -target-abi apcs-gnu -target-feature +vfp4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-APCS
+
+__attribute__((interrupt_save_fp)) void test_generic_interrupt() {
+  // CHECK: define{{.*}} arm_aapcscc void @test_generic_interrupt() [[GENERIC_ATTR:#[0-9]+]]
+
+  // CHECK-APCS: define{{.*}} void @test_generic_interrupt() [[GENERIC_ATTR:#[0-9]+]]
+}
+
+__attribute__((interrupt_save_fp("IRQ"))) void test_irq_interrupt() {
+  // CHECK: define{{.*}} arm_aapcscc void @test_irq_interrupt() [[IRQ_ATTR:#[0-9]+]]
+}
+
+__attribute__((interrupt_save_fp("FIQ"))) void test_fiq_interrupt() {
+  // CHECK: define{{.*}} arm_aapcscc void @test_fiq_interrupt() [[FIQ_ATTR:#[0-9]+]]
+}
+
+__attribute__((interrupt_save_fp("SWI"))) void test_swi_interrupt() {
+  // CHECK: define{{.*}} arm_aapcscc void @test_swi_interrupt() [[SWI_ATTR:#[0-9]+]]
+}
+
+__attribute__((interrupt_save_fp("ABORT"))) void test_abort_interrupt() {
+  // CHECK: define{{.*}} arm_aapcscc void @test_abort_interrupt() [[ABORT_ATTR:#[0-9]+]]
+}
+
+
+__attribute__((interrupt_save_fp("UNDEF"))) void test_undef_interrupt() {
+  // CHECK: define{{.*}} arm_aapcscc void @test_undef_interrupt() [[UNDEF_ATTR:#[0-9]+]]
+}
+
+
+// CHECK: attributes [[GENERIC_ATTR]] = { {{.*}} {{"interrupt"[^=]}}{{.*}} "save-fp"
+// CHECK: attributes [[IRQ_ATTR]] = { {{.*}} "interrupt"="IRQ" {{.*}} "save-fp"
+// CHECK: attributes [[FIQ_ATTR]] = { {{.*}} "interrupt"="FIQ" {{.*}} "save-fp"
+// CHECK: attributes [[SWI_ATTR]] = { {{.*}} "interrupt"="SWI" {{.*}} "save-fp"
+// CHECK: attributes [[ABORT_ATTR]] = { {{.*}} "interrupt"="ABORT" {{.*}} "save-fp"
+// CHECK: attributes [[UNDEF_ATTR]] = { {{.*}} "interrupt"="UNDEF" {{.*}} "save-fp"
+
+// CHECK-APCS: attributes [[GENERIC_ATTR]] = { {{.*}} "interrupt" {{.*}} "save-fp"
\ No newline at end of file
diff --git a/clang/test/Sema/arm-interrupt-attr.c b/clang/test/Sema/arm-interrupt-attr.c
index f2698eedacea1..b0bf031a573fa 100644
--- a/clang/test/Sema/arm-interrupt-attr.c
+++ b/clang/test/Sema/arm-interrupt-attr.c
@@ -3,7 +3,7 @@
 
 
 #ifdef __ARM_FP
-__attribute__((interrupt("IRQ"))) void float_irq(void); // expected-warning {{interrupt service routine with vfp enabled may clobber the interruptee's vfp state}}
+__attribute__((interrupt("IRQ"))) void float_irq(void); // expected-warning {{interrupt service routine with vfp enabled may clobber the interruptee's vfp state; consider using the `interrupt_save_fp` attribute to prevent this behavior}}
 #else // !defined(__ARM_FP)
 __attribute__((interrupt("irq"))) void foo1(void) {} // expected-warning {{'interrupt' attribute argument not supported: irq}}
 __attribute__((interrupt(IRQ))) void foo(void) {} // expected-error {{'interrupt' attribute requires a string}}
diff --git a/clang/test/Sema/arm-interrupt-save-fp-attr.c b/clang/test/Sema/arm-interrupt-save-fp-attr.c
new file mode 100644
index 0000000000000..f9949f7f94ba8
--- /dev/null
+++ b/clang/test/Sema/arm-interrupt-save-fp-attr.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 %s -triple arm-none-eabi -verify -fsyntax-only
+// RUN: %clang_cc1 %s -triple arm-none-eabi -target-feature +vfp2 -verify -fsyntax-only
+
+
+#if !defined(__ARM_FP)
+__attribute__((interrupt_save_fp("IRQ"))) void float_irq(void); // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}}
+#else // defined(__ARM_FP)
+__attribute__((interrupt_save_fp("irq"))) void foo1(void) {} // expected-warning {{'interrupt_save_fp' attribute argument not supported: irq}}
+__attribute__((interrupt_save_fp(IRQ))) void foo(void) {} // expected-error {{'interrupt_save_fp' attribute requires a string}}
+__attribute__((interrupt_save_fp("IRQ", 1))) void foo2(void) {} // expected-error {{'interrupt_save_fp' attribute takes no more than 1 argument}}
+__attribute__((interrupt_save_fp("IRQ"))) void foo3(void) {}
+__attribute__((interrupt_save_fp("FIQ"))) void foo4(void) {}
+__attribute__((interrupt_save_fp("SWI"))) void foo5(void) {}
+__attribute__((interrupt_save_fp("ABORT"))) void foo6(void) {}
+__attribute__((interrupt_save_fp("UNDEF"))) void foo7(void) {}
+__attribute__((interrupt_save_fp)) void foo8(void) {}
+__attribute__((interrupt_save_fp())) void foo9(void) {}
+__attribute__((interrupt_save_fp(""))) void foo10(void) {}
+
+__attribute__((interrupt_save_fp("IRQ"))) void callee(void) {}
+
+void caller(void)
+{
+    callee(); // expected-error {{interrupt service routine cannot be called directly}}
+}
+#endif // __ARM_FP
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index b18d3fcc9e3f4..9b7dd8099368d 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -311,7 +311,7 @@ def int_arm_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">,
 // VFP
 
 def int_arm_get_fpscr : ClangBuiltin<"__builtin_arm_get_fpscr">,
-                       DefaultAttrsIntrinsic<[llvm_i32_ty], [], []>;
+                       DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrReadMem]>;
 def int_arm_set_fpscr : ClangBuiltin<"__builtin_arm_set_fpscr">,
                        DefaultAttrsIntrinsic<[], [llvm_i32_ty], []>;
 def int_arm_vcvtr : DefaultAttrsIntrinsic<[llvm_float_ty],
diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index db8e467eedf64..a3bb3780b9b2b 100644
--- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -1202,6 +1202,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
     SrcReg = ~0U;
     DstReg = MI->getOperand(0).getReg();
     break;
+  case ARM::VMRS:
+    SrcReg = ARM::FPSCR;
+    DstReg = MI->getOperand(0).getReg();
+    break;
+  case ARM::VMRS_FPEXC:
+    SrcReg = ARM::FPEXC;
+    DstReg = MI->getOperand(0).getReg();
+    break;
   default:
     SrcReg = MI->getOperand(1).getReg();
     DstReg = MI->getOperand(0).getReg();
@@ -1368,6 +1376,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
         // correct ".save" later.
         AFI->EHPrologueRemappedRegs[DstReg] = SrcReg;
         break;
+      case ARM::VMRS:
+      case ARM::VMRS_FPEXC:
+        // If a function spills FPSCR or FPEXC, we copy the values to low
+        // registers before pushing them.  However, we can't issue annotations
+        // for FP status registers because ".save" requires GPR registers, and
+        // ".vsave" requires DPR registers, so don't record the copy and simply
+        // emit annotations for the source registers used for the store.
+        break;
       case ARM::tLDRpci: {
         // Grab the constpool index and check, whether it corresponds to
         // original or cloned constpool entry.
diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 9ae0054521b05..bc20daf0cfbbc 100644
--- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -80,6 +80,25 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
                                        ? CSR_ATPCS_SplitPush_SwiftTail_SaveList
                                        : CSR_AAPCS_SwiftTail_SaveList);
   } else if (F.hasFnAttribute("interrupt")) {
+
+    // Don't save the floating point registers if target does not have floating
+    // point registers.
+    if (STI.hasFPRegs() && F.hasFnAttribute("save-fp")) {
+      bool HasNEON = STI.hasNEON();
+
+      if (STI.isMClass()) {
+        assert(!HasNEON && "NEON is only for Cortex-R/A");
+        return PushPopSplit == ARMSubtarget::SplitR7
+                   ? CSR_ATPCS_SplitPush_FP_SaveList
+                   : CSR_AAPCS_FP_SaveList;
+      }
+      if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") {
+        return HasNEON ? CSR_FIQ_FP_NEON_SaveList : CSR_FIQ_FP_SaveList;
+      }
+      return HasNEON ? CSR_GenericInt_FP_NEON_SaveList
+                     : CSR_GenericInt_FP_SaveList;
+    }
+
     if (STI.isMClass()) {
       // M-class CPUs have hardware which saves the registers needed to allow a
       // function conforming to the AAPCS to function as a handler.
diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td
index f1ab1c3103740..ca3e827659c24 100644
--- a/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -268,19 +268,14 @@ def CC_ARM_Win32_CFGuard_Check : CallingConv<[
 def CSR_NoRegs : CalleeSavedRegs<(add)>;
 def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>;
 
+def CSR_FP_Interrupt_Regs : CalleeSavedRegs<(add FPSCR, FPEXC, (sequence "D%u", 15, 0))>;
+def CSR_FP_NEON_Interrupt_Regs : CalleeSavedRegs<(add CSR_FP_Interrupt_Regs, 
+                                                  (sequence "D%u", 31, 16))>;
+
 def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
                                      (sequence "D%u", 15, 8))>;
 
-// The Windows Control Flow Guard Check function preserves the same registers as
-// AAPCS, and also preserves all floating point registers.
-def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7,
-                                     R6, R5, R4, (sequence "D%u", 15, 0))>;
-
-// R8 is used to pass swifterror, remove it from CSR.
-def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>;
-
-// R10 is used to pass swiftself, remove it from CSR.
-def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
+def CSR_AAPCS_FP : CalleeSavedRegs<(add CSR_AAPCS, CSR_FP_Interrupt_Regs)>;
 
 // The order of callee-saved registers needs to match the order we actually push
 // them in FrameLowering, because this order is what's used by
@@ -294,6 +289,21 @@ def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4,
                                                (sequence "D%u", 15, 8),
                                                LR, R11)>;
 
+def CSR_ATPCS_SplitPush_FP : CalleeSavedRegs<(add CSR_ATPCS_SplitPush,
+                                              CSR_FP_Interrupt_Regs)>;
+
+// The Windows Control Flow Guard Check function preserves the same registers as
+// AAPCS, and also preserves all floating point registers.
+def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7,
+                                     R6, R5, R4, (sequence "D%u", 15, 0))>;
+
+// R8 is used to pass swifterror, remove it from CSR.
+def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>;
+
+// R10 is used to pass swiftself, remove it from CSR.
+def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>;
+
+
 // R8 is used to pass swifterror, remove it from CSR.
 def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush,
                                                       R8)>;
@@ -361,6 +371,13 @@ def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS,
 // generally does rather than tracking its liveness as a normal register.
 def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>;
 
+def CSR_GenericInt_FP : CalleeSavedRegs<(add CSR_GenericInt, 
+                                         CSR_FP_Interrupt_Regs)>;
+
+def CSR_GenericInt_FP_NEON : CalleeSavedRegs<(add CSR_GenericInt_FP, 
+                                              CSR_FP_NEON_Interrupt_Regs)>;
+
+
 // The fast interrupt handlers have more private state and get their own copies
 // of R8-R12, in addition to SP and LR. As before, mark LR for saving too.
 
@@ -369,4 +386,9 @@ def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>;
 // registers.
 def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>;
 
+def CSR_FIQ_FP : CalleeSavedRegs<(add CSR_FIQ, CSR_FP_Interrupt_Regs)>;
+
+def CSR_FIQ_FP_NEON : CalleeSavedRegs<(add CSR_FIQ_FP, 
+                                       CSR_FP_NEON_Interrupt_Regs)>;
+
 
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index 475f53fc03399..e0698313904c7 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -175,6 +175,7 @@ skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
 enum class SpillArea {
   GPRCS1,
   GPRCS2,
+  FPStatus,
   DPRCS1,
   DPRCS2,
   GPRCS3,
@@ -220,6 +221,10 @@ SpillArea getSpillArea(Register Reg,
   case ARM::FPCXTNS:
     return SpillArea::FPCXT;
 
+  case ARM::FPSCR:
+  case ARM::FPEXC:
+    return SpillArea::FPStatus;
+
   case ARM::R0:
   case ARM::R1:
   case ARM::R2:
@@ -917,8 +922,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
 
   // Determine the sizes of each callee-save spill areas and record which frame
   // belongs to which callee-save spill areas.
-  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCS1Size = 0, GPRCS3Size = 0,
-           DPRCS2Size = 0;
+  unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0,
+           DPRCS1Size = 0, GPRCS3Size = 0, DPRCS2Size = 0;
   int FramePtrSpillFI = 0;
   int D8SpillFI = 0;
 
@@ -975,6 +980,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
     case SpillArea::GPRCS2:
       GPRCS2Size += 4;
       break;
+    case SpillArea::FPStatus:
+      FPStatusSize += 4;
+      break;
     case SpillArea::DPRCS1:
       DPRCS1Size += 8;
       break;
@@ -982,7 +990,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       GPRCS3Size += 4;
       break;
     case SpillArea::DPRCS2:
-      DPRCS2Size += 4;
+      DPRCS2Size += 8;
       break;
     }
   }
@@ -1023,13 +1031,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
   unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
   unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
+  unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize;
 
   Align DPRAlign = DPRCS1Size ? std::min(Align(8), Alignment) : Align(4);
-  unsigned DPRGapSize =
-      (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size + GPRCS2Size) %
-      DPRAlign.value();
+  unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
+                         GPRCS2Size + FPStatusSize) %
+                        DPRAlign.value();
 
-  unsigned DPRCS1Offset = GPRCS2Offset - DPRGapSize - DPRCS1Size;
+  unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size;
 
   if (HasFP) {
     // Offset from the CFA to the saved frame pointer, will be negative.
@@ -1054,6 +1063,19 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       BeforeFPPush = false;
   }
 
+  // Move past FP status save area.
+  if (FPStatusSize > 0) {
+    while (MBBI != MBB.end()) {
+      unsigned Opc = MBBI->getOpcode();
+      if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC)
+        MBBI++;
+      else
+        break;
+    }
+    LastPush = MBBI++;
+    DefCFAOffsetCandidates.addInst(LastPush, FPStatusSize);
+  }
+
   // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
   // .cfi_offset operations will reflect that.
   if (DPRGapSize) {
@@ -1224,7 +1246,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       FPPushInst = GPRCS3Push;
       FPOffsetAfterPush = MFI.getObjectOffset(FramePtrSpillFI) +
                           ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size +
-                          GPRCS2Size + DPRCS1Size + DPRGapSize +
+                          FPStatusSize + GPRCS2Size + DPRCS1Size + DPRGapSize +
                           sizeOfSPAdjustment(*FPPushInst);
       LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
                         << FPOffsetAfterPush << "  after that push\n");
@@ -1297,6 +1319,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
       case SpillArea::GPRCS3:
         CFIPos = std::next(GPRCS3Push);
         break;
+      case SpillArea::FPStatus:
       case SpillArea::FPCXT:
       case SpillArea::DPRCS2:
         // FPCXT and DPRCS2 are not represented in the DWARF info.
@@ -1332,6 +1355,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
   AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
   AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
   AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
+  AFI->setFPStatusSavesSize(FPStatusSize);
   AFI->setDPRCalleeSavedGapSize(DPRGapSize);
   AFI->setDPRCalleeSavedArea1Size(DPRCS1Size);
   AFI->setGPRCalleeSavedArea3Size(GPRCS3Size);
@@ -1458,8 +1482,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
     NumBytes -=
         (ReservedArgStack + AFI->getFPCXTSaveAreaSize() +
          AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() +
-         AFI->getDPRCalleeSavedGapSize() + AFI->getDPRCalleeSavedArea1Size() +
-         AFI->getGPRCalleeSavedArea3Size());
+         AFI->getFPStatusSavesSize() + AFI->getDPRCalleeSavedGapSize() +
+         AFI->getDPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea3Size());
 
     // Reset SP based on frame pointer only if the stack frame extends beyond
     // frame pointer stack slot or target is ELF and the function has FP.
@@ -1835,6 +1859,108 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
   }
 }
 
+void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                         ArrayRef<CalleeSavedInfo> CSI,
+                                         unsigned PushOpc) const {
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+  SmallVector<MCRegister> Regs;
+  auto RegPresent = [&CSI](MCRegister Reg) {
+    return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) {
+      return C.getReg() == Reg;
+    });
+  };
+
+  // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS
+  // instruction.
+  if (RegPresent(ARM::FPSCR)) {
+    BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS), ARM::R4)
+        .add(predOps(ARMCC::AL))
+        .setMIFlags(MachineInstr::FrameSetup);
+
+    Regs.push_back(ARM::R4);
+  }
+
+  // If we need to save FPEXC, then we must move FPEXC into R5 with the
+  // VMRS_FPEXC instruction.
+  if (RegPresent(ARM::FPEXC)) {
+    BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS_FPEXC), ARM::R5)
+        .add(predOps(ARMCC::AL))
+        .setMIFlags(MachineInstr::FrameSetup);
+
+    Regs.push_back(ARM::R5);
+  }
+
+  // If neither FPSCR nor FPEXC is present, then do nothing.
+  if (Regs.size() == 0)
+    return;
+
+  // Push both R4 and R5 onto the stack, if present.
+  MachineInstrBuilder MIB =
+      BuildMI(MBB, MI, DebugLoc(), TII.get(PushOpc), ARM::SP)
+          .addReg(ARM::SP)
+          .add(predOps(ARMCC::AL))
+          .setMIFlags(MachineInstr::FrameSetup);
+
+  for (Register Reg : Regs) {
+    MIB.addReg(Reg);
+  }
+}
+
+void ARMFrameLowering::emitFPStatusRestores(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+    MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const {
+  MachineFunction &MF = *MBB.getParent();
+  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+
+  SmallVector<MCRegister> Regs;
+  auto RegPresent = [&CSI](MCRegister Reg) {
+    return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) {
+      return C.getReg() == Reg;
+    });
+  };
+
+  // Do nothing if we don't need to restore any FP status registers.
+  if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC))
+    return;
+
+  // Pop registers off of the stack.
+  MachineInstrBuilder MIB =
+      BuildMI(MBB, MI, DebugLoc(), TII.get(LdmOpc), ARM::SP)
+          .addReg(ARM::SP)
+          .add(predOps(ARMCC::AL))
+          .setMIFlags(MachineInstr::FrameDestroy);
+
+  // If FPSCR was saved, it will be popped into R4.
+  if (RegPresent(ARM::FPSCR)) {
+    MIB.addReg(ARM::R4, RegState::Define);
+  }
+
+  // If FPEXC was saved, it will be popped into R5.
+  if (RegPresent(ARM::FPEXC)) {
+    MIB.addReg(ARM::R5, RegState::Define);
+  }
+
+  // Move the FPSCR value back into the register with the VMSR instruction.
+  if (RegPresent(ARM::FPSCR)) {
+    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR))
+        .addReg(ARM::R4)
+        .add(predOps(ARMCC::AL))
+        .setMIFlags(MachineInstr::FrameDestroy);
+  }
+
+  // Move the FPEXC value back into the register with the VMSR_FPEXC
+  // instruction.
+  if (RegPresent(ARM::FPEXC)) {
+    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR_FPEXC))
+        .addReg(ARM::R5)
+        .add(predOps(ARMCC::AL))
+        .setMIFlags(MachineInstr::FrameDestroy);
+  }
+}
+
 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
 /// starting from d8.  Also insert stack realignment code and leave the stack
 /// pointer pointing to the d8 spill slot.
@@ -2154,6 +2280,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters(
 
   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
+  emitFPStatusSaves(MBB, MI, CSI, PushOpc);
   emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
   emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS3);
 
@@ -2211,6 +2338,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters(
 
   emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS3);
   emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
+  emitFPStatusRestores(MBB, MI, CSI, PopOpc);
   emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
   emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);
 
@@ -2427,6 +2555,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   unsigned NumFPRSpills = 0;
   SmallVector<unsigned, 4> UnspilledCS1GPRs;
   SmallVector<unsigned, 4> UnspilledCS2GPRs;
+  const Function &F = MF.getFunction();
   const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
       MF.getSubtarget().getRegisterInfo());
   const ARMBaseInstrInfo &TII =
@@ -2440,6 +2569,21 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
   ARMSubtarget::PushPopSplitVariation PushPopSplit =
       STI.getPushPopSplitVariation(MF);
 
+  // For a floating point interrupt, save these registers always, since LLVM
+  // currently doesn't model reads/writes to these registers.
+  if (F.hasFnAttribute("interrupt") && F.hasFnAttribute("save-fp")) {
+    SavedRegs.set(ARM::FPSCR);
+    SavedRegs.set(ARM::R4);
+
+    // This register will only be present on non-MClass targets.
+    if (STI.isMClass()) {
+      SavedRegs.reset(ARM::FPEXC);
+    } else {
+      SavedRegs.set(ARM::FPEXC);
+      SavedRegs.set(ARM::R5);
+    }
+  }
+
   // Spill R4 if Thumb2 function requires stack realignment - it will be used as
   // scratch register. Also spill R4 if Thumb2 function has varsized objects,
   // since it's not always possible to restore sp from fp in a single
diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h
index ff51f1a7af022..9dc88d4671c38 100644
--- a/llvm/lib/Target/ARM/ARMFrameLowering.h
+++ b/llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -99,6 +99,15 @@ class ARMFrameLowering : public TargetFrameLowering {
                    unsigned LdrOpc, bool isVarArg, bool NoGap,
                    function_ref<bool(unsigned)> Func) const;
 
+  void emitFPStatusSaves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+                         ArrayRef<CalleeSavedInfo> CSI,
+                         unsigned PushOneOpc) const;
+
+  /// Pop the saved FPSCR/FPEXC words with \p LdmOpc and write them back.
+  void emitFPStatusRestores(
+      MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+      MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc) const;
+
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF,
                                 MachineBasicBlock &MBB,
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 754517f3bc4d5..2ee2cb2fda4a0 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -2513,6 +2513,9 @@ class MovFromVFP<bits<4> opc19_16, dag oops, dag iops, string opc, string asm,
   let Inst{3-0}   = 0b0000;
   let Unpredictable{7-5} = 0b111;
   let Unpredictable{3-0} = 0b1111;
+
+  // Needed to avoid errors when a MachineInstr::FrameSetup flag is set.
+  let mayStore = 0;
 }
 
 let DecoderMethod = "DecodeForVMRSandVMSR" in {
diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index e330d83cd80d5..a9c4b53f9ae64 100644
--- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -89,6 +89,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   unsigned FRSaveSize = 0;
   unsigned GPRCS1Size = 0;
   unsigned GPRCS2Size = 0;
+  unsigned FPStatusSize = 0;
   unsigned DPRCSAlignGapSize = 0;
   unsigned DPRCS1Size = 0;
   unsigned GPRCS3Size = 0;
@@ -205,6 +206,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; }
   unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; }
   unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; }
+  unsigned getFPStatusSavesSize() const       { return FPStatusSize; }
   unsigned getDPRCalleeSavedGapSize() const   { return DPRCSAlignGapSize; }
   unsigned getDPRCalleeSavedArea1Size() const { return DPRCS1Size; }
   unsigned getGPRCalleeSavedArea3Size() const { return GPRCS3Size; }
@@ -213,6 +215,7 @@ class ARMFunctionInfo : public MachineFunctionInfo {
   void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; }
   void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; }
   void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; }
+  void setFPStatusSavesSize(unsigned s)       { FPStatusSize = s; }
   void setDPRCalleeSavedGapSize(unsigned s)   { DPRCSAlignGapSize = s; }
   void setDPRCalleeSavedArea1Size(unsigned s) { DPRCS1Size = s; }
   void setGPRCalleeSavedArea3Size(unsigned s) { GPRCS3Size = s; }
diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td
index f5a675e2976bb..5a31b88ba7f70 100644
--- a/llvm/lib/Target/ARM/ARMRegisterInfo.td
+++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td
@@ -417,6 +417,13 @@ def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)> {
   let CopyCost = -1;
 }
 
+// This RegisterClass is required to add FPSCR and FPEXC into a calling
+// convention.
+def FP_STATUS_REGS : RegisterClass<"ARM", [i32], 32, (add FPSCR, FPEXC)> {
+  let CopyCost = -1;  // Don't allow copying of status registers.
+  let isAllocatable = 0;
+}
+
 // Scalar single precision floating point register class..
 // FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack
 // to avoid partial-write dependencies on D or Q (depending on platform)
diff --git a/llvm/test/CodeGen/ARM/interrupt-save-fp-attr-status-regs.mir b/llvm/test/CodeGen/ARM/interrupt-save-fp-attr-status-regs.mir
new file mode 100644
index 0000000000000..9a557c506922a
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/interrupt-save-fp-attr-status-regs.mir
@@ -0,0 +1,279 @@
+# RUN: llc -mtriple=armv7-none-none-eabihf -mcpu=cortex-r5 -o - %s -run-pass=prologepilog \
+# RUN: | FileCheck --check-prefix=CHECK-R-ARM %s
+# RUN: llc -mtriple=armv7-none-none-eabihf -mcpu=cortex-r4 -o - %s -run-pass=prologepilog \
+# RUN: | FileCheck --check-prefix=CHECK-R-ARM %s
+# RUN: llc -mtriple=thumbv7-none-none-eabihf -mcpu=cortex-r5 -o - %s -run-pass=prologepilog \
+# RUN: | FileCheck --check-prefix=CHECK-R-THUMB %s
+# RUN: llc -mtriple=thumbv7-none-none-eabihf -mcpu=cortex-r4 -o - %s -run-pass=prologepilog \
+# RUN: | FileCheck --check-prefix=CHECK-R-THUMB %s
+# RUN: llc -mtriple=thumbv7-none-none-eabihf -mcpu=cortex-m3 -o - %s -run-pass=prologepilog \
+# RUN: | FileCheck --check-prefix=CHECK-M-THUMB %s
+# RUN: llc -mtriple=thumbv7-none-none-eabihf -mcpu=cortex-m4 -o - %s -run-pass=prologepilog \
+# RUN: | FileCheck --check-prefix=CHECK-M-THUMB %s
+# RUN: llc -mtriple=thumbv8-none-none-eabihf -mcpu=cortex-m33 -o - %s -run-pass=prologepilog \
+# RUN: | FileCheck --check-prefix=CHECK-M-THUMB %s
+
+# =============================================================================
+# ============================ cortex-r arm-mode ==============================
+# =============================================================================
+# This IRQ will save 112 bytes:
+#
+# |---------+------+----------|
+# | reg     | size | zone     |
+# |---------+------+----------|
+# | LR      |  4x1 | GPR      |
+# | R12-R10 |  4x3 |          |
+# | R5-R0   |  4x6 |          |
+# |---------+------+----------|
+# | FPEXC   |  4x1 | FPStatus |
+# | FPSCR   |  4x1 |          |
+# |---------+------+----------|
+# | D7-D0   |  8x8 | FPRegs   |
+# |---------+------+----------|
+# |         |  112 |          |
+# |---------+------+----------|
+#
+# ================================= Prologue =================================
+#
+# Frame pointer (r11) will be stored at $original_sp - 12, but we can't save the
+# FP until after we save the GPR zone of registers. The GPR zone of registers
+# moves the stack by 40 bytes. So $original_sp = $current_sp + 40. Thus,
+# $current_sp + 40 - 12 = $current_sp + 28. Thus, we see the instruction:
+#
+# $r11 = ADDri $sp, 28
+#
+# We don't have dwarf information for the FPEXC and FPSCR registers, so there's
+# no CFI_INSTRUCTION for those saves. So, we should see an 8 byte disparity in
+# the register offsets. $r0 is -40, and $d7 is -56.
+#
+# (-40) - (-56) = 16.
+# 16 = 8 (bytes from $d7) + 8 (bytes from FPSCR + FPEXC)
+#
+# There's an extra BFC to force the stack to be aligned.
+#
+# $sp = BFC $sp, 4294967288 /* ~0x7 */
+#
+# ================================= Epilogue =================================
+#
+# We use the frame pointer to restore the SP. Since $r11 is currently pointing
+# to the previous $r11's stack position (aka base_of_stack - 12), and we
+# allocated 112 bytes, $sp = $r11 - (112 - 12), or $sp = $r11 - 100, which is
+# why we see this instruction:
+#
+# $sp = SUBri $r11, 100
+
+# CHECK-R-ARM-LABEL: name: irq_fn
+# CHECK-R-ARM-LABEL: bb.0 (%ir-block.0):
+# CHECK-R-ARM:         $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r0, killed $r1, killed $r2, killed $r3, killed $r4, killed $r5, killed $r10, killed $r11, killed $r12, killed $lr
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 40
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $ra_auth_code, -8
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r11, -12
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r10, -16
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r5, -20
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r4, -24
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r3, -28
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r2, -32
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r1, -36
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $r0, -40
+# CHECK-R-ARM-NEXT:    $r11 = frame-setup ADDri killed $sp, 28, 14 /* CC::al */, $noreg, $noreg
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION def_cfa $r11, 12
+# CHECK-R-ARM-NEXT:    $r4 = frame-setup VMRS 14 /* CC::al */, $noreg, implicit $fpscr
+# CHECK-R-ARM-NEXT:    $r5 = frame-setup VMRS_FPEXC 14 /* CC::al */, $noreg, implicit $fpscr
+# CHECK-R-ARM-NEXT:    $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4, $r5
+# CHECK-R-ARM-NEXT:    $sp = frame-setup VSTMDDB_UPD $sp, 14 /* CC::al */, $noreg, killed $d0, killed $d1, killed $d2, killed $d3, killed $d4, killed $d5, killed $d6, killed $d7
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $d7, -56
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $d6, -64
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $d5, -72
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $d4, -80
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $d3, -88
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $d2, -96
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $d1, -104
+# CHECK-R-ARM-NEXT:    frame-setup CFI_INSTRUCTION offset $d0, -112
+# CHECK-R-ARM-NEXT:    $sp = BFC killed $sp, 4294967288, 14 /* CC::al */, $noreg
+# CHECK-R-ARM-NEXT:    BL @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+# CHECK-R-ARM-NEXT:    $sp = frame-destroy SUBri killed $r11, 100, 14 /* CC::al */, $noreg, $noreg
+# CHECK-R-ARM-NEXT:    $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d0, def $d1, def $d2, def $d3, def $d4, def $d5, def $d6, def $d7
+# CHECK-R-ARM-NEXT:    $sp = frame-destroy LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r5
+# CHECK-R-ARM-NEXT:    frame-destroy VMSR $r4, 14 /* CC::al */, $noreg, implicit-def $fpscr
+# CHECK-R-ARM-NEXT:    frame-destroy VMSR_FPEXC $r5, 14 /* CC::al */, $noreg, implicit-def $fpscr
+
+# =============================================================================
+# =========================== cortex-r thumb-mode =============================
+# =============================================================================
+# This IRQ will save 112 bytes:
+#
+# |-------+------+----------|
+# | reg   | size | zone     |
+# |-------+------+----------|
+# | LR    |  4x1 | GPR      |
+# | R12   |  4x1 |          |
+# | R7-R0 |  4x8 |          |
+# |-------+------+----------|
+# | FPEXC |  4x1 | FPStatus |
+# | FPSCR |  4x1 |          |
+# |-------+------+----------|
+# | D7-D0 |  8x8 | FPRegs   |
+# |-------+------+----------|
+# |       |  112 |          |
+# |-------+------+----------|
+#
+# ================================= Prologue =================================
+#
+# Frame pointer (r7) will be stored at $original_sp - 12, but we can't save the
+# FP until after we save the GPR zone of registers. The GPR zone of registers
+# moves the stack by 40 bytes. So $original_sp = $current_sp + 40. Thus,
+# $current_sp + 40 - 12 = $current_sp + 28. Thus, we see the instruction:
+#
+# $r7 = t2ADDri $sp, 28
+#
+# We don't have dwarf information for the FPEXC and FPSCR registers, so there's
+# no CFI_INSTRUCTION for those saves. So, we should see an 8 byte disparity in
+# the register offsets. $r0 is -40, and $d7 is -56.
+#
+# (-40) - (-56) = 16.
+# 16 = 8 (bytes from $d7) + 8 (bytes from FPSCR + FPEXC)
+#
+# There's an extra BFC to force the stack to be aligned. This is done in 3
+# steps, because the value of $sp needs to be moved into a low register
+# (r0-r7), and then operated on, and then moved back.
+#
+# $r4 = tMOVr $sp, 14
+# $r4 = t2BFC $r4, 4294967288 /* ~0x7 */, 14
+# $sp = tMOVr $r4, 14
+#
+# ================================= Epilogue =================================
+#
+# We use the frame pointer to restore the SP. Since $r7 is currently pointing
+# to the previous $r7's stack position (aka base_of_stack - 12), and we
+# allocated 112 bytes, $sp = $r7 - (112 - 12), or $sp = $r7 - 100, which is
+# why we see this instruction:
+#
+# $r4 = t2SUBri $r7, 100
+# $sp = tMOVr $r4
+
+# CHECK-R-THUMB-LABEL: name: irq_fn
+# CHECK-R-THUMB-LABEL: bb.0 (%ir-block.0):
+# CHECK-R-THUMB:        $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r0, killed $r1, killed $r2, killed $r3, killed $r4, killed $r5, killed $r6, killed $r7, killed $r12, killed $lr
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 40
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $ra_auth_code, -8
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r7, -12
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r6, -16
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r5, -20
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r4, -24
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r3, -28
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r2, -32
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r1, -36
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r0, -40
+# CHECK-R-THUMB-NEXT:    $r7 = frame-setup t2ADDri killed $sp, 28, 14 /* CC::al */, $noreg, $noreg
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION def_cfa $r7, 12
+# CHECK-R-THUMB-NEXT:    $r4 = frame-setup VMRS 14 /* CC::al */, $noreg, implicit $fpscr
+# CHECK-R-THUMB-NEXT:    $r5 = frame-setup VMRS_FPEXC 14 /* CC::al */, $noreg, implicit $fpscr
+# CHECK-R-THUMB-NEXT:    $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4, $r5
+# CHECK-R-THUMB-NEXT:    $sp = frame-setup VSTMDDB_UPD $sp, 14 /* CC::al */, $noreg, killed $d0, killed $d1, killed $d2, killed $d3, killed $d4, killed $d5, killed $d6, killed $d7
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d7, -56
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d6, -64
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d5, -72
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d4, -80
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d3, -88
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d2, -96
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d1, -104
+# CHECK-R-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d0, -112
+# CHECK-R-THUMB-NEXT:    $r4 = tMOVr killed $sp, 14 /* CC::al */, $noreg
+# CHECK-R-THUMB-NEXT:    $r4 = t2BFC killed $r4, 4294967288, 14 /* CC::al */, $noreg
+# CHECK-R-THUMB-NEXT:    $sp = tMOVr killed $r4, 14 /* CC::al */, $noreg
+# CHECK-R-THUMB-NEXT:    BL @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+# CHECK-R-THUMB-NEXT:    $r4 = frame-destroy t2SUBri killed $r7, 100, 14 /* CC::al */, $noreg, $noreg
+# CHECK-R-THUMB-NEXT:    $sp = frame-destroy tMOVr $r4, 14 /* CC::al */, $noreg
+# CHECK-R-THUMB-NEXT:    $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d0, def $d1, def $d2, def $d3, def $d4, def $d5, def $d6, def $d7
+# CHECK-R-THUMB-NEXT:    $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r5
+# CHECK-R-THUMB-NEXT:    frame-destroy VMSR $r4, 14 /* CC::al */, $noreg, implicit-def $fpscr
+# CHECK-R-THUMB-NEXT:    frame-destroy VMSR_FPEXC $r5, 14 /* CC::al */, $noreg, implicit-def $fpscr
+# CHECK-R-THUMB-NEXT:    $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r0, def $r1, def $r2, def $r3, def $r4, def $r5, def $r6, def $r7, def $r12, def $lr
+# CHECK-R-THUMB-NEXT:    SUBS_PC_LR 4, 14 /* CC::al */, $noreg
+
+# =============================================================================
+# ============================== cortex-m thumb ===============================
+# =============================================================================
+# This IRQ will save 88 bytes:
+#
+# |---------+------+----------|
+# | reg     | size | zone     |
+# |---------+------+----------|
+# | LR      |  4x1 | GPR      |
+# | R7-R6   |  4x2 |          |
+# | R4      |  4x1 |          |
+# |---------+------+----------|
+# | FPSCR   |  4x1 | FPStatus |
+# |---------+------+----------|
+# | EMPTY   |  4x1 | Align    |
+# |---------+------+----------|
+# | D7-D0   |  8x8 | FPRegs   |
+# |---------+------+----------|
+# |         |   88 |          |
+# |---------+------+----------|
+
+# CHECK-M-THUMB-LABEL: name: irq_fn
+# CHECK-M-THUMB-LABEL: bb.0 (%ir-block.0):
+# CHECK-M-THUMB:         $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r6, killed $r7, killed $lr
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $lr, -4
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r7, -8
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r6, -12
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $r4, -16
+# CHECK-M-THUMB-NEXT:    $r7 = frame-setup t2ADDri killed $sp, 8, 14 /* CC::al */, $noreg, $noreg
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION def_cfa $r7, 8
+# CHECK-M-THUMB-NEXT:    $r4 = frame-setup VMRS 14 /* CC::al */, $noreg, implicit $fpscr
+# CHECK-M-THUMB-NEXT:    $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4
+# CHECK-M-THUMB-NEXT:    $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg
+# CHECK-M-THUMB-NEXT:    $sp = frame-setup VSTMDDB_UPD $sp, 14 /* CC::al */, $noreg, killed $d0, killed $d1, killed $d2, killed $d3, killed $d4, killed $d5, killed $d6, killed $d7
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d7, -32
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d6, -40
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d5, -48
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d4, -56
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d3, -64
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d2, -72
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d1, -80
+# CHECK-M-THUMB-NEXT:    frame-setup CFI_INSTRUCTION offset $d0, -88
+# CHECK-M-THUMB-NEXT:    $r4 = tMOVr killed $sp, 14 /* CC::al */, $noreg
+# CHECK-M-THUMB-NEXT:    $r4 = t2BFC killed $r4, 4294967288, 14 /* CC::al */, $noreg
+# CHECK-M-THUMB-NEXT:    $sp = tMOVr killed $r4, 14 /* CC::al */, $noreg
+# CHECK-M-THUMB-NEXT:    BL @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+# CHECK-M-THUMB-NEXT:    $r4 = frame-destroy t2SUBri killed $r7, 80, 14 /* CC::al */, $noreg, $noreg
+# CHECK-M-THUMB-NEXT:    $sp = frame-destroy tMOVr $r4, 14 /* CC::al */, $noreg
+# CHECK-M-THUMB-NEXT:    $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d0, def $d1, def $d2, def $d3, def $d4, def $d5, def $d6, def $d7
+# CHECK-M-THUMB-NEXT:    $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg
+# CHECK-M-THUMB-NEXT:    $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4
+# CHECK-M-THUMB-NEXT:    frame-destroy VMSR $r4, 14 /* CC::al */, $noreg, implicit-def $fpscr
+# CHECK-M-THUMB-NEXT:    $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r6, def $r7, def $lr
+# CHECK-M-THUMB-NEXT:    SUBS_PC_LR 4, 14 /* CC::al */, $noreg
+
+--- |
+  ; ModuleID = '/scratch/benson/tools2/llvm_cgt/llvm-project/llvm/test/CodeGen/ARM/fp-attr-fpscr.ll'
+  source_filename = "/scratch/benson/tools2/llvm_cgt/llvm-project/llvm/test/CodeGen/ARM/fp-attr-fpscr.ll"
+  target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+  declare arm_aapcscc void @bar()
+
+  ; Function Attrs: alignstack(8)
+  define arm_aapcscc void @irq_fn() #1 {
+    call arm_aapcscc void @bar()
+    ret void
+  }
+
+  attributes #1 = { alignstack=8 "interrupt"="IRQ" "target-features"="+fpregs" "save-fp" }
+
+...
+---
+name:            irq_fn
+frameInfo:
+  adjustsStack:    true
+alignment:       16
+body:             |
+  bb.0 (%ir-block.0):
+    ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+    BL @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
+    ADJCALLSTACKUP 0, -1, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
+    SUBS_PC_LR 4, 14 /* CC::al */, $noreg
+...
diff --git a/llvm/test/CodeGen/ARM/interrupt-save-fp-attr.ll b/llvm/test/CodeGen/ARM/interrupt-save-fp-attr.ll
new file mode 100644
index 0000000000000..e96dfa4a38e12
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/interrupt-save-fp-attr.ll
@@ -0,0 +1,303 @@
+; RUN: llc -mtriple=arm-ti-none-eabihf -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A %s
+; RUN: llc -mtriple=thumb-ti-none-eabihf -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A-THUMB %s
+; RUN: llc -mtriple=thumb-ti-none-eabihf -mcpu=cortex-m4 -o - %s | FileCheck --check-prefix=CHECK-M %s
+; RUN: llc -mtriple=thumbv7em-ti-none-eabihf -mcpu=cortex-m4 -o - %s | FileCheck --check-prefix=CHECK-M %s
+; RUN: llc -mtriple=thumbv7r5-ti-none-eabihf -mcpu=cortex-r5 -o - %s | FileCheck --check-prefix=CHECK-R-THUMB %s
+; RUN: llc -mtriple=armv7r5-ti-none-eabihf -mcpu=cortex-r5 -o - %s | FileCheck --check-prefix=CHECK-R %s
+
+declare arm_aapcscc void @bar()
+
+@bigvar = global [16 x i32] zeroinitializer
+
+define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" "save-fp"{
+  ; Must save all registers except banked sp and lr (we save lr anyway because
+  ; we actually need it at the end to execute the return ourselves).
+
+  ; Also need special function return setting pc and CPSR simultaneously.
+  ; CHECK-A-LABEL: irq_fn:
+  ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-A: add r11, sp, #28
+  ; CHECK-A: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; [...]
+  ; CHECK-A: sub sp, r11, #228
+  ; CHECK-A: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; CHECK-A: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-A-LABEL: .Lfunc_end0
+
+  ; CHECK-A-THUMB-LABEL: irq_fn:
+  ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-A-THUMB: add r7, sp, #28
+  ; CHECK-A-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A-THUMB: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; [...]
+  ; CHECK-A-THUMB: sub.w r4, r7, #228
+  ; CHECK-A-THUMB: mov sp, r4
+  ; CHECK-A-THUMB: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; CHECK-A-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-A-THUMB-LABEL: .Lfunc_end0
+
+  ; CHECK-R-LABEL: irq_fn:
+  ; CHECK-R: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-R: add r11, sp, #28
+  ; CHECK-R: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; [...]
+  ; CHECK-R: sub sp, r11, #100
+  ; CHECK-R: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-R: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-R-LABEL: .Lfunc_end0
+
+  ; CHECK-R-THUMB-LABEL: irq_fn:
+  ; CHECK-R-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-R-THUMB: add r7, sp, #28
+  ; CHECK-R-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; [...]
+  ; CHECK-R-THUMB: sub.w r4, r7, #100
+  ; CHECK-R-THUMB: mov sp, r4
+  ; CHECK-R-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-R-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-R-THUMB-LABEL: .Lfunc_end0
+
+  ; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to
+  ; appropriate sentinel so no special return needed).
+  ; CHECK-M-LABEL: irq_fn:
+  ; CHECK-M: push {r4, r6, r7, lr}
+  ; CHECK-M: add r7, sp, #8
+  ; CHECK-M: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; [...]
+  ; CHECK-M: sub.w r4, r7, #80
+  ; CHECK-M: mov sp, r4
+  ; CHECK-M: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-M: pop {r4, r6, r7, pc}
+  ; CHECK-M-LABEL: .Lfunc_end0
+
+  call arm_aapcscc void @bar()
+  ret void
+}
+
+; We don't push/pop r12, as it is banked for FIQ
+define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" "save-fp" {
+  ; CHECK-A-LABEL: fiq_fn:
+  ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
+  ; CHECK-A: add r11, sp, #32
+  ; [...]
+  ; CHECK-A: sub sp, r11, #40
+  ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
+  ; CHECK-A-LABEL: .Lfunc_end1
+
+  ; CHECK-A-THUMB-LABEL: fiq_fn:
+  ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
+  ; CHECK-A-THUMB: add r7, sp, #28
+  ; [...]
+  ; CHECK-A-THUMB: sub.w r4, r7, #36
+  ; CHECK-A-THUMB: mov sp, r4
+  ; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
+  ; CHECK-A-THUMB-LABEL: .Lfunc_end1
+
+  ; CHECK-R-LABEL: fiq_fn:
+  ; CHECK-R: push {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
+  ; CHECK-R: add r11, sp, #32
+  ; [...]
+  ; CHECK-R: sub sp, r11, #40
+  ; CHECK-R: pop {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
+  ; CHECK-R-LABEL: .Lfunc_end1
+
+  ; CHECK-R-THUMB-LABEL: fiq_fn:
+  ; CHECK-R-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
+  ; CHECK-R-THUMB: add r7, sp, #28
+  ; [...]
+  ; CHECK-R-THUMB: sub.w r4, r7, #36
+  ; CHECK-R-THUMB: mov sp, r4
+  ; CHECK-R-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr}
+  ; CHECK-R-THUMB-LABEL: .Lfunc_end1
+
+  ; CHECK-M-LABEL: fiq_fn:
+  ; CHECK-M: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+  ; CHECK-M: add r7, sp, #12
+  ; [...]
+  ; CHECK-M: sub.w r4, r7, #16
+  ; CHECK-M: mov sp, r4
+  ; CHECK-M: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  ; CHECK-M-LABEL: .Lfunc_end1
+
+  %val = load volatile [16 x i32], [16 x i32]* @bigvar
+  store volatile [16 x i32] %val, [16 x i32]* @bigvar
+  ret void
+}
+
+define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" "save-fp" {
+  ; CHECK-A-LABEL: swi_fn:
+  ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
+  ; CHECK-A: add r11, sp, #44
+  ; [...]
+  ; CHECK-A: sub sp, r11, #52
+  ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
+  ; CHECK-A-LABEL: .Lfunc_end2
+
+  ; CHECK-A-THUMB-LABEL: swi_fn:
+  ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
+  ; CHECK-A-THUMB: add r7, sp, #28
+  ; [...]
+  ; CHECK-A-THUMB: sub.w r4, r7, #36
+  ; CHECK-A-THUMB: mov sp, r4
+  ; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
+  ; CHECK-A-THUMB-LABEL: .Lfunc_end2
+
+  ; CHECK-R-LABEL: swi_fn:
+  ; CHECK-R: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
+  ; CHECK-R: add r11, sp, #44
+  ; [...]
+  ; CHECK-R: sub sp, r11, #52
+  ; CHECK-R: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
+  ; CHECK-R-LABEL: .Lfunc_end2
+
+  ; CHECK-R-THUMB-LABEL: swi_fn:
+  ; CHECK-R-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
+  ; CHECK-R-THUMB: add r7, sp, #28
+  ; [...]
+  ; CHECK-R-THUMB: sub.w r4, r7, #36
+  ; CHECK-R-THUMB: mov sp, r4
+  ; CHECK-R-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr}
+  ; CHECK-R-THUMB-LABEL: .Lfunc_end2
+
+  ; CHECK-M-LABEL: swi_fn:
+  ; CHECK-M: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+  ; CHECK-M: add r7, sp, #12
+  ; [...]
+  ; CHECK-M: sub.w r4, r7, #16
+  ; CHECK-M: mov sp, r4
+  ; CHECK-M: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+  ; CHECK-M-LABEL: .Lfunc_end2
+
+  %val = load volatile [16 x i32], [16 x i32]* @bigvar
+  store volatile [16 x i32] %val, [16 x i32]* @bigvar
+  ret void
+}
+
+define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" "save-fp" {
+  ; CHECK-A-LABEL: undef_fn:
+  ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-A: add r11, sp, #28
+  ; CHECK-A: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; [...]
+  ; CHECK-A: sub sp, r11, #228
+  ; CHECK-A: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; CHECK-A: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-A-LABEL: .Lfunc_end3
+
+  ; CHECK-A-THUMB-LABEL: undef_fn:
+  ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-A-THUMB: add r7, sp, #28
+  ; CHECK-A-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A-THUMB: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; [...]
+  ; CHECK-A-THUMB: sub.w r4, r7, #228
+  ; CHECK-A-THUMB: mov sp, r4
+  ; CHECK-A-THUMB: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; CHECK-A-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-A-THUMB-LABEL: .Lfunc_end3
+
+  ; CHECK-R-LABEL: undef_fn:
+  ; CHECK-R: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-R: add r11, sp, #28
+  ; CHECK-R: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; [...]
+  ; CHECK-R: sub sp, r11, #100
+  ; CHECK-R: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-R: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-R-LABEL: .Lfunc_end3
+
+  ; CHECK-R-THUMB-LABEL: undef_fn:
+  ; CHECK-R-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-R-THUMB: add r7, sp, #28
+  ; CHECK-R-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; [...]
+  ; CHECK-R-THUMB: sub.w r4, r7, #100
+  ; CHECK-R-THUMB: mov sp, r4
+  ; CHECK-R-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-R-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-R-THUMB-LABEL: .Lfunc_end3
+
+  ; CHECK-M-LABEL: undef_fn:
+  ; CHECK-M: push {r4, r6, r7, lr}
+  ; CHECK-M: add r7, sp, #8
+  ; CHECK-M: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; [...]
+  ; CHECK-M: sub.w r4, r7, #80
+  ; CHECK-M: mov sp, r4
+  ; CHECK-M: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-M: pop {r4, r6, r7, pc}
+  ; CHECK-M-LABEL: .Lfunc_end3
+
+  call void @bar()
+  ret void
+}
+
+define arm_aapcscc void @abort_fn() alignstack(8) "interrupt"="ABORT" "save-fp" {
+  ; Test function carrying "interrupt"="ABORT" together with "save-fp"; its
+  ; body only calls @bar and returns. Each check-prefix group below pins the
+  ; prologue (GPR push, frame-pointer setup, D-register vpush) and the matching
+  ; epilogue (SP restore from the frame pointer, vpop, GPR pop), and ends at
+  ; the .Lfunc_end4 label.
+  ;
+  ; The A and A-THUMB groups expect both d0-d7 and d16-d31 to be saved; the
+  ; R, R-THUMB and M groups expect only d0-d7. The M group expects a much
+  ; smaller GPR push (r4, r6, r7, lr) and returns by popping into pc.
+  ;
+  ; NOTE(review): the prefixes presumably map to A-, R- and M-profile RUN
+  ; lines earlier in this file (not visible here) - confirm.
+  ; NOTE(review): the SP-restore offsets are consistent with 8 extra bytes
+  ; saved between the GPR push and the first vpush (228 = 28 + 8 + 64 + 128,
+  ; 100 = 28 + 8 + 64, 80 = 8 + 8 + 64), presumably the FP status register
+  ; save area; those save instructions fall outside the checked lines here.
+  ; CHECK-A-LABEL: abort_fn:
+  ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-A: add r11, sp, #28
+  ; CHECK-A: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; [...]
+  ; CHECK-A: sub sp, r11, #228
+  ; CHECK-A: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; CHECK-A: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-A-LABEL: .Lfunc_end4
+
+  ; CHECK-A-THUMB-LABEL: abort_fn:
+  ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-A-THUMB: add r7, sp, #28
+  ; CHECK-A-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A-THUMB: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; [...]
+  ; CHECK-A-THUMB: sub.w r4, r7, #228
+  ; CHECK-A-THUMB: mov sp, r4
+  ; CHECK-A-THUMB: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+  ; CHECK-A-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-A-THUMB-LABEL: .Lfunc_end4
+
+  ; CHECK-R-LABEL: abort_fn:
+  ; CHECK-R: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-R: add r11, sp, #28
+  ; CHECK-R: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; [...]
+  ; CHECK-R: sub sp, r11, #100
+  ; CHECK-R: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-R: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr}
+  ; CHECK-R-LABEL: .Lfunc_end4
+
+  ; CHECK-R-THUMB-LABEL: abort_fn:
+  ; CHECK-R-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-R-THUMB: add r7, sp, #28
+  ; CHECK-R-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; [...]
+  ; CHECK-R-THUMB: sub.w r4, r7, #100
+  ; CHECK-R-THUMB: mov sp, r4
+  ; CHECK-R-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-R-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr}
+  ; CHECK-R-THUMB-LABEL: .Lfunc_end4
+
+  ; CHECK-M-LABEL: abort_fn:
+  ; CHECK-M: push {r4, r6, r7, lr}
+  ; CHECK-M: add r7, sp, #8
+  ; CHECK-M: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; [...]
+  ; CHECK-M: sub.w r4, r7, #80
+  ; CHECK-M: mov sp, r4
+  ; CHECK-M: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+  ; CHECK-M: pop {r4, r6, r7, pc}
+  ; CHECK-M-LABEL: .Lfunc_end4
+
+  call void @bar()
+  ret void
+}
+
+@var = global double 0.0



More information about the cfe-commits mailing list