[llvm-branch-commits] [llvm] dd399a6 - [RISCV] Use ldp/sdp for EPI

Jun Ma via llvm-branch-commits <llvm-branch-commits@lists.llvm.org>
Sun Dec 19 18:35:22 PST 2021


Author: Jun Ma
Date: 2021-12-17T12:08:28+08:00
New Revision: dd399a6194e8506d2af87794d78cb920c66f19b0

URL: https://github.com/llvm/llvm-project/commit/dd399a6194e8506d2af87794d78cb920c66f19b0
DIFF: https://github.com/llvm/llvm-project/commit/dd399a6194e8506d2af87794d78cb920c66f19b0.diff

LOG: [RISCV] Use ldp/sdp for EPI
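
In short (summarizing the diff below): RISC-V frame lowering now spills and
restores pairs of adjacent general-purpose callee-saved registers with the
paired sdp/ldp instructions when the experimental XIn feature is enabled on
RV64 and the function has no frame pointer, and getFirstSPAdjustAmount
switches to (512 - StackAlign) when the stack size does not fit in a signed
10-bit immediate, presumably so the paired accesses stay within the sdp/ldp
offset range. A minimal before/after illustration, copied from the
callee-saved-n3.ll checks added in this commit (the RV64I-WITH-FP prefix
shows the unpaired form, RV64I the paired one):

    sd  ra, 152(sp)        # unpaired spills (frame pointer forced)
    sd  s0, 144(sp)

    sdp ra, s0, 152(sp)    # paired spill with +experimental-xin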

Added: 
    llvm/test/CodeGen/RISCV/callee-saved-n3.ll
    llvm/test/CodeGen/RISCV/large-stack-n3.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVFrameLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index f5d491938050..9e1381ec5c6e 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1010,7 +1010,11 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
 
   // Return the FirstSPAdjustAmount if the StackSize cannot fit in a signed
   // 12-bit immediate and there exists a callee-saved register that needs to be pushed.
-  if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
+  if (!hasFP(MF) && !isInt<10>(StackSize) &&
+      STI.hasFeature(RISCV::Feature64Bit) &&
+      STI.hasFeature(RISCV::FeatureStdExtXin) && (CSI.size() > 1)) {
+    return 512 - getStackAlign().value();
+  } else if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
     // FirstSPAdjustAmount is chosen as (2048 - StackAlign)
     // because 2048 will cause sp = sp + 2048 in the epilogue to be split into
     // multiple instructions. The offset smaller than 2048 can fit in a single
@@ -1048,12 +1052,43 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
 
   // Manually spill values not spilled by libcall.
   const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
-  for (auto &CS : NonLibcallCSI) {
+  unsigned Count = NonLibcallCSI.size();
+  for (unsigned i = 0; i < Count; i += 1) {
     // Insert the spill to the stack frame.
-    Register Reg = CS.getReg();
+    Register Reg = NonLibcallCSI[i].getReg();
+    int FI = NonLibcallCSI[i].getFrameIdx();
+    bool IsN3 = STI.hasFeature(RISCV::Feature64Bit) &&
+                STI.hasFeature(RISCV::FeatureStdExtXin);
+    if (IsN3 && unsigned(i + 1) < Count) {
+      unsigned NextReg = NonLibcallCSI[i + 1].getReg();
+      int NextFI = NonLibcallCSI[i + 1].getFrameIdx();
+      if (!hasFP(*MF) && RISCV::GPRRegClass.contains(Reg) &&
+          RISCV::GPRRegClass.contains(NextReg) && FI + 1 == NextFI) {
+
+        MachineFrameInfo &MFI = MF->getFrameInfo();
+        MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(RISCV::SDP));
+
+        MIB.addReg(Reg, getKillRegState(!MBB.isLiveIn(Reg)));
+        MIB.addMemOperand(MF->getMachineMemOperand(
+            MachinePointerInfo::getFixedStack(*MF, FI),
+            MachineMemOperand::MOStore, MFI.getObjectSize(FI),
+            MFI.getObjectAlign(FI)));
+
+        MIB.addReg(NextReg, getKillRegState(!MBB.isLiveIn(NextReg)));
+        MIB.addMemOperand(MF->getMachineMemOperand(
+            MachinePointerInfo::getFixedStack(*MF, NextFI),
+            MachineMemOperand::MOStore, MFI.getObjectSize(NextFI),
+            MFI.getObjectAlign(NextFI)));
+
+        MIB.addFrameIndex(FI).addImm(0);
+
+        i += 1;
+        continue;
+      }
+    }
+
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), CS.getFrameIdx(),
-                            RC, TRI);
+    TII.storeRegToStackSlot(MBB, MI, Reg, !MBB.isLiveIn(Reg), FI, RC, TRI);
   }
 
   return true;
@@ -1078,10 +1113,43 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
   // load-to-use data hazard between loading RA and return by RA.
   // loadRegFromStackSlot can insert multiple instructions.
   const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
-  for (auto &CS : NonLibcallCSI) {
-    Register Reg = CS.getReg();
+  unsigned Count = NonLibcallCSI.size();
+  for (unsigned i = 0; i < Count; i += 1) {
+    // Insert the reload from the stack frame.
+    Register Reg = NonLibcallCSI[i].getReg();
+    int FI = NonLibcallCSI[i].getFrameIdx();
+    bool IsN3 = STI.hasFeature(RISCV::Feature64Bit) &&
+                STI.hasFeature(RISCV::FeatureStdExtXin);
+    if (IsN3 && unsigned(i + 1) < Count) {
+      unsigned NextReg = NonLibcallCSI[i + 1].getReg();
+      int NextFI = NonLibcallCSI[i + 1].getFrameIdx();
+      if (!hasFP(*MF) && RISCV::GPRRegClass.contains(Reg) &&
+          RISCV::GPRRegClass.contains(NextReg) && FI + 1 == NextFI) {
+
+        MachineFrameInfo &MFI = MF->getFrameInfo();
+        MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(RISCV::LDP));
+
+        MIB.addReg(Reg, RegState::Define);
+        MIB.addMemOperand(MF->getMachineMemOperand(
+            MachinePointerInfo::getFixedStack(*MF, FI),
+            MachineMemOperand::MOLoad, MFI.getObjectSize(FI),
+            MFI.getObjectAlign(FI)));
+
+        MIB.addReg(NextReg, RegState::Define);
+        MIB.addMemOperand(MF->getMachineMemOperand(
+            MachinePointerInfo::getFixedStack(*MF, NextFI),
+            MachineMemOperand::MOLoad, MFI.getObjectSize(NextFI),
+            MFI.getObjectAlign(NextFI)));
+
+        MIB.addFrameIndex(FI).addImm(0);
+
+        i += 1;
+        continue;
+      }
+    }
+
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
-    TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
+    TII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, TRI);
     assert(MI != MBB.begin() && "loadRegFromStackSlot didn't insert any code!");
   }
 

diff  --git a/llvm/test/CodeGen/RISCV/callee-saved-n3.ll b/llvm/test/CodeGen/RISCV/callee-saved-n3.ll
new file mode 100644
index 000000000000..affbb0034efb
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/callee-saved-n3.ll
@@ -0,0 +1,808 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-xin -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+f,+experimental-xin -target-abi lp64f -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-xin -target-abi lp64f -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+d,+experimental-xin -target-abi lp64d -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-xin -verify-machineinstrs -frame-pointer=all < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64I-WITH-FP
+
+@var = global [32 x i32] zeroinitializer
+
+; This function tests that RISCVRegisterInfo::getCalleeSavedRegs returns
+; something appropriate.
+
+define void @callee() nounwind {
+; RV64I-LABEL: callee:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -160
+; RV64I-NEXT:    sdp ra, s0, 152(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sdp s1, s2, 136(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sdp s3, s4, 120(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sdp s5, s6, 104(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sdp s7, s8, 88(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sdp s9, s10, 72(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a7, %hi(var)
+; RV64I-NEXT:    lw a0, %lo(var)(a7)
+; RV64I-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var+4)(a7)
+; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var+8)(a7)
+; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var+12)(a7)
+; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi a5, a7, %lo(var)
+; RV64I-NEXT:    lw a0, 16(a5)
+; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 20(a5)
+; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw t4, 24(a5)
+; RV64I-NEXT:    lw t5, 28(a5)
+; RV64I-NEXT:    lw t6, 32(a5)
+; RV64I-NEXT:    lw s2, 36(a5)
+; RV64I-NEXT:    lw s3, 40(a5)
+; RV64I-NEXT:    lw s4, 44(a5)
+; RV64I-NEXT:    lw s5, 48(a5)
+; RV64I-NEXT:    lw s6, 52(a5)
+; RV64I-NEXT:    lw s7, 56(a5)
+; RV64I-NEXT:    lw s8, 60(a5)
+; RV64I-NEXT:    lw s9, 64(a5)
+; RV64I-NEXT:    lw s10, 68(a5)
+; RV64I-NEXT:    lw s11, 72(a5)
+; RV64I-NEXT:    lw ra, 76(a5)
+; RV64I-NEXT:    lw s1, 80(a5)
+; RV64I-NEXT:    lw t3, 84(a5)
+; RV64I-NEXT:    lw t2, 88(a5)
+; RV64I-NEXT:    lw t1, 92(a5)
+; RV64I-NEXT:    lw t0, 96(a5)
+; RV64I-NEXT:    lw s0, 100(a5)
+; RV64I-NEXT:    lw a6, 104(a5)
+; RV64I-NEXT:    lw a4, 108(a5)
+; RV64I-NEXT:    lw a0, 124(a5)
+; RV64I-NEXT:    lw a1, 120(a5)
+; RV64I-NEXT:    lw a2, 116(a5)
+; RV64I-NEXT:    lw a3, 112(a5)
+; RV64I-NEXT:    sw a0, 124(a5)
+; RV64I-NEXT:    sw a1, 120(a5)
+; RV64I-NEXT:    sw a2, 116(a5)
+; RV64I-NEXT:    sw a3, 112(a5)
+; RV64I-NEXT:    sw a4, 108(a5)
+; RV64I-NEXT:    sw a6, 104(a5)
+; RV64I-NEXT:    sw s0, 100(a5)
+; RV64I-NEXT:    sw t0, 96(a5)
+; RV64I-NEXT:    sw t1, 92(a5)
+; RV64I-NEXT:    sw t2, 88(a5)
+; RV64I-NEXT:    sw t3, 84(a5)
+; RV64I-NEXT:    sw s1, 80(a5)
+; RV64I-NEXT:    sw ra, 76(a5)
+; RV64I-NEXT:    sw s11, 72(a5)
+; RV64I-NEXT:    sw s10, 68(a5)
+; RV64I-NEXT:    sw s9, 64(a5)
+; RV64I-NEXT:    sw s8, 60(a5)
+; RV64I-NEXT:    sw s7, 56(a5)
+; RV64I-NEXT:    sw s6, 52(a5)
+; RV64I-NEXT:    sw s5, 48(a5)
+; RV64I-NEXT:    sw s4, 44(a5)
+; RV64I-NEXT:    sw s3, 40(a5)
+; RV64I-NEXT:    sw s2, 36(a5)
+; RV64I-NEXT:    sw t6, 32(a5)
+; RV64I-NEXT:    sw t5, 28(a5)
+; RV64I-NEXT:    sw t4, 24(a5)
+; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 20(a5)
+; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 16(a5)
+; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var+12)(a7)
+; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var+8)(a7)
+; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var+4)(a7)
+; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var)(a7)
+; RV64I-NEXT:    ldp ra, s0, 152(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ldp s1, s2, 136(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ldp s3, s4, 120(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ldp s5, s6, 104(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ldp s7, s8, 88(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ldp s9, s10, 72(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ld s11, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 160
+; RV64I-NEXT:    ret
+;
+; RV64I-WITH-FP-LABEL: callee:
+; RV64I-WITH-FP:       # %bb.0:
+; RV64I-WITH-FP-NEXT:    addi sp, sp, -160
+; RV64I-WITH-FP-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s0, 144(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s1, 136(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s2, 128(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s3, 120(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s4, 112(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s5, 104(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s6, 96(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s7, 88(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s8, 80(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s9, 72(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s10, 64(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    addi s0, sp, 160
+; RV64I-WITH-FP-NEXT:    lui a7, %hi(var)
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var)(a7)
+; RV64I-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+4)(a7)
+; RV64I-WITH-FP-NEXT:    sd a0, -120(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+8)(a7)
+; RV64I-WITH-FP-NEXT:    sd a0, -128(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+12)(a7)
+; RV64I-WITH-FP-NEXT:    sd a0, -136(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    addi a5, a7, %lo(var)
+; RV64I-WITH-FP-NEXT:    lw a0, 16(a5)
+; RV64I-WITH-FP-NEXT:    sd a0, -144(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 20(a5)
+; RV64I-WITH-FP-NEXT:    sd a0, -152(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 24(a5)
+; RV64I-WITH-FP-NEXT:    sd a0, -160(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw t5, 28(a5)
+; RV64I-WITH-FP-NEXT:    lw t6, 32(a5)
+; RV64I-WITH-FP-NEXT:    lw s2, 36(a5)
+; RV64I-WITH-FP-NEXT:    lw s3, 40(a5)
+; RV64I-WITH-FP-NEXT:    lw s4, 44(a5)
+; RV64I-WITH-FP-NEXT:    lw s5, 48(a5)
+; RV64I-WITH-FP-NEXT:    lw s6, 52(a5)
+; RV64I-WITH-FP-NEXT:    lw s7, 56(a5)
+; RV64I-WITH-FP-NEXT:    lw s8, 60(a5)
+; RV64I-WITH-FP-NEXT:    lw s9, 64(a5)
+; RV64I-WITH-FP-NEXT:    lw s10, 68(a5)
+; RV64I-WITH-FP-NEXT:    lw s11, 72(a5)
+; RV64I-WITH-FP-NEXT:    lw ra, 76(a5)
+; RV64I-WITH-FP-NEXT:    lw t4, 80(a5)
+; RV64I-WITH-FP-NEXT:    lw t3, 84(a5)
+; RV64I-WITH-FP-NEXT:    lw t2, 88(a5)
+; RV64I-WITH-FP-NEXT:    lw s1, 92(a5)
+; RV64I-WITH-FP-NEXT:    lw t1, 96(a5)
+; RV64I-WITH-FP-NEXT:    lw t0, 100(a5)
+; RV64I-WITH-FP-NEXT:    lw a6, 104(a5)
+; RV64I-WITH-FP-NEXT:    lw a4, 108(a5)
+; RV64I-WITH-FP-NEXT:    lw a0, 124(a5)
+; RV64I-WITH-FP-NEXT:    lw a1, 120(a5)
+; RV64I-WITH-FP-NEXT:    lw a2, 116(a5)
+; RV64I-WITH-FP-NEXT:    lw a3, 112(a5)
+; RV64I-WITH-FP-NEXT:    sw a0, 124(a5)
+; RV64I-WITH-FP-NEXT:    sw a1, 120(a5)
+; RV64I-WITH-FP-NEXT:    sw a2, 116(a5)
+; RV64I-WITH-FP-NEXT:    sw a3, 112(a5)
+; RV64I-WITH-FP-NEXT:    sw a4, 108(a5)
+; RV64I-WITH-FP-NEXT:    sw a6, 104(a5)
+; RV64I-WITH-FP-NEXT:    sw t0, 100(a5)
+; RV64I-WITH-FP-NEXT:    sw t1, 96(a5)
+; RV64I-WITH-FP-NEXT:    sw s1, 92(a5)
+; RV64I-WITH-FP-NEXT:    sw t2, 88(a5)
+; RV64I-WITH-FP-NEXT:    sw t3, 84(a5)
+; RV64I-WITH-FP-NEXT:    sw t4, 80(a5)
+; RV64I-WITH-FP-NEXT:    sw ra, 76(a5)
+; RV64I-WITH-FP-NEXT:    sw s11, 72(a5)
+; RV64I-WITH-FP-NEXT:    sw s10, 68(a5)
+; RV64I-WITH-FP-NEXT:    sw s9, 64(a5)
+; RV64I-WITH-FP-NEXT:    sw s8, 60(a5)
+; RV64I-WITH-FP-NEXT:    sw s7, 56(a5)
+; RV64I-WITH-FP-NEXT:    sw s6, 52(a5)
+; RV64I-WITH-FP-NEXT:    sw s5, 48(a5)
+; RV64I-WITH-FP-NEXT:    sw s4, 44(a5)
+; RV64I-WITH-FP-NEXT:    sw s3, 40(a5)
+; RV64I-WITH-FP-NEXT:    sw s2, 36(a5)
+; RV64I-WITH-FP-NEXT:    sw t6, 32(a5)
+; RV64I-WITH-FP-NEXT:    sw t5, 28(a5)
+; RV64I-WITH-FP-NEXT:    ld a0, -160(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 24(a5)
+; RV64I-WITH-FP-NEXT:    ld a0, -152(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 20(a5)
+; RV64I-WITH-FP-NEXT:    ld a0, -144(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 16(a5)
+; RV64I-WITH-FP-NEXT:    ld a0, -136(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+12)(a7)
+; RV64I-WITH-FP-NEXT:    ld a0, -128(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+8)(a7)
+; RV64I-WITH-FP-NEXT:    ld a0, -120(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+4)(a7)
+; RV64I-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var)(a7)
+; RV64I-WITH-FP-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s2, 128(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s3, 120(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s4, 112(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s6, 96(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s7, 88(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s8, 80(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s9, 72(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s10, 64(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s11, 56(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    addi sp, sp, 160
+; RV64I-WITH-FP-NEXT:    ret
+  %val = load [32 x i32], [32 x i32]* @var
+  store volatile [32 x i32] %val, [32 x i32]* @var
+  ret void
+}
+
+; This function tests that RISCVRegisterInfo::getCallPreservedMask returns
+; something appropriate.
+
+define void @caller() nounwind {
+; RV32I-LABEL: caller:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -144
+; RV32I-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui s0, %hi(var)
+; RV32I-NEXT:    lw a0, %lo(var)(s0)
+; RV32I-NEXT:    sw a0, 88(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, %lo(var+4)(s0)
+; RV32I-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, %lo(var+8)(s0)
+; RV32I-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, %lo(var+12)(s0)
+; RV32I-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    addi s1, s0, %lo(var)
+; RV32I-NEXT:    lw a0, 16(s1)
+; RV32I-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 20(s1)
+; RV32I-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 24(s1)
+; RV32I-NEXT:    sw a0, 64(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 28(s1)
+; RV32I-NEXT:    sw a0, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 32(s1)
+; RV32I-NEXT:    sw a0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 36(s1)
+; RV32I-NEXT:    sw a0, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 40(s1)
+; RV32I-NEXT:    sw a0, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 44(s1)
+; RV32I-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 48(s1)
+; RV32I-NEXT:    sw a0, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 52(s1)
+; RV32I-NEXT:    sw a0, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 56(s1)
+; RV32I-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 60(s1)
+; RV32I-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 64(s1)
+; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 68(s1)
+; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 72(s1)
+; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 76(s1)
+; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 80(s1)
+; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a0, 84(s1)
+; RV32I-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw s4, 88(s1)
+; RV32I-NEXT:    lw s5, 92(s1)
+; RV32I-NEXT:    lw s6, 96(s1)
+; RV32I-NEXT:    lw s7, 100(s1)
+; RV32I-NEXT:    lw s8, 104(s1)
+; RV32I-NEXT:    lw s9, 108(s1)
+; RV32I-NEXT:    lw s10, 112(s1)
+; RV32I-NEXT:    lw s11, 116(s1)
+; RV32I-NEXT:    lw s2, 120(s1)
+; RV32I-NEXT:    lw s3, 124(s1)
+; RV32I-NEXT:    call callee@plt
+; RV32I-NEXT:    sw s3, 124(s1)
+; RV32I-NEXT:    sw s2, 120(s1)
+; RV32I-NEXT:    sw s11, 116(s1)
+; RV32I-NEXT:    sw s10, 112(s1)
+; RV32I-NEXT:    sw s9, 108(s1)
+; RV32I-NEXT:    sw s8, 104(s1)
+; RV32I-NEXT:    sw s7, 100(s1)
+; RV32I-NEXT:    sw s6, 96(s1)
+; RV32I-NEXT:    sw s5, 92(s1)
+; RV32I-NEXT:    sw s4, 88(s1)
+; RV32I-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 84(s1)
+; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 80(s1)
+; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 76(s1)
+; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 72(s1)
+; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 68(s1)
+; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 64(s1)
+; RV32I-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 60(s1)
+; RV32I-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 56(s1)
+; RV32I-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 52(s1)
+; RV32I-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 48(s1)
+; RV32I-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 44(s1)
+; RV32I-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 40(s1)
+; RV32I-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 36(s1)
+; RV32I-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 32(s1)
+; RV32I-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 28(s1)
+; RV32I-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 24(s1)
+; RV32I-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 20(s1)
+; RV32I-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, 16(s1)
+; RV32I-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var+12)(s0)
+; RV32I-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var+8)(s0)
+; RV32I-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var+4)(s0)
+; RV32I-NEXT:    lw a0, 88(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a0, %lo(var)(s0)
+; RV32I-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 144
+; RV32I-NEXT:    ret
+;
+; RV32I-WITH-FP-LABEL: caller:
+; RV32I-WITH-FP:       # %bb.0:
+; RV32I-WITH-FP-NEXT:    addi sp, sp, -144
+; RV32I-WITH-FP-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    addi s0, sp, 144
+; RV32I-WITH-FP-NEXT:    lui s6, %hi(var)
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var)(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, -56(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+4)(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, -60(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+8)(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, -64(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+12)(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, -68(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    addi s1, s6, %lo(var)
+; RV32I-WITH-FP-NEXT:    lw a0, 16(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -72(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 20(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -76(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 24(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -80(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 28(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -84(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 32(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -88(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 36(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -92(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 40(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -96(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 44(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -100(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 48(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -104(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 52(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -108(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 56(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -112(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 60(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -116(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 64(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -120(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 68(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -124(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 72(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -128(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 76(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -132(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 80(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -136(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 84(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -140(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a0, 88(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, -144(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw s8, 92(s1)
+; RV32I-WITH-FP-NEXT:    lw s9, 96(s1)
+; RV32I-WITH-FP-NEXT:    lw s10, 100(s1)
+; RV32I-WITH-FP-NEXT:    lw s11, 104(s1)
+; RV32I-WITH-FP-NEXT:    lw s2, 108(s1)
+; RV32I-WITH-FP-NEXT:    lw s3, 112(s1)
+; RV32I-WITH-FP-NEXT:    lw s4, 116(s1)
+; RV32I-WITH-FP-NEXT:    lw s5, 120(s1)
+; RV32I-WITH-FP-NEXT:    lw s7, 124(s1)
+; RV32I-WITH-FP-NEXT:    call callee@plt
+; RV32I-WITH-FP-NEXT:    sw s7, 124(s1)
+; RV32I-WITH-FP-NEXT:    sw s5, 120(s1)
+; RV32I-WITH-FP-NEXT:    sw s4, 116(s1)
+; RV32I-WITH-FP-NEXT:    sw s3, 112(s1)
+; RV32I-WITH-FP-NEXT:    sw s2, 108(s1)
+; RV32I-WITH-FP-NEXT:    sw s11, 104(s1)
+; RV32I-WITH-FP-NEXT:    sw s10, 100(s1)
+; RV32I-WITH-FP-NEXT:    sw s9, 96(s1)
+; RV32I-WITH-FP-NEXT:    sw s8, 92(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -144(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 88(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -140(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 84(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -136(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 80(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -132(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 76(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -128(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 72(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -124(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 68(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -120(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 64(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -116(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 60(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -112(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 56(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -108(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 52(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -104(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 48(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -100(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 44(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -96(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 40(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -92(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 36(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -88(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 32(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -84(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 28(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -80(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 24(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -76(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 20(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -72(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, 16(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, -68(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+12)(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, -64(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+8)(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, -60(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+4)(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, -56(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var)(s6)
+; RV32I-WITH-FP-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    addi sp, sp, 144
+; RV32I-WITH-FP-NEXT:    ret
+;
+; RV64I-LABEL: caller:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -288
+; RV64I-NEXT:    sdp ra, s0, 280(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sdp s1, s2, 264(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sdp s3, s4, 248(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sdp s5, s6, 232(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sdp s7, s8, 216(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sdp s9, s10, 200(sp) # 16-byte Folded Spill
+; RV64I-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui s0, %hi(var)
+; RV64I-NEXT:    lw a0, %lo(var)(s0)
+; RV64I-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var+4)(s0)
+; RV64I-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var+8)(s0)
+; RV64I-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, %lo(var+12)(s0)
+; RV64I-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi s1, s0, %lo(var)
+; RV64I-NEXT:    lw a0, 16(s1)
+; RV64I-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 20(s1)
+; RV64I-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 24(s1)
+; RV64I-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 28(s1)
+; RV64I-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 32(s1)
+; RV64I-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 36(s1)
+; RV64I-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 40(s1)
+; RV64I-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 44(s1)
+; RV64I-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 48(s1)
+; RV64I-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 52(s1)
+; RV64I-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 56(s1)
+; RV64I-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 60(s1)
+; RV64I-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 64(s1)
+; RV64I-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 68(s1)
+; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 72(s1)
+; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 76(s1)
+; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 80(s1)
+; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a0, 84(s1)
+; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw s4, 88(s1)
+; RV64I-NEXT:    lw s5, 92(s1)
+; RV64I-NEXT:    lw s6, 96(s1)
+; RV64I-NEXT:    lw s7, 100(s1)
+; RV64I-NEXT:    lw s8, 104(s1)
+; RV64I-NEXT:    lw s9, 108(s1)
+; RV64I-NEXT:    lw s10, 112(s1)
+; RV64I-NEXT:    lw s11, 116(s1)
+; RV64I-NEXT:    lw s2, 120(s1)
+; RV64I-NEXT:    lw s3, 124(s1)
+; RV64I-NEXT:    call callee@plt
+; RV64I-NEXT:    sw s3, 124(s1)
+; RV64I-NEXT:    sw s2, 120(s1)
+; RV64I-NEXT:    sw s11, 116(s1)
+; RV64I-NEXT:    sw s10, 112(s1)
+; RV64I-NEXT:    sw s9, 108(s1)
+; RV64I-NEXT:    sw s8, 104(s1)
+; RV64I-NEXT:    sw s7, 100(s1)
+; RV64I-NEXT:    sw s6, 96(s1)
+; RV64I-NEXT:    sw s5, 92(s1)
+; RV64I-NEXT:    sw s4, 88(s1)
+; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 84(s1)
+; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 80(s1)
+; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 76(s1)
+; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 72(s1)
+; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 68(s1)
+; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 64(s1)
+; RV64I-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 60(s1)
+; RV64I-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 56(s1)
+; RV64I-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 52(s1)
+; RV64I-NEXT:    ld a0, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 48(s1)
+; RV64I-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 44(s1)
+; RV64I-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 40(s1)
+; RV64I-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 36(s1)
+; RV64I-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 32(s1)
+; RV64I-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 28(s1)
+; RV64I-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 24(s1)
+; RV64I-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 20(s1)
+; RV64I-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 16(s1)
+; RV64I-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var+12)(s0)
+; RV64I-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var+8)(s0)
+; RV64I-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var+4)(s0)
+; RV64I-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, %lo(var)(s0)
+; RV64I-NEXT:    ldp ra, s0, 280(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ldp s1, s2, 264(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ldp s3, s4, 248(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ldp s5, s6, 232(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ldp s7, s8, 216(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ldp s9, s10, 200(sp) # 16-byte Folded Reload
+; RV64I-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 288
+; RV64I-NEXT:    ret
+;
+; RV64I-WITH-FP-LABEL: caller:
+; RV64I-WITH-FP:       # %bb.0:
+; RV64I-WITH-FP-NEXT:    addi sp, sp, -288
+; RV64I-WITH-FP-NEXT:    sd ra, 280(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s0, 272(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s1, 264(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s2, 256(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s3, 248(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s4, 240(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s5, 232(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s6, 224(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s7, 216(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s8, 208(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s9, 200(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    addi s0, sp, 288
+; RV64I-WITH-FP-NEXT:    lui s6, %hi(var)
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var)(s6)
+; RV64I-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+4)(s6)
+; RV64I-WITH-FP-NEXT:    sd a0, -120(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+8)(s6)
+; RV64I-WITH-FP-NEXT:    sd a0, -128(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+12)(s6)
+; RV64I-WITH-FP-NEXT:    sd a0, -136(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    addi s1, s6, %lo(var)
+; RV64I-WITH-FP-NEXT:    lw a0, 16(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -144(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 20(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -152(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 24(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -160(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 28(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -168(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 32(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -176(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 36(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -184(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 40(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -192(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 44(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -200(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 48(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -208(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 52(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -216(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 56(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -224(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 60(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -232(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 64(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -240(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 68(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -248(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 72(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -256(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 76(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -264(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 80(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -272(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 84(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -280(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a0, 88(s1)
+; RV64I-WITH-FP-NEXT:    sd a0, -288(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw s8, 92(s1)
+; RV64I-WITH-FP-NEXT:    lw s9, 96(s1)
+; RV64I-WITH-FP-NEXT:    lw s10, 100(s1)
+; RV64I-WITH-FP-NEXT:    lw s11, 104(s1)
+; RV64I-WITH-FP-NEXT:    lw s2, 108(s1)
+; RV64I-WITH-FP-NEXT:    lw s3, 112(s1)
+; RV64I-WITH-FP-NEXT:    lw s4, 116(s1)
+; RV64I-WITH-FP-NEXT:    lw s5, 120(s1)
+; RV64I-WITH-FP-NEXT:    lw s7, 124(s1)
+; RV64I-WITH-FP-NEXT:    call callee@plt
+; RV64I-WITH-FP-NEXT:    sw s7, 124(s1)
+; RV64I-WITH-FP-NEXT:    sw s5, 120(s1)
+; RV64I-WITH-FP-NEXT:    sw s4, 116(s1)
+; RV64I-WITH-FP-NEXT:    sw s3, 112(s1)
+; RV64I-WITH-FP-NEXT:    sw s2, 108(s1)
+; RV64I-WITH-FP-NEXT:    sw s11, 104(s1)
+; RV64I-WITH-FP-NEXT:    sw s10, 100(s1)
+; RV64I-WITH-FP-NEXT:    sw s9, 96(s1)
+; RV64I-WITH-FP-NEXT:    sw s8, 92(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -288(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 88(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -280(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 84(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -272(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 80(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -264(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 76(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -256(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 72(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -248(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 68(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -240(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 64(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -232(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 60(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -224(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 56(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -216(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 52(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -208(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 48(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -200(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 44(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -192(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 40(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -184(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 36(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -176(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 32(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -168(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 28(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -160(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 24(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -152(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 20(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -144(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, 16(s1)
+; RV64I-WITH-FP-NEXT:    ld a0, -136(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+12)(s6)
+; RV64I-WITH-FP-NEXT:    ld a0, -128(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+8)(s6)
+; RV64I-WITH-FP-NEXT:    ld a0, -120(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+4)(s6)
+; RV64I-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var)(s6)
+; RV64I-WITH-FP-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s2, 256(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s3, 248(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s5, 232(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s6, 224(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s7, 216(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s8, 208(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s9, 200(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s10, 192(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    addi sp, sp, 288
+; RV64I-WITH-FP-NEXT:    ret
+
+  %val = load [32 x i32], [32 x i32]* @var
+  call void @callee()
+  store volatile [32 x i32] %val, [32 x i32]* @var
+  ret void
+}

diff  --git a/llvm/test/CodeGen/RISCV/large-stack-n3.ll b/llvm/test/CodeGen/RISCV/large-stack-n3.ll
new file mode 100644
index 000000000000..f678f4c61193
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/large-stack-n3.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64  -mattr=+experimental-xin -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I-FPELIM %s
+; RUN: llc -mtriple=riscv64  -mattr=+experimental-xin -verify-machineinstrs -frame-pointer=all < %s \
+; RUN:   | FileCheck -check-prefix=RV64I-WITHFP %s
+
+; TODO: the quality of the generated code is poor
+
+define void @test() {
+; RV64I-FPELIM-LABEL: test:
+; RV64I-FPELIM:       # %bb.0:
+; RV64I-FPELIM-NEXT:    lui a0, 74565
+; RV64I-FPELIM-NEXT:    addiw a0, a0, 1664
+; RV64I-FPELIM-NEXT:    sub sp, sp, a0
+; RV64I-FPELIM-NEXT:    .cfi_def_cfa_offset 305419904
+; RV64I-FPELIM-NEXT:    lui a0, 74565
+; RV64I-FPELIM-NEXT:    addiw a0, a0, 1664
+; RV64I-FPELIM-NEXT:    add sp, sp, a0
+; RV64I-FPELIM-NEXT:    ret
+;
+; RV64I-WITHFP-LABEL: test:
+; RV64I-WITHFP:       # %bb.0:
+; RV64I-WITHFP-NEXT:    addi sp, sp, -2032
+; RV64I-WITHFP-NEXT:    .cfi_def_cfa_offset 2032
+; RV64I-WITHFP-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT:    .cfi_offset ra, -8
+; RV64I-WITHFP-NEXT:    .cfi_offset s0, -16
+; RV64I-WITHFP-NEXT:    addi s0, sp, 2032
+; RV64I-WITHFP-NEXT:    .cfi_def_cfa s0, 0
+; RV64I-WITHFP-NEXT:    lui a0, 74565
+; RV64I-WITHFP-NEXT:    addiw a0, a0, -352
+; RV64I-WITHFP-NEXT:    sub sp, sp, a0
+; RV64I-WITHFP-NEXT:    lui a0, 74565
+; RV64I-WITHFP-NEXT:    addiw a0, a0, -352
+; RV64I-WITHFP-NEXT:    add sp, sp, a0
+; RV64I-WITHFP-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    addi sp, sp, 2032
+; RV64I-WITHFP-NEXT:    ret
+  %tmp = alloca [ 305419896 x i8 ] , align 4
+  ret void
+}
+
+; This test case artificially produces register pressure which should force
+; use of the emergency spill slot.
+
+define void @test_emergency_spill_slot(i32 %a) {
+; RV64I-FPELIM-LABEL: test_emergency_spill_slot:
+; RV64I-FPELIM:       # %bb.0:
+; RV64I-FPELIM-NEXT:    addi sp, sp, -496
+; RV64I-FPELIM-NEXT:    .cfi_def_cfa_offset 496
+; RV64I-FPELIM-NEXT:    sdp s0, s1, 488(sp) # 16-byte Folded Spill
+; RV64I-FPELIM-NEXT:    lui a1, 78
+; RV64I-FPELIM-NEXT:    .cfi_offset s0, -8
+; RV64I-FPELIM-NEXT:    .cfi_offset s1, -16
+; RV64I-FPELIM-NEXT:    lui a2, 98
+; RV64I-FPELIM-NEXT:    addiw a2, a2, -1872
+; RV64I-FPELIM-NEXT:    sub sp, sp, a2
+; RV64I-FPELIM-NEXT:    .cfi_def_cfa_offset 400032
+; RV64I-FPELIM-NEXT:    addiw a1, a1, 512
+; RV64I-FPELIM-NEXT:    addi a2, sp, 16
+; RV64I-FPELIM-NEXT:    add a1, a2, a1
+; RV64I-FPELIM-NEXT:    #APP
+; RV64I-FPELIM-NEXT:    nop
+; RV64I-FPELIM-EMPTY:
+; RV64I-FPELIM-NEXT:    #NO_APP
+; RV64I-FPELIM-NEXT:    sw a0, 0(a1)
+; RV64I-FPELIM-NEXT:    lui a0, 98
+; RV64I-FPELIM-NEXT:    addiw a0, a0, -1872
+; RV64I-FPELIM-NEXT:    add sp, sp, a0
+; RV64I-FPELIM-NEXT:    #APP
+; RV64I-FPELIM-NEXT:    nop
+; RV64I-FPELIM-EMPTY:
+; RV64I-FPELIM-NEXT:    #NO_APP
+; RV64I-FPELIM-NEXT:    ldp s0, s1, 488(sp) # 16-byte Folded Reload
+; RV64I-FPELIM-NEXT:    addi sp, sp, 496
+; RV64I-FPELIM-NEXT:    ret
+;
+; RV64I-WITHFP-LABEL: test_emergency_spill_slot:
+; RV64I-WITHFP:       # %bb.0:
+; RV64I-WITHFP-NEXT:    addi sp, sp, -2032
+; RV64I-WITHFP-NEXT:    .cfi_def_cfa_offset 2032
+; RV64I-WITHFP-NEXT:    sd ra, 2024(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT:    sd s0, 2016(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT:    sd s1, 2008(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT:    sd s2, 2000(sp) # 8-byte Folded Spill
+; RV64I-WITHFP-NEXT:    .cfi_offset ra, -8
+; RV64I-WITHFP-NEXT:    .cfi_offset s0, -16
+; RV64I-WITHFP-NEXT:    .cfi_offset s1, -24
+; RV64I-WITHFP-NEXT:    .cfi_offset s2, -32
+; RV64I-WITHFP-NEXT:    addi s0, sp, 2032
+; RV64I-WITHFP-NEXT:    .cfi_def_cfa s0, 0
+; RV64I-WITHFP-NEXT:    lui a1, 97
+; RV64I-WITHFP-NEXT:    addiw a1, a1, 704
+; RV64I-WITHFP-NEXT:    sub sp, sp, a1
+; RV64I-WITHFP-NEXT:    lui a1, 78
+; RV64I-WITHFP-NEXT:    addiw a1, a1, 512
+; RV64I-WITHFP-NEXT:    lui a2, 1048478
+; RV64I-WITHFP-NEXT:    addiw a2, a2, 1368
+; RV64I-WITHFP-NEXT:    add a2, s0, a2
+; RV64I-WITHFP-NEXT:    add a1, a2, a1
+; RV64I-WITHFP-NEXT:    #APP
+; RV64I-WITHFP-NEXT:    nop
+; RV64I-WITHFP-EMPTY:
+; RV64I-WITHFP-NEXT:    #NO_APP
+; RV64I-WITHFP-NEXT:    sw a0, 0(a1)
+; RV64I-WITHFP-NEXT:    #APP
+; RV64I-WITHFP-NEXT:    nop
+; RV64I-WITHFP-EMPTY:
+; RV64I-WITHFP-NEXT:    #NO_APP
+; RV64I-WITHFP-NEXT:    lui a0, 97
+; RV64I-WITHFP-NEXT:    addiw a0, a0, 704
+; RV64I-WITHFP-NEXT:    add sp, sp, a0
+; RV64I-WITHFP-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s1, 2008(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s2, 2000(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    addi sp, sp, 2032
+; RV64I-WITHFP-NEXT:    ret
+  %data = alloca [ 100000 x i32 ] , align 4
+  %ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %data, i32 0, i32 80000
+  %1 = tail call { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } asm sideeffect "nop", "=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r,=r"()
+  %asmresult0 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 0
+  %asmresult1 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 1
+  %asmresult2 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 2
+  %asmresult3 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 3
+  %asmresult4 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 4
+  %asmresult5 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 5
+  %asmresult6 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 6
+  %asmresult7 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 7
+  %asmresult8 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 8
+  %asmresult9 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 9
+  %asmresult10 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 10
+  %asmresult11 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 11
+  %asmresult12 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 12
+  %asmresult13 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 13
+  %asmresult14 = extractvalue { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 } %1, 14
+  store volatile i32 %a, i32* %ptr
+  tail call void asm sideeffect "nop", "r,r,r,r,r,r,r,r,r,r,r,r,r,r,r"(i32 %asmresult0, i32 %asmresult1, i32 %asmresult2, i32 %asmresult3, i32 %asmresult4, i32 %asmresult5, i32 %asmresult6, i32 %asmresult7, i32 %asmresult8, i32 %asmresult9, i32 %asmresult10, i32 %asmresult11, i32 %asmresult12, i32 %asmresult13, i32 %asmresult14)
+  ret void
+}

