[llvm] [RISCV] Add a rematerializable pseudo instruction for LUI+ADDI for global addresses. (PR #93142)

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed May 22 23:35:07 PDT 2024


https://github.com/topperc created https://github.com/llvm/llvm-project/pull/93142

This allows register allocation to rematerialize these instead of spilling and reloading. We need to make it a single instruction due to limitations in rematerialization.
    
This pseudo is expanded to an LUI+ADDI pair between regalloc and post RA scheduling.

Co-authored-by: Jesse Huang <jesse.huang at sifive.com>

Stacked on #93129

>From c5379195f4824e5e6d9739d790a9508feda6d2b9 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 22 May 2024 19:48:00 -0700
Subject: [PATCH 1/2] [RISCV] Restrict when we fold an ADD_LO into a load/store
 address.

Don't fold if there are loads/stores that use the ADD_LO with a
non-zero immediate offset that can't be folded based on alignment.

This avoids cases where some loads/stores use the LUI directly and
other loads/store uses the result of an ADDI that depends on the LUI.

This increases the latency to the load that we no longer fold, but
reduces the need for a temporary register to hold the LUI result
for multiple uses.

This is preparation for introducing a rematerializable LUI+ADDI
pseudoinstruction.

Co-authored-by: Jesse Huang <jesse.huang at sifive.com>
---
 llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp   |   75 +-
 llvm/test/CodeGen/RISCV/bfloat-mem.ll         |   10 +-
 llvm/test/CodeGen/RISCV/byval.ll              |    8 +-
 .../test/CodeGen/RISCV/callee-saved-fpr32s.ll | 2404 +++++++-------
 .../test/CodeGen/RISCV/callee-saved-fpr64s.ll | 1468 +++++----
 llvm/test/CodeGen/RISCV/callee-saved-gprs.ll  | 2910 ++++++++---------
 llvm/test/CodeGen/RISCV/double-mem.ll         |   38 +-
 llvm/test/CodeGen/RISCV/float-mem.ll          |   20 +-
 .../test/CodeGen/RISCV/fold-addi-loadstore.ll |   32 +-
 .../test/CodeGen/RISCV/global-merge-offset.ll |   14 +-
 llvm/test/CodeGen/RISCV/global-merge.ll       |    2 +-
 llvm/test/CodeGen/RISCV/half-mem.ll           |   40 +-
 .../CodeGen/RISCV/hoist-global-addr-base.ll   |    4 +-
 llvm/test/CodeGen/RISCV/mem.ll                |   12 +-
 llvm/test/CodeGen/RISCV/mem64.ll              |   12 +-
 llvm/test/CodeGen/RISCV/memcpy.ll             |  140 +-
 llvm/test/CodeGen/RISCV/push-pop-popret.ll    | 2906 ++++++++--------
 .../CodeGen/RISCV/rv64-legal-i32/mem64.ll     |   12 +-
 .../rvv/fixed-vectors-store-merge-crash.ll    |   12 +-
 llvm/test/CodeGen/RISCV/saverestore.ll        | 2377 +++++++++++++-
 .../CodeGen/RISCV/zext-with-load-is-free.ll   |    8 +-
 21 files changed, 7295 insertions(+), 5209 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index d965dd4fc9a95..7c0908f18e28b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2457,6 +2457,61 @@ static bool isWorthFoldingAdd(SDValue Add) {
   return true;
 }
 
+// To prevent SelectAddrRegImm from folding offsets that conflict with the
+// fusion of PseudoLIAddr, check if the offset of every use of a given address
+// is within the alignment.
+static bool areUserOffsetsWithinAlignment(SDValue Addr, Align Alignment) {
+  for (auto *Use : Addr->uses()) {
+    if (!Use->isMachineOpcode()) {
+      // Don't allow stores of the value. It must be used as the address.
+      if (Use->getOpcode() == ISD::STORE &&
+          cast<StoreSDNode>(Use)->getValue() == Addr)
+        return false;
+      if (Use->getOpcode() == ISD::ATOMIC_STORE &&
+          cast<AtomicSDNode>(Use)->getVal() == Addr)
+        return false;
+      // If the user is direct load/store, there is no offset.
+      if (Use->getOpcode() == ISD::LOAD || Use->getOpcode() == ISD::STORE ||
+          Use->getOpcode() == ISD::ATOMIC_LOAD ||
+          Use->getOpcode() == ISD::ATOMIC_STORE)
+        continue;
+      if (Use->getOpcode() == ISD::ADD &&
+          isa<ConstantSDNode>(Use->getOperand(1)) &&
+          Alignment > cast<ConstantSDNode>(Use->getOperand(1))->getSExtValue())
+        continue;
+
+      return false;
+    }
+
+    // If user is already selected, get offsets from load/store instructions
+    unsigned int Opcode = Use->getMachineOpcode();
+    if (Opcode == RISCV::LB || Opcode == RISCV::LBU || Opcode == RISCV::LH ||
+        Opcode == RISCV::LHU || Opcode == RISCV::LW || Opcode == RISCV::LWU ||
+        Opcode == RISCV::LD || Opcode == RISCV::FLH || Opcode == RISCV::FLW ||
+        Opcode == RISCV::FLD) {
+      if (auto *Offset = dyn_cast<ConstantSDNode>(Use->getOperand(1))) {
+        if (Offset->isZero() || Alignment > Offset->getSExtValue())
+          continue;
+      }
+      return false;
+    }
+    if (Opcode == RISCV::SB || Opcode == RISCV::SH || Opcode == RISCV::SW ||
+        Opcode == RISCV::SD || Opcode == RISCV::FSH || Opcode == RISCV::FSW ||
+        Opcode == RISCV::FSD) {
+      // Also check if Addr is used as the value of store.
+      if (Use->getOperand(0) == Addr)
+        return false;
+      if (auto *Offset = dyn_cast<ConstantSDNode>(Use->getOperand(2))) {
+        if (Offset->isZero() || Alignment > Offset->getSExtValue())
+          continue;
+      }
+      return false;
+    }
+    return false;
+  }
+
+  return true;
+}
 bool RISCVDAGToDAGISel::SelectAddrRegRegScale(SDValue Addr,
                                               unsigned MaxShiftAmount,
                                               SDValue &Base, SDValue &Index,
@@ -2520,9 +2575,21 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
   MVT VT = Addr.getSimpleValueType();
 
   if (Addr.getOpcode() == RISCVISD::ADD_LO) {
-    Base = Addr.getOperand(0);
-    Offset = Addr.getOperand(1);
-    return true;
+    bool CanFold = true;
+    // Unconditionally fold if operand 1 is not a global address (e.g. an
+    // extern symbol)
+    if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
+      const DataLayout &DL = CurDAG->getDataLayout();
+      Align Alignment = commonAlignment(
+          GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
+      if (!areUserOffsetsWithinAlignment(Addr, Alignment))
+        CanFold = false;
+    }
+    if (CanFold) {
+      Base = Addr.getOperand(0);
+      Offset = Addr.getOperand(1);
+      return true;
+    }
   }
 
   int64_t RV32ZdinxRange = IsINX ? 4 : 0;
@@ -2541,7 +2608,7 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
           const DataLayout &DL = CurDAG->getDataLayout();
           Align Alignment = commonAlignment(
               GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
-          if (CVal == 0 || Alignment > CVal) {
+          if (areUserOffsetsWithinAlignment(Base, Alignment)) {
             int64_t CombinedOffset = CVal + GA->getOffset();
             Base = Base.getOperand(0);
             Offset = CurDAG->getTargetGlobalAddress(
diff --git a/llvm/test/CodeGen/RISCV/bfloat-mem.ll b/llvm/test/CodeGen/RISCV/bfloat-mem.ll
index 4b6c0c29d660b..39340c85cfadc 100644
--- a/llvm/test/CodeGen/RISCV/bfloat-mem.ll
+++ b/llvm/test/CodeGen/RISCV/bfloat-mem.ll
@@ -53,11 +53,11 @@ define bfloat @flh_fsh_global(bfloat %a, bfloat %b) nounwind {
 ; CHECK-NEXT:    fadd.s fa5, fa4, fa5
 ; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
 ; CHECK-NEXT:    lui a0, %hi(G)
-; CHECK-NEXT:    flh fa5, %lo(G)(a0)
-; CHECK-NEXT:    addi a1, a0, %lo(G)
-; CHECK-NEXT:    fsh fa0, %lo(G)(a0)
-; CHECK-NEXT:    flh fa5, 18(a1)
-; CHECK-NEXT:    fsh fa0, 18(a1)
+; CHECK-NEXT:    addi a0, a0, %lo(G)
+; CHECK-NEXT:    flh fa5, 0(a0)
+; CHECK-NEXT:    fsh fa0, 0(a0)
+; CHECK-NEXT:    flh fa5, 18(a0)
+; CHECK-NEXT:    fsh fa0, 18(a0)
 ; CHECK-NEXT:    ret
   %1 = fadd bfloat %a, %b
   %2 = load volatile bfloat, ptr @G
diff --git a/llvm/test/CodeGen/RISCV/byval.ll b/llvm/test/CodeGen/RISCV/byval.ll
index 9151f3b03e7c2..c5e48ee75e482 100644
--- a/llvm/test/CodeGen/RISCV/byval.ll
+++ b/llvm/test/CodeGen/RISCV/byval.ll
@@ -22,15 +22,15 @@ define void @caller() nounwind {
 ; RV32I-NEXT:    addi sp, sp, -32
 ; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lui a0, %hi(foo)
-; RV32I-NEXT:    lw a1, %lo(foo)(a0)
-; RV32I-NEXT:    sw a1, 12(sp)
 ; RV32I-NEXT:    addi a0, a0, %lo(foo)
 ; RV32I-NEXT:    lw a1, 12(a0)
 ; RV32I-NEXT:    sw a1, 24(sp)
 ; RV32I-NEXT:    lw a1, 8(a0)
 ; RV32I-NEXT:    sw a1, 20(sp)
-; RV32I-NEXT:    lw a0, 4(a0)
-; RV32I-NEXT:    sw a0, 16(sp)
+; RV32I-NEXT:    lw a1, 4(a0)
+; RV32I-NEXT:    sw a1, 16(sp)
+; RV32I-NEXT:    lw a0, 0(a0)
+; RV32I-NEXT:    sw a0, 12(sp)
 ; RV32I-NEXT:    addi a0, sp, 12
 ; RV32I-NEXT:    call callee
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
index 2122b3fd91788..b525f9aa59c09 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
@@ -28,281 +28,281 @@ define void @callee() nounwind {
 ; ILP32-LABEL: callee:
 ; ILP32:       # %bb.0:
 ; ILP32-NEXT:    lui a0, %hi(var)
-; ILP32-NEXT:    flw fa5, %lo(var)(a0)
-; ILP32-NEXT:    flw fa4, %lo(var+4)(a0)
-; ILP32-NEXT:    flw fa3, %lo(var+8)(a0)
-; ILP32-NEXT:    flw fa2, %lo(var+12)(a0)
-; ILP32-NEXT:    addi a1, a0, %lo(var)
-; ILP32-NEXT:    flw fa1, 16(a1)
-; ILP32-NEXT:    flw fa0, 20(a1)
-; ILP32-NEXT:    flw ft0, 24(a1)
-; ILP32-NEXT:    flw ft1, 28(a1)
-; ILP32-NEXT:    flw ft2, 32(a1)
-; ILP32-NEXT:    flw ft3, 36(a1)
-; ILP32-NEXT:    flw ft4, 40(a1)
-; ILP32-NEXT:    flw ft5, 44(a1)
-; ILP32-NEXT:    flw ft6, 48(a1)
-; ILP32-NEXT:    flw ft7, 52(a1)
-; ILP32-NEXT:    flw fa6, 56(a1)
-; ILP32-NEXT:    flw fa7, 60(a1)
-; ILP32-NEXT:    flw ft8, 64(a1)
-; ILP32-NEXT:    flw ft9, 68(a1)
-; ILP32-NEXT:    flw ft10, 72(a1)
-; ILP32-NEXT:    flw ft11, 76(a1)
-; ILP32-NEXT:    flw fs0, 80(a1)
-; ILP32-NEXT:    flw fs1, 84(a1)
-; ILP32-NEXT:    flw fs2, 88(a1)
-; ILP32-NEXT:    flw fs3, 92(a1)
-; ILP32-NEXT:    flw fs4, 96(a1)
-; ILP32-NEXT:    flw fs5, 100(a1)
-; ILP32-NEXT:    flw fs6, 104(a1)
-; ILP32-NEXT:    flw fs7, 108(a1)
-; ILP32-NEXT:    flw fs8, 124(a1)
-; ILP32-NEXT:    flw fs9, 120(a1)
-; ILP32-NEXT:    flw fs10, 116(a1)
-; ILP32-NEXT:    flw fs11, 112(a1)
-; ILP32-NEXT:    fsw fs8, 124(a1)
-; ILP32-NEXT:    fsw fs9, 120(a1)
-; ILP32-NEXT:    fsw fs10, 116(a1)
-; ILP32-NEXT:    fsw fs11, 112(a1)
-; ILP32-NEXT:    fsw fs7, 108(a1)
-; ILP32-NEXT:    fsw fs6, 104(a1)
-; ILP32-NEXT:    fsw fs5, 100(a1)
-; ILP32-NEXT:    fsw fs4, 96(a1)
-; ILP32-NEXT:    fsw fs3, 92(a1)
-; ILP32-NEXT:    fsw fs2, 88(a1)
-; ILP32-NEXT:    fsw fs1, 84(a1)
-; ILP32-NEXT:    fsw fs0, 80(a1)
-; ILP32-NEXT:    fsw ft11, 76(a1)
-; ILP32-NEXT:    fsw ft10, 72(a1)
-; ILP32-NEXT:    fsw ft9, 68(a1)
-; ILP32-NEXT:    fsw ft8, 64(a1)
-; ILP32-NEXT:    fsw fa7, 60(a1)
-; ILP32-NEXT:    fsw fa6, 56(a1)
-; ILP32-NEXT:    fsw ft7, 52(a1)
-; ILP32-NEXT:    fsw ft6, 48(a1)
-; ILP32-NEXT:    fsw ft5, 44(a1)
-; ILP32-NEXT:    fsw ft4, 40(a1)
-; ILP32-NEXT:    fsw ft3, 36(a1)
-; ILP32-NEXT:    fsw ft2, 32(a1)
-; ILP32-NEXT:    fsw ft1, 28(a1)
-; ILP32-NEXT:    fsw ft0, 24(a1)
-; ILP32-NEXT:    fsw fa0, 20(a1)
-; ILP32-NEXT:    fsw fa1, 16(a1)
-; ILP32-NEXT:    fsw fa2, %lo(var+12)(a0)
-; ILP32-NEXT:    fsw fa3, %lo(var+8)(a0)
-; ILP32-NEXT:    fsw fa4, %lo(var+4)(a0)
-; ILP32-NEXT:    fsw fa5, %lo(var)(a0)
+; ILP32-NEXT:    addi a0, a0, %lo(var)
+; ILP32-NEXT:    flw fa5, 0(a0)
+; ILP32-NEXT:    flw fa4, 4(a0)
+; ILP32-NEXT:    flw fa3, 8(a0)
+; ILP32-NEXT:    flw fa2, 12(a0)
+; ILP32-NEXT:    flw fa1, 16(a0)
+; ILP32-NEXT:    flw fa0, 20(a0)
+; ILP32-NEXT:    flw ft0, 24(a0)
+; ILP32-NEXT:    flw ft1, 28(a0)
+; ILP32-NEXT:    flw ft2, 32(a0)
+; ILP32-NEXT:    flw ft3, 36(a0)
+; ILP32-NEXT:    flw ft4, 40(a0)
+; ILP32-NEXT:    flw ft5, 44(a0)
+; ILP32-NEXT:    flw ft6, 48(a0)
+; ILP32-NEXT:    flw ft7, 52(a0)
+; ILP32-NEXT:    flw fa6, 56(a0)
+; ILP32-NEXT:    flw fa7, 60(a0)
+; ILP32-NEXT:    flw ft8, 64(a0)
+; ILP32-NEXT:    flw ft9, 68(a0)
+; ILP32-NEXT:    flw ft10, 72(a0)
+; ILP32-NEXT:    flw ft11, 76(a0)
+; ILP32-NEXT:    flw fs0, 80(a0)
+; ILP32-NEXT:    flw fs1, 84(a0)
+; ILP32-NEXT:    flw fs2, 88(a0)
+; ILP32-NEXT:    flw fs3, 92(a0)
+; ILP32-NEXT:    flw fs4, 96(a0)
+; ILP32-NEXT:    flw fs5, 100(a0)
+; ILP32-NEXT:    flw fs6, 104(a0)
+; ILP32-NEXT:    flw fs7, 108(a0)
+; ILP32-NEXT:    flw fs8, 124(a0)
+; ILP32-NEXT:    flw fs9, 120(a0)
+; ILP32-NEXT:    flw fs10, 116(a0)
+; ILP32-NEXT:    flw fs11, 112(a0)
+; ILP32-NEXT:    fsw fs8, 124(a0)
+; ILP32-NEXT:    fsw fs9, 120(a0)
+; ILP32-NEXT:    fsw fs10, 116(a0)
+; ILP32-NEXT:    fsw fs11, 112(a0)
+; ILP32-NEXT:    fsw fs7, 108(a0)
+; ILP32-NEXT:    fsw fs6, 104(a0)
+; ILP32-NEXT:    fsw fs5, 100(a0)
+; ILP32-NEXT:    fsw fs4, 96(a0)
+; ILP32-NEXT:    fsw fs3, 92(a0)
+; ILP32-NEXT:    fsw fs2, 88(a0)
+; ILP32-NEXT:    fsw fs1, 84(a0)
+; ILP32-NEXT:    fsw fs0, 80(a0)
+; ILP32-NEXT:    fsw ft11, 76(a0)
+; ILP32-NEXT:    fsw ft10, 72(a0)
+; ILP32-NEXT:    fsw ft9, 68(a0)
+; ILP32-NEXT:    fsw ft8, 64(a0)
+; ILP32-NEXT:    fsw fa7, 60(a0)
+; ILP32-NEXT:    fsw fa6, 56(a0)
+; ILP32-NEXT:    fsw ft7, 52(a0)
+; ILP32-NEXT:    fsw ft6, 48(a0)
+; ILP32-NEXT:    fsw ft5, 44(a0)
+; ILP32-NEXT:    fsw ft4, 40(a0)
+; ILP32-NEXT:    fsw ft3, 36(a0)
+; ILP32-NEXT:    fsw ft2, 32(a0)
+; ILP32-NEXT:    fsw ft1, 28(a0)
+; ILP32-NEXT:    fsw ft0, 24(a0)
+; ILP32-NEXT:    fsw fa0, 20(a0)
+; ILP32-NEXT:    fsw fa1, 16(a0)
+; ILP32-NEXT:    fsw fa2, 12(a0)
+; ILP32-NEXT:    fsw fa3, 8(a0)
+; ILP32-NEXT:    fsw fa4, 4(a0)
+; ILP32-NEXT:    fsw fa5, 0(a0)
 ; ILP32-NEXT:    ret
 ;
 ; ILP32E-LABEL: callee:
 ; ILP32E:       # %bb.0:
 ; ILP32E-NEXT:    lui a0, %hi(var)
-; ILP32E-NEXT:    flw fa5, %lo(var)(a0)
-; ILP32E-NEXT:    flw fa4, %lo(var+4)(a0)
-; ILP32E-NEXT:    flw fa3, %lo(var+8)(a0)
-; ILP32E-NEXT:    flw fa2, %lo(var+12)(a0)
-; ILP32E-NEXT:    addi a1, a0, %lo(var)
-; ILP32E-NEXT:    flw fa1, 16(a1)
-; ILP32E-NEXT:    flw fa0, 20(a1)
-; ILP32E-NEXT:    flw ft0, 24(a1)
-; ILP32E-NEXT:    flw ft1, 28(a1)
-; ILP32E-NEXT:    flw ft2, 32(a1)
-; ILP32E-NEXT:    flw ft3, 36(a1)
-; ILP32E-NEXT:    flw ft4, 40(a1)
-; ILP32E-NEXT:    flw ft5, 44(a1)
-; ILP32E-NEXT:    flw ft6, 48(a1)
-; ILP32E-NEXT:    flw ft7, 52(a1)
-; ILP32E-NEXT:    flw fa6, 56(a1)
-; ILP32E-NEXT:    flw fa7, 60(a1)
-; ILP32E-NEXT:    flw ft8, 64(a1)
-; ILP32E-NEXT:    flw ft9, 68(a1)
-; ILP32E-NEXT:    flw ft10, 72(a1)
-; ILP32E-NEXT:    flw ft11, 76(a1)
-; ILP32E-NEXT:    flw fs0, 80(a1)
-; ILP32E-NEXT:    flw fs1, 84(a1)
-; ILP32E-NEXT:    flw fs2, 88(a1)
-; ILP32E-NEXT:    flw fs3, 92(a1)
-; ILP32E-NEXT:    flw fs4, 96(a1)
-; ILP32E-NEXT:    flw fs5, 100(a1)
-; ILP32E-NEXT:    flw fs6, 104(a1)
-; ILP32E-NEXT:    flw fs7, 108(a1)
-; ILP32E-NEXT:    flw fs8, 124(a1)
-; ILP32E-NEXT:    flw fs9, 120(a1)
-; ILP32E-NEXT:    flw fs10, 116(a1)
-; ILP32E-NEXT:    flw fs11, 112(a1)
-; ILP32E-NEXT:    fsw fs8, 124(a1)
-; ILP32E-NEXT:    fsw fs9, 120(a1)
-; ILP32E-NEXT:    fsw fs10, 116(a1)
-; ILP32E-NEXT:    fsw fs11, 112(a1)
-; ILP32E-NEXT:    fsw fs7, 108(a1)
-; ILP32E-NEXT:    fsw fs6, 104(a1)
-; ILP32E-NEXT:    fsw fs5, 100(a1)
-; ILP32E-NEXT:    fsw fs4, 96(a1)
-; ILP32E-NEXT:    fsw fs3, 92(a1)
-; ILP32E-NEXT:    fsw fs2, 88(a1)
-; ILP32E-NEXT:    fsw fs1, 84(a1)
-; ILP32E-NEXT:    fsw fs0, 80(a1)
-; ILP32E-NEXT:    fsw ft11, 76(a1)
-; ILP32E-NEXT:    fsw ft10, 72(a1)
-; ILP32E-NEXT:    fsw ft9, 68(a1)
-; ILP32E-NEXT:    fsw ft8, 64(a1)
-; ILP32E-NEXT:    fsw fa7, 60(a1)
-; ILP32E-NEXT:    fsw fa6, 56(a1)
-; ILP32E-NEXT:    fsw ft7, 52(a1)
-; ILP32E-NEXT:    fsw ft6, 48(a1)
-; ILP32E-NEXT:    fsw ft5, 44(a1)
-; ILP32E-NEXT:    fsw ft4, 40(a1)
-; ILP32E-NEXT:    fsw ft3, 36(a1)
-; ILP32E-NEXT:    fsw ft2, 32(a1)
-; ILP32E-NEXT:    fsw ft1, 28(a1)
-; ILP32E-NEXT:    fsw ft0, 24(a1)
-; ILP32E-NEXT:    fsw fa0, 20(a1)
-; ILP32E-NEXT:    fsw fa1, 16(a1)
-; ILP32E-NEXT:    fsw fa2, %lo(var+12)(a0)
-; ILP32E-NEXT:    fsw fa3, %lo(var+8)(a0)
-; ILP32E-NEXT:    fsw fa4, %lo(var+4)(a0)
-; ILP32E-NEXT:    fsw fa5, %lo(var)(a0)
+; ILP32E-NEXT:    addi a0, a0, %lo(var)
+; ILP32E-NEXT:    flw fa5, 0(a0)
+; ILP32E-NEXT:    flw fa4, 4(a0)
+; ILP32E-NEXT:    flw fa3, 8(a0)
+; ILP32E-NEXT:    flw fa2, 12(a0)
+; ILP32E-NEXT:    flw fa1, 16(a0)
+; ILP32E-NEXT:    flw fa0, 20(a0)
+; ILP32E-NEXT:    flw ft0, 24(a0)
+; ILP32E-NEXT:    flw ft1, 28(a0)
+; ILP32E-NEXT:    flw ft2, 32(a0)
+; ILP32E-NEXT:    flw ft3, 36(a0)
+; ILP32E-NEXT:    flw ft4, 40(a0)
+; ILP32E-NEXT:    flw ft5, 44(a0)
+; ILP32E-NEXT:    flw ft6, 48(a0)
+; ILP32E-NEXT:    flw ft7, 52(a0)
+; ILP32E-NEXT:    flw fa6, 56(a0)
+; ILP32E-NEXT:    flw fa7, 60(a0)
+; ILP32E-NEXT:    flw ft8, 64(a0)
+; ILP32E-NEXT:    flw ft9, 68(a0)
+; ILP32E-NEXT:    flw ft10, 72(a0)
+; ILP32E-NEXT:    flw ft11, 76(a0)
+; ILP32E-NEXT:    flw fs0, 80(a0)
+; ILP32E-NEXT:    flw fs1, 84(a0)
+; ILP32E-NEXT:    flw fs2, 88(a0)
+; ILP32E-NEXT:    flw fs3, 92(a0)
+; ILP32E-NEXT:    flw fs4, 96(a0)
+; ILP32E-NEXT:    flw fs5, 100(a0)
+; ILP32E-NEXT:    flw fs6, 104(a0)
+; ILP32E-NEXT:    flw fs7, 108(a0)
+; ILP32E-NEXT:    flw fs8, 124(a0)
+; ILP32E-NEXT:    flw fs9, 120(a0)
+; ILP32E-NEXT:    flw fs10, 116(a0)
+; ILP32E-NEXT:    flw fs11, 112(a0)
+; ILP32E-NEXT:    fsw fs8, 124(a0)
+; ILP32E-NEXT:    fsw fs9, 120(a0)
+; ILP32E-NEXT:    fsw fs10, 116(a0)
+; ILP32E-NEXT:    fsw fs11, 112(a0)
+; ILP32E-NEXT:    fsw fs7, 108(a0)
+; ILP32E-NEXT:    fsw fs6, 104(a0)
+; ILP32E-NEXT:    fsw fs5, 100(a0)
+; ILP32E-NEXT:    fsw fs4, 96(a0)
+; ILP32E-NEXT:    fsw fs3, 92(a0)
+; ILP32E-NEXT:    fsw fs2, 88(a0)
+; ILP32E-NEXT:    fsw fs1, 84(a0)
+; ILP32E-NEXT:    fsw fs0, 80(a0)
+; ILP32E-NEXT:    fsw ft11, 76(a0)
+; ILP32E-NEXT:    fsw ft10, 72(a0)
+; ILP32E-NEXT:    fsw ft9, 68(a0)
+; ILP32E-NEXT:    fsw ft8, 64(a0)
+; ILP32E-NEXT:    fsw fa7, 60(a0)
+; ILP32E-NEXT:    fsw fa6, 56(a0)
+; ILP32E-NEXT:    fsw ft7, 52(a0)
+; ILP32E-NEXT:    fsw ft6, 48(a0)
+; ILP32E-NEXT:    fsw ft5, 44(a0)
+; ILP32E-NEXT:    fsw ft4, 40(a0)
+; ILP32E-NEXT:    fsw ft3, 36(a0)
+; ILP32E-NEXT:    fsw ft2, 32(a0)
+; ILP32E-NEXT:    fsw ft1, 28(a0)
+; ILP32E-NEXT:    fsw ft0, 24(a0)
+; ILP32E-NEXT:    fsw fa0, 20(a0)
+; ILP32E-NEXT:    fsw fa1, 16(a0)
+; ILP32E-NEXT:    fsw fa2, 12(a0)
+; ILP32E-NEXT:    fsw fa3, 8(a0)
+; ILP32E-NEXT:    fsw fa4, 4(a0)
+; ILP32E-NEXT:    fsw fa5, 0(a0)
 ; ILP32E-NEXT:    ret
 ;
 ; LP64-LABEL: callee:
 ; LP64:       # %bb.0:
 ; LP64-NEXT:    lui a0, %hi(var)
-; LP64-NEXT:    flw fa5, %lo(var)(a0)
-; LP64-NEXT:    flw fa4, %lo(var+4)(a0)
-; LP64-NEXT:    flw fa3, %lo(var+8)(a0)
-; LP64-NEXT:    flw fa2, %lo(var+12)(a0)
-; LP64-NEXT:    addi a1, a0, %lo(var)
-; LP64-NEXT:    flw fa1, 16(a1)
-; LP64-NEXT:    flw fa0, 20(a1)
-; LP64-NEXT:    flw ft0, 24(a1)
-; LP64-NEXT:    flw ft1, 28(a1)
-; LP64-NEXT:    flw ft2, 32(a1)
-; LP64-NEXT:    flw ft3, 36(a1)
-; LP64-NEXT:    flw ft4, 40(a1)
-; LP64-NEXT:    flw ft5, 44(a1)
-; LP64-NEXT:    flw ft6, 48(a1)
-; LP64-NEXT:    flw ft7, 52(a1)
-; LP64-NEXT:    flw fa6, 56(a1)
-; LP64-NEXT:    flw fa7, 60(a1)
-; LP64-NEXT:    flw ft8, 64(a1)
-; LP64-NEXT:    flw ft9, 68(a1)
-; LP64-NEXT:    flw ft10, 72(a1)
-; LP64-NEXT:    flw ft11, 76(a1)
-; LP64-NEXT:    flw fs0, 80(a1)
-; LP64-NEXT:    flw fs1, 84(a1)
-; LP64-NEXT:    flw fs2, 88(a1)
-; LP64-NEXT:    flw fs3, 92(a1)
-; LP64-NEXT:    flw fs4, 96(a1)
-; LP64-NEXT:    flw fs5, 100(a1)
-; LP64-NEXT:    flw fs6, 104(a1)
-; LP64-NEXT:    flw fs7, 108(a1)
-; LP64-NEXT:    flw fs8, 124(a1)
-; LP64-NEXT:    flw fs9, 120(a1)
-; LP64-NEXT:    flw fs10, 116(a1)
-; LP64-NEXT:    flw fs11, 112(a1)
-; LP64-NEXT:    fsw fs8, 124(a1)
-; LP64-NEXT:    fsw fs9, 120(a1)
-; LP64-NEXT:    fsw fs10, 116(a1)
-; LP64-NEXT:    fsw fs11, 112(a1)
-; LP64-NEXT:    fsw fs7, 108(a1)
-; LP64-NEXT:    fsw fs6, 104(a1)
-; LP64-NEXT:    fsw fs5, 100(a1)
-; LP64-NEXT:    fsw fs4, 96(a1)
-; LP64-NEXT:    fsw fs3, 92(a1)
-; LP64-NEXT:    fsw fs2, 88(a1)
-; LP64-NEXT:    fsw fs1, 84(a1)
-; LP64-NEXT:    fsw fs0, 80(a1)
-; LP64-NEXT:    fsw ft11, 76(a1)
-; LP64-NEXT:    fsw ft10, 72(a1)
-; LP64-NEXT:    fsw ft9, 68(a1)
-; LP64-NEXT:    fsw ft8, 64(a1)
-; LP64-NEXT:    fsw fa7, 60(a1)
-; LP64-NEXT:    fsw fa6, 56(a1)
-; LP64-NEXT:    fsw ft7, 52(a1)
-; LP64-NEXT:    fsw ft6, 48(a1)
-; LP64-NEXT:    fsw ft5, 44(a1)
-; LP64-NEXT:    fsw ft4, 40(a1)
-; LP64-NEXT:    fsw ft3, 36(a1)
-; LP64-NEXT:    fsw ft2, 32(a1)
-; LP64-NEXT:    fsw ft1, 28(a1)
-; LP64-NEXT:    fsw ft0, 24(a1)
-; LP64-NEXT:    fsw fa0, 20(a1)
-; LP64-NEXT:    fsw fa1, 16(a1)
-; LP64-NEXT:    fsw fa2, %lo(var+12)(a0)
-; LP64-NEXT:    fsw fa3, %lo(var+8)(a0)
-; LP64-NEXT:    fsw fa4, %lo(var+4)(a0)
-; LP64-NEXT:    fsw fa5, %lo(var)(a0)
+; LP64-NEXT:    addi a0, a0, %lo(var)
+; LP64-NEXT:    flw fa5, 0(a0)
+; LP64-NEXT:    flw fa4, 4(a0)
+; LP64-NEXT:    flw fa3, 8(a0)
+; LP64-NEXT:    flw fa2, 12(a0)
+; LP64-NEXT:    flw fa1, 16(a0)
+; LP64-NEXT:    flw fa0, 20(a0)
+; LP64-NEXT:    flw ft0, 24(a0)
+; LP64-NEXT:    flw ft1, 28(a0)
+; LP64-NEXT:    flw ft2, 32(a0)
+; LP64-NEXT:    flw ft3, 36(a0)
+; LP64-NEXT:    flw ft4, 40(a0)
+; LP64-NEXT:    flw ft5, 44(a0)
+; LP64-NEXT:    flw ft6, 48(a0)
+; LP64-NEXT:    flw ft7, 52(a0)
+; LP64-NEXT:    flw fa6, 56(a0)
+; LP64-NEXT:    flw fa7, 60(a0)
+; LP64-NEXT:    flw ft8, 64(a0)
+; LP64-NEXT:    flw ft9, 68(a0)
+; LP64-NEXT:    flw ft10, 72(a0)
+; LP64-NEXT:    flw ft11, 76(a0)
+; LP64-NEXT:    flw fs0, 80(a0)
+; LP64-NEXT:    flw fs1, 84(a0)
+; LP64-NEXT:    flw fs2, 88(a0)
+; LP64-NEXT:    flw fs3, 92(a0)
+; LP64-NEXT:    flw fs4, 96(a0)
+; LP64-NEXT:    flw fs5, 100(a0)
+; LP64-NEXT:    flw fs6, 104(a0)
+; LP64-NEXT:    flw fs7, 108(a0)
+; LP64-NEXT:    flw fs8, 124(a0)
+; LP64-NEXT:    flw fs9, 120(a0)
+; LP64-NEXT:    flw fs10, 116(a0)
+; LP64-NEXT:    flw fs11, 112(a0)
+; LP64-NEXT:    fsw fs8, 124(a0)
+; LP64-NEXT:    fsw fs9, 120(a0)
+; LP64-NEXT:    fsw fs10, 116(a0)
+; LP64-NEXT:    fsw fs11, 112(a0)
+; LP64-NEXT:    fsw fs7, 108(a0)
+; LP64-NEXT:    fsw fs6, 104(a0)
+; LP64-NEXT:    fsw fs5, 100(a0)
+; LP64-NEXT:    fsw fs4, 96(a0)
+; LP64-NEXT:    fsw fs3, 92(a0)
+; LP64-NEXT:    fsw fs2, 88(a0)
+; LP64-NEXT:    fsw fs1, 84(a0)
+; LP64-NEXT:    fsw fs0, 80(a0)
+; LP64-NEXT:    fsw ft11, 76(a0)
+; LP64-NEXT:    fsw ft10, 72(a0)
+; LP64-NEXT:    fsw ft9, 68(a0)
+; LP64-NEXT:    fsw ft8, 64(a0)
+; LP64-NEXT:    fsw fa7, 60(a0)
+; LP64-NEXT:    fsw fa6, 56(a0)
+; LP64-NEXT:    fsw ft7, 52(a0)
+; LP64-NEXT:    fsw ft6, 48(a0)
+; LP64-NEXT:    fsw ft5, 44(a0)
+; LP64-NEXT:    fsw ft4, 40(a0)
+; LP64-NEXT:    fsw ft3, 36(a0)
+; LP64-NEXT:    fsw ft2, 32(a0)
+; LP64-NEXT:    fsw ft1, 28(a0)
+; LP64-NEXT:    fsw ft0, 24(a0)
+; LP64-NEXT:    fsw fa0, 20(a0)
+; LP64-NEXT:    fsw fa1, 16(a0)
+; LP64-NEXT:    fsw fa2, 12(a0)
+; LP64-NEXT:    fsw fa3, 8(a0)
+; LP64-NEXT:    fsw fa4, 4(a0)
+; LP64-NEXT:    fsw fa5, 0(a0)
 ; LP64-NEXT:    ret
 ;
 ; LP64E-LABEL: callee:
 ; LP64E:       # %bb.0:
 ; LP64E-NEXT:    lui a0, %hi(var)
-; LP64E-NEXT:    flw fa5, %lo(var)(a0)
-; LP64E-NEXT:    flw fa4, %lo(var+4)(a0)
-; LP64E-NEXT:    flw fa3, %lo(var+8)(a0)
-; LP64E-NEXT:    flw fa2, %lo(var+12)(a0)
-; LP64E-NEXT:    addi a1, a0, %lo(var)
-; LP64E-NEXT:    flw fa1, 16(a1)
-; LP64E-NEXT:    flw fa0, 20(a1)
-; LP64E-NEXT:    flw ft0, 24(a1)
-; LP64E-NEXT:    flw ft1, 28(a1)
-; LP64E-NEXT:    flw ft2, 32(a1)
-; LP64E-NEXT:    flw ft3, 36(a1)
-; LP64E-NEXT:    flw ft4, 40(a1)
-; LP64E-NEXT:    flw ft5, 44(a1)
-; LP64E-NEXT:    flw ft6, 48(a1)
-; LP64E-NEXT:    flw ft7, 52(a1)
-; LP64E-NEXT:    flw fa6, 56(a1)
-; LP64E-NEXT:    flw fa7, 60(a1)
-; LP64E-NEXT:    flw ft8, 64(a1)
-; LP64E-NEXT:    flw ft9, 68(a1)
-; LP64E-NEXT:    flw ft10, 72(a1)
-; LP64E-NEXT:    flw ft11, 76(a1)
-; LP64E-NEXT:    flw fs0, 80(a1)
-; LP64E-NEXT:    flw fs1, 84(a1)
-; LP64E-NEXT:    flw fs2, 88(a1)
-; LP64E-NEXT:    flw fs3, 92(a1)
-; LP64E-NEXT:    flw fs4, 96(a1)
-; LP64E-NEXT:    flw fs5, 100(a1)
-; LP64E-NEXT:    flw fs6, 104(a1)
-; LP64E-NEXT:    flw fs7, 108(a1)
-; LP64E-NEXT:    flw fs8, 124(a1)
-; LP64E-NEXT:    flw fs9, 120(a1)
-; LP64E-NEXT:    flw fs10, 116(a1)
-; LP64E-NEXT:    flw fs11, 112(a1)
-; LP64E-NEXT:    fsw fs8, 124(a1)
-; LP64E-NEXT:    fsw fs9, 120(a1)
-; LP64E-NEXT:    fsw fs10, 116(a1)
-; LP64E-NEXT:    fsw fs11, 112(a1)
-; LP64E-NEXT:    fsw fs7, 108(a1)
-; LP64E-NEXT:    fsw fs6, 104(a1)
-; LP64E-NEXT:    fsw fs5, 100(a1)
-; LP64E-NEXT:    fsw fs4, 96(a1)
-; LP64E-NEXT:    fsw fs3, 92(a1)
-; LP64E-NEXT:    fsw fs2, 88(a1)
-; LP64E-NEXT:    fsw fs1, 84(a1)
-; LP64E-NEXT:    fsw fs0, 80(a1)
-; LP64E-NEXT:    fsw ft11, 76(a1)
-; LP64E-NEXT:    fsw ft10, 72(a1)
-; LP64E-NEXT:    fsw ft9, 68(a1)
-; LP64E-NEXT:    fsw ft8, 64(a1)
-; LP64E-NEXT:    fsw fa7, 60(a1)
-; LP64E-NEXT:    fsw fa6, 56(a1)
-; LP64E-NEXT:    fsw ft7, 52(a1)
-; LP64E-NEXT:    fsw ft6, 48(a1)
-; LP64E-NEXT:    fsw ft5, 44(a1)
-; LP64E-NEXT:    fsw ft4, 40(a1)
-; LP64E-NEXT:    fsw ft3, 36(a1)
-; LP64E-NEXT:    fsw ft2, 32(a1)
-; LP64E-NEXT:    fsw ft1, 28(a1)
-; LP64E-NEXT:    fsw ft0, 24(a1)
-; LP64E-NEXT:    fsw fa0, 20(a1)
-; LP64E-NEXT:    fsw fa1, 16(a1)
-; LP64E-NEXT:    fsw fa2, %lo(var+12)(a0)
-; LP64E-NEXT:    fsw fa3, %lo(var+8)(a0)
-; LP64E-NEXT:    fsw fa4, %lo(var+4)(a0)
-; LP64E-NEXT:    fsw fa5, %lo(var)(a0)
+; LP64E-NEXT:    addi a0, a0, %lo(var)
+; LP64E-NEXT:    flw fa5, 0(a0)
+; LP64E-NEXT:    flw fa4, 4(a0)
+; LP64E-NEXT:    flw fa3, 8(a0)
+; LP64E-NEXT:    flw fa2, 12(a0)
+; LP64E-NEXT:    flw fa1, 16(a0)
+; LP64E-NEXT:    flw fa0, 20(a0)
+; LP64E-NEXT:    flw ft0, 24(a0)
+; LP64E-NEXT:    flw ft1, 28(a0)
+; LP64E-NEXT:    flw ft2, 32(a0)
+; LP64E-NEXT:    flw ft3, 36(a0)
+; LP64E-NEXT:    flw ft4, 40(a0)
+; LP64E-NEXT:    flw ft5, 44(a0)
+; LP64E-NEXT:    flw ft6, 48(a0)
+; LP64E-NEXT:    flw ft7, 52(a0)
+; LP64E-NEXT:    flw fa6, 56(a0)
+; LP64E-NEXT:    flw fa7, 60(a0)
+; LP64E-NEXT:    flw ft8, 64(a0)
+; LP64E-NEXT:    flw ft9, 68(a0)
+; LP64E-NEXT:    flw ft10, 72(a0)
+; LP64E-NEXT:    flw ft11, 76(a0)
+; LP64E-NEXT:    flw fs0, 80(a0)
+; LP64E-NEXT:    flw fs1, 84(a0)
+; LP64E-NEXT:    flw fs2, 88(a0)
+; LP64E-NEXT:    flw fs3, 92(a0)
+; LP64E-NEXT:    flw fs4, 96(a0)
+; LP64E-NEXT:    flw fs5, 100(a0)
+; LP64E-NEXT:    flw fs6, 104(a0)
+; LP64E-NEXT:    flw fs7, 108(a0)
+; LP64E-NEXT:    flw fs8, 124(a0)
+; LP64E-NEXT:    flw fs9, 120(a0)
+; LP64E-NEXT:    flw fs10, 116(a0)
+; LP64E-NEXT:    flw fs11, 112(a0)
+; LP64E-NEXT:    fsw fs8, 124(a0)
+; LP64E-NEXT:    fsw fs9, 120(a0)
+; LP64E-NEXT:    fsw fs10, 116(a0)
+; LP64E-NEXT:    fsw fs11, 112(a0)
+; LP64E-NEXT:    fsw fs7, 108(a0)
+; LP64E-NEXT:    fsw fs6, 104(a0)
+; LP64E-NEXT:    fsw fs5, 100(a0)
+; LP64E-NEXT:    fsw fs4, 96(a0)
+; LP64E-NEXT:    fsw fs3, 92(a0)
+; LP64E-NEXT:    fsw fs2, 88(a0)
+; LP64E-NEXT:    fsw fs1, 84(a0)
+; LP64E-NEXT:    fsw fs0, 80(a0)
+; LP64E-NEXT:    fsw ft11, 76(a0)
+; LP64E-NEXT:    fsw ft10, 72(a0)
+; LP64E-NEXT:    fsw ft9, 68(a0)
+; LP64E-NEXT:    fsw ft8, 64(a0)
+; LP64E-NEXT:    fsw fa7, 60(a0)
+; LP64E-NEXT:    fsw fa6, 56(a0)
+; LP64E-NEXT:    fsw ft7, 52(a0)
+; LP64E-NEXT:    fsw ft6, 48(a0)
+; LP64E-NEXT:    fsw ft5, 44(a0)
+; LP64E-NEXT:    fsw ft4, 40(a0)
+; LP64E-NEXT:    fsw ft3, 36(a0)
+; LP64E-NEXT:    fsw ft2, 32(a0)
+; LP64E-NEXT:    fsw ft1, 28(a0)
+; LP64E-NEXT:    fsw ft0, 24(a0)
+; LP64E-NEXT:    fsw fa0, 20(a0)
+; LP64E-NEXT:    fsw fa1, 16(a0)
+; LP64E-NEXT:    fsw fa2, 12(a0)
+; LP64E-NEXT:    fsw fa3, 8(a0)
+; LP64E-NEXT:    fsw fa4, 4(a0)
+; LP64E-NEXT:    fsw fa5, 0(a0)
 ; LP64E-NEXT:    ret
 ;
 ; ILP32F-LABEL: callee:
@@ -321,71 +321,71 @@ define void @callee() nounwind {
 ; ILP32F-NEXT:    fsw fs10, 4(sp) # 4-byte Folded Spill
 ; ILP32F-NEXT:    fsw fs11, 0(sp) # 4-byte Folded Spill
 ; ILP32F-NEXT:    lui a0, %hi(var)
-; ILP32F-NEXT:    flw fa5, %lo(var)(a0)
-; ILP32F-NEXT:    flw fa4, %lo(var+4)(a0)
-; ILP32F-NEXT:    flw fa3, %lo(var+8)(a0)
-; ILP32F-NEXT:    flw fa2, %lo(var+12)(a0)
-; ILP32F-NEXT:    addi a1, a0, %lo(var)
-; ILP32F-NEXT:    flw fa1, 16(a1)
-; ILP32F-NEXT:    flw fa0, 20(a1)
-; ILP32F-NEXT:    flw ft0, 24(a1)
-; ILP32F-NEXT:    flw ft1, 28(a1)
-; ILP32F-NEXT:    flw ft2, 32(a1)
-; ILP32F-NEXT:    flw ft3, 36(a1)
-; ILP32F-NEXT:    flw ft4, 40(a1)
-; ILP32F-NEXT:    flw ft5, 44(a1)
-; ILP32F-NEXT:    flw ft6, 48(a1)
-; ILP32F-NEXT:    flw ft7, 52(a1)
-; ILP32F-NEXT:    flw fa6, 56(a1)
-; ILP32F-NEXT:    flw fa7, 60(a1)
-; ILP32F-NEXT:    flw ft8, 64(a1)
-; ILP32F-NEXT:    flw ft9, 68(a1)
-; ILP32F-NEXT:    flw ft10, 72(a1)
-; ILP32F-NEXT:    flw ft11, 76(a1)
-; ILP32F-NEXT:    flw fs0, 80(a1)
-; ILP32F-NEXT:    flw fs1, 84(a1)
-; ILP32F-NEXT:    flw fs2, 88(a1)
-; ILP32F-NEXT:    flw fs3, 92(a1)
-; ILP32F-NEXT:    flw fs4, 96(a1)
-; ILP32F-NEXT:    flw fs5, 100(a1)
-; ILP32F-NEXT:    flw fs6, 104(a1)
-; ILP32F-NEXT:    flw fs7, 108(a1)
-; ILP32F-NEXT:    flw fs8, 124(a1)
-; ILP32F-NEXT:    flw fs9, 120(a1)
-; ILP32F-NEXT:    flw fs10, 116(a1)
-; ILP32F-NEXT:    flw fs11, 112(a1)
-; ILP32F-NEXT:    fsw fs8, 124(a1)
-; ILP32F-NEXT:    fsw fs9, 120(a1)
-; ILP32F-NEXT:    fsw fs10, 116(a1)
-; ILP32F-NEXT:    fsw fs11, 112(a1)
-; ILP32F-NEXT:    fsw fs7, 108(a1)
-; ILP32F-NEXT:    fsw fs6, 104(a1)
-; ILP32F-NEXT:    fsw fs5, 100(a1)
-; ILP32F-NEXT:    fsw fs4, 96(a1)
-; ILP32F-NEXT:    fsw fs3, 92(a1)
-; ILP32F-NEXT:    fsw fs2, 88(a1)
-; ILP32F-NEXT:    fsw fs1, 84(a1)
-; ILP32F-NEXT:    fsw fs0, 80(a1)
-; ILP32F-NEXT:    fsw ft11, 76(a1)
-; ILP32F-NEXT:    fsw ft10, 72(a1)
-; ILP32F-NEXT:    fsw ft9, 68(a1)
-; ILP32F-NEXT:    fsw ft8, 64(a1)
-; ILP32F-NEXT:    fsw fa7, 60(a1)
-; ILP32F-NEXT:    fsw fa6, 56(a1)
-; ILP32F-NEXT:    fsw ft7, 52(a1)
-; ILP32F-NEXT:    fsw ft6, 48(a1)
-; ILP32F-NEXT:    fsw ft5, 44(a1)
-; ILP32F-NEXT:    fsw ft4, 40(a1)
-; ILP32F-NEXT:    fsw ft3, 36(a1)
-; ILP32F-NEXT:    fsw ft2, 32(a1)
-; ILP32F-NEXT:    fsw ft1, 28(a1)
-; ILP32F-NEXT:    fsw ft0, 24(a1)
-; ILP32F-NEXT:    fsw fa0, 20(a1)
-; ILP32F-NEXT:    fsw fa1, 16(a1)
-; ILP32F-NEXT:    fsw fa2, %lo(var+12)(a0)
-; ILP32F-NEXT:    fsw fa3, %lo(var+8)(a0)
-; ILP32F-NEXT:    fsw fa4, %lo(var+4)(a0)
-; ILP32F-NEXT:    fsw fa5, %lo(var)(a0)
+; ILP32F-NEXT:    addi a0, a0, %lo(var)
+; ILP32F-NEXT:    flw fa5, 0(a0)
+; ILP32F-NEXT:    flw fa4, 4(a0)
+; ILP32F-NEXT:    flw fa3, 8(a0)
+; ILP32F-NEXT:    flw fa2, 12(a0)
+; ILP32F-NEXT:    flw fa1, 16(a0)
+; ILP32F-NEXT:    flw fa0, 20(a0)
+; ILP32F-NEXT:    flw ft0, 24(a0)
+; ILP32F-NEXT:    flw ft1, 28(a0)
+; ILP32F-NEXT:    flw ft2, 32(a0)
+; ILP32F-NEXT:    flw ft3, 36(a0)
+; ILP32F-NEXT:    flw ft4, 40(a0)
+; ILP32F-NEXT:    flw ft5, 44(a0)
+; ILP32F-NEXT:    flw ft6, 48(a0)
+; ILP32F-NEXT:    flw ft7, 52(a0)
+; ILP32F-NEXT:    flw fa6, 56(a0)
+; ILP32F-NEXT:    flw fa7, 60(a0)
+; ILP32F-NEXT:    flw ft8, 64(a0)
+; ILP32F-NEXT:    flw ft9, 68(a0)
+; ILP32F-NEXT:    flw ft10, 72(a0)
+; ILP32F-NEXT:    flw ft11, 76(a0)
+; ILP32F-NEXT:    flw fs0, 80(a0)
+; ILP32F-NEXT:    flw fs1, 84(a0)
+; ILP32F-NEXT:    flw fs2, 88(a0)
+; ILP32F-NEXT:    flw fs3, 92(a0)
+; ILP32F-NEXT:    flw fs4, 96(a0)
+; ILP32F-NEXT:    flw fs5, 100(a0)
+; ILP32F-NEXT:    flw fs6, 104(a0)
+; ILP32F-NEXT:    flw fs7, 108(a0)
+; ILP32F-NEXT:    flw fs8, 124(a0)
+; ILP32F-NEXT:    flw fs9, 120(a0)
+; ILP32F-NEXT:    flw fs10, 116(a0)
+; ILP32F-NEXT:    flw fs11, 112(a0)
+; ILP32F-NEXT:    fsw fs8, 124(a0)
+; ILP32F-NEXT:    fsw fs9, 120(a0)
+; ILP32F-NEXT:    fsw fs10, 116(a0)
+; ILP32F-NEXT:    fsw fs11, 112(a0)
+; ILP32F-NEXT:    fsw fs7, 108(a0)
+; ILP32F-NEXT:    fsw fs6, 104(a0)
+; ILP32F-NEXT:    fsw fs5, 100(a0)
+; ILP32F-NEXT:    fsw fs4, 96(a0)
+; ILP32F-NEXT:    fsw fs3, 92(a0)
+; ILP32F-NEXT:    fsw fs2, 88(a0)
+; ILP32F-NEXT:    fsw fs1, 84(a0)
+; ILP32F-NEXT:    fsw fs0, 80(a0)
+; ILP32F-NEXT:    fsw ft11, 76(a0)
+; ILP32F-NEXT:    fsw ft10, 72(a0)
+; ILP32F-NEXT:    fsw ft9, 68(a0)
+; ILP32F-NEXT:    fsw ft8, 64(a0)
+; ILP32F-NEXT:    fsw fa7, 60(a0)
+; ILP32F-NEXT:    fsw fa6, 56(a0)
+; ILP32F-NEXT:    fsw ft7, 52(a0)
+; ILP32F-NEXT:    fsw ft6, 48(a0)
+; ILP32F-NEXT:    fsw ft5, 44(a0)
+; ILP32F-NEXT:    fsw ft4, 40(a0)
+; ILP32F-NEXT:    fsw ft3, 36(a0)
+; ILP32F-NEXT:    fsw ft2, 32(a0)
+; ILP32F-NEXT:    fsw ft1, 28(a0)
+; ILP32F-NEXT:    fsw ft0, 24(a0)
+; ILP32F-NEXT:    fsw fa0, 20(a0)
+; ILP32F-NEXT:    fsw fa1, 16(a0)
+; ILP32F-NEXT:    fsw fa2, 12(a0)
+; ILP32F-NEXT:    fsw fa3, 8(a0)
+; ILP32F-NEXT:    fsw fa4, 4(a0)
+; ILP32F-NEXT:    fsw fa5, 0(a0)
 ; ILP32F-NEXT:    flw fs0, 44(sp) # 4-byte Folded Reload
 ; ILP32F-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
 ; ILP32F-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
@@ -417,71 +417,71 @@ define void @callee() nounwind {
 ; LP64F-NEXT:    fsw fs10, 4(sp) # 4-byte Folded Spill
 ; LP64F-NEXT:    fsw fs11, 0(sp) # 4-byte Folded Spill
 ; LP64F-NEXT:    lui a0, %hi(var)
-; LP64F-NEXT:    flw fa5, %lo(var)(a0)
-; LP64F-NEXT:    flw fa4, %lo(var+4)(a0)
-; LP64F-NEXT:    flw fa3, %lo(var+8)(a0)
-; LP64F-NEXT:    flw fa2, %lo(var+12)(a0)
-; LP64F-NEXT:    addi a1, a0, %lo(var)
-; LP64F-NEXT:    flw fa1, 16(a1)
-; LP64F-NEXT:    flw fa0, 20(a1)
-; LP64F-NEXT:    flw ft0, 24(a1)
-; LP64F-NEXT:    flw ft1, 28(a1)
-; LP64F-NEXT:    flw ft2, 32(a1)
-; LP64F-NEXT:    flw ft3, 36(a1)
-; LP64F-NEXT:    flw ft4, 40(a1)
-; LP64F-NEXT:    flw ft5, 44(a1)
-; LP64F-NEXT:    flw ft6, 48(a1)
-; LP64F-NEXT:    flw ft7, 52(a1)
-; LP64F-NEXT:    flw fa6, 56(a1)
-; LP64F-NEXT:    flw fa7, 60(a1)
-; LP64F-NEXT:    flw ft8, 64(a1)
-; LP64F-NEXT:    flw ft9, 68(a1)
-; LP64F-NEXT:    flw ft10, 72(a1)
-; LP64F-NEXT:    flw ft11, 76(a1)
-; LP64F-NEXT:    flw fs0, 80(a1)
-; LP64F-NEXT:    flw fs1, 84(a1)
-; LP64F-NEXT:    flw fs2, 88(a1)
-; LP64F-NEXT:    flw fs3, 92(a1)
-; LP64F-NEXT:    flw fs4, 96(a1)
-; LP64F-NEXT:    flw fs5, 100(a1)
-; LP64F-NEXT:    flw fs6, 104(a1)
-; LP64F-NEXT:    flw fs7, 108(a1)
-; LP64F-NEXT:    flw fs8, 124(a1)
-; LP64F-NEXT:    flw fs9, 120(a1)
-; LP64F-NEXT:    flw fs10, 116(a1)
-; LP64F-NEXT:    flw fs11, 112(a1)
-; LP64F-NEXT:    fsw fs8, 124(a1)
-; LP64F-NEXT:    fsw fs9, 120(a1)
-; LP64F-NEXT:    fsw fs10, 116(a1)
-; LP64F-NEXT:    fsw fs11, 112(a1)
-; LP64F-NEXT:    fsw fs7, 108(a1)
-; LP64F-NEXT:    fsw fs6, 104(a1)
-; LP64F-NEXT:    fsw fs5, 100(a1)
-; LP64F-NEXT:    fsw fs4, 96(a1)
-; LP64F-NEXT:    fsw fs3, 92(a1)
-; LP64F-NEXT:    fsw fs2, 88(a1)
-; LP64F-NEXT:    fsw fs1, 84(a1)
-; LP64F-NEXT:    fsw fs0, 80(a1)
-; LP64F-NEXT:    fsw ft11, 76(a1)
-; LP64F-NEXT:    fsw ft10, 72(a1)
-; LP64F-NEXT:    fsw ft9, 68(a1)
-; LP64F-NEXT:    fsw ft8, 64(a1)
-; LP64F-NEXT:    fsw fa7, 60(a1)
-; LP64F-NEXT:    fsw fa6, 56(a1)
-; LP64F-NEXT:    fsw ft7, 52(a1)
-; LP64F-NEXT:    fsw ft6, 48(a1)
-; LP64F-NEXT:    fsw ft5, 44(a1)
-; LP64F-NEXT:    fsw ft4, 40(a1)
-; LP64F-NEXT:    fsw ft3, 36(a1)
-; LP64F-NEXT:    fsw ft2, 32(a1)
-; LP64F-NEXT:    fsw ft1, 28(a1)
-; LP64F-NEXT:    fsw ft0, 24(a1)
-; LP64F-NEXT:    fsw fa0, 20(a1)
-; LP64F-NEXT:    fsw fa1, 16(a1)
-; LP64F-NEXT:    fsw fa2, %lo(var+12)(a0)
-; LP64F-NEXT:    fsw fa3, %lo(var+8)(a0)
-; LP64F-NEXT:    fsw fa4, %lo(var+4)(a0)
-; LP64F-NEXT:    fsw fa5, %lo(var)(a0)
+; LP64F-NEXT:    addi a0, a0, %lo(var)
+; LP64F-NEXT:    flw fa5, 0(a0)
+; LP64F-NEXT:    flw fa4, 4(a0)
+; LP64F-NEXT:    flw fa3, 8(a0)
+; LP64F-NEXT:    flw fa2, 12(a0)
+; LP64F-NEXT:    flw fa1, 16(a0)
+; LP64F-NEXT:    flw fa0, 20(a0)
+; LP64F-NEXT:    flw ft0, 24(a0)
+; LP64F-NEXT:    flw ft1, 28(a0)
+; LP64F-NEXT:    flw ft2, 32(a0)
+; LP64F-NEXT:    flw ft3, 36(a0)
+; LP64F-NEXT:    flw ft4, 40(a0)
+; LP64F-NEXT:    flw ft5, 44(a0)
+; LP64F-NEXT:    flw ft6, 48(a0)
+; LP64F-NEXT:    flw ft7, 52(a0)
+; LP64F-NEXT:    flw fa6, 56(a0)
+; LP64F-NEXT:    flw fa7, 60(a0)
+; LP64F-NEXT:    flw ft8, 64(a0)
+; LP64F-NEXT:    flw ft9, 68(a0)
+; LP64F-NEXT:    flw ft10, 72(a0)
+; LP64F-NEXT:    flw ft11, 76(a0)
+; LP64F-NEXT:    flw fs0, 80(a0)
+; LP64F-NEXT:    flw fs1, 84(a0)
+; LP64F-NEXT:    flw fs2, 88(a0)
+; LP64F-NEXT:    flw fs3, 92(a0)
+; LP64F-NEXT:    flw fs4, 96(a0)
+; LP64F-NEXT:    flw fs5, 100(a0)
+; LP64F-NEXT:    flw fs6, 104(a0)
+; LP64F-NEXT:    flw fs7, 108(a0)
+; LP64F-NEXT:    flw fs8, 124(a0)
+; LP64F-NEXT:    flw fs9, 120(a0)
+; LP64F-NEXT:    flw fs10, 116(a0)
+; LP64F-NEXT:    flw fs11, 112(a0)
+; LP64F-NEXT:    fsw fs8, 124(a0)
+; LP64F-NEXT:    fsw fs9, 120(a0)
+; LP64F-NEXT:    fsw fs10, 116(a0)
+; LP64F-NEXT:    fsw fs11, 112(a0)
+; LP64F-NEXT:    fsw fs7, 108(a0)
+; LP64F-NEXT:    fsw fs6, 104(a0)
+; LP64F-NEXT:    fsw fs5, 100(a0)
+; LP64F-NEXT:    fsw fs4, 96(a0)
+; LP64F-NEXT:    fsw fs3, 92(a0)
+; LP64F-NEXT:    fsw fs2, 88(a0)
+; LP64F-NEXT:    fsw fs1, 84(a0)
+; LP64F-NEXT:    fsw fs0, 80(a0)
+; LP64F-NEXT:    fsw ft11, 76(a0)
+; LP64F-NEXT:    fsw ft10, 72(a0)
+; LP64F-NEXT:    fsw ft9, 68(a0)
+; LP64F-NEXT:    fsw ft8, 64(a0)
+; LP64F-NEXT:    fsw fa7, 60(a0)
+; LP64F-NEXT:    fsw fa6, 56(a0)
+; LP64F-NEXT:    fsw ft7, 52(a0)
+; LP64F-NEXT:    fsw ft6, 48(a0)
+; LP64F-NEXT:    fsw ft5, 44(a0)
+; LP64F-NEXT:    fsw ft4, 40(a0)
+; LP64F-NEXT:    fsw ft3, 36(a0)
+; LP64F-NEXT:    fsw ft2, 32(a0)
+; LP64F-NEXT:    fsw ft1, 28(a0)
+; LP64F-NEXT:    fsw ft0, 24(a0)
+; LP64F-NEXT:    fsw fa0, 20(a0)
+; LP64F-NEXT:    fsw fa1, 16(a0)
+; LP64F-NEXT:    fsw fa2, 12(a0)
+; LP64F-NEXT:    fsw fa3, 8(a0)
+; LP64F-NEXT:    fsw fa4, 4(a0)
+; LP64F-NEXT:    fsw fa5, 0(a0)
 ; LP64F-NEXT:    flw fs0, 44(sp) # 4-byte Folded Reload
 ; LP64F-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
 ; LP64F-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
@@ -513,71 +513,71 @@ define void @callee() nounwind {
 ; ILP32D-NEXT:    fsd fs10, 8(sp) # 8-byte Folded Spill
 ; ILP32D-NEXT:    fsd fs11, 0(sp) # 8-byte Folded Spill
 ; ILP32D-NEXT:    lui a0, %hi(var)
-; ILP32D-NEXT:    flw fa5, %lo(var)(a0)
-; ILP32D-NEXT:    flw fa4, %lo(var+4)(a0)
-; ILP32D-NEXT:    flw fa3, %lo(var+8)(a0)
-; ILP32D-NEXT:    flw fa2, %lo(var+12)(a0)
-; ILP32D-NEXT:    addi a1, a0, %lo(var)
-; ILP32D-NEXT:    flw fa1, 16(a1)
-; ILP32D-NEXT:    flw fa0, 20(a1)
-; ILP32D-NEXT:    flw ft0, 24(a1)
-; ILP32D-NEXT:    flw ft1, 28(a1)
-; ILP32D-NEXT:    flw ft2, 32(a1)
-; ILP32D-NEXT:    flw ft3, 36(a1)
-; ILP32D-NEXT:    flw ft4, 40(a1)
-; ILP32D-NEXT:    flw ft5, 44(a1)
-; ILP32D-NEXT:    flw ft6, 48(a1)
-; ILP32D-NEXT:    flw ft7, 52(a1)
-; ILP32D-NEXT:    flw fa6, 56(a1)
-; ILP32D-NEXT:    flw fa7, 60(a1)
-; ILP32D-NEXT:    flw ft8, 64(a1)
-; ILP32D-NEXT:    flw ft9, 68(a1)
-; ILP32D-NEXT:    flw ft10, 72(a1)
-; ILP32D-NEXT:    flw ft11, 76(a1)
-; ILP32D-NEXT:    flw fs0, 80(a1)
-; ILP32D-NEXT:    flw fs1, 84(a1)
-; ILP32D-NEXT:    flw fs2, 88(a1)
-; ILP32D-NEXT:    flw fs3, 92(a1)
-; ILP32D-NEXT:    flw fs4, 96(a1)
-; ILP32D-NEXT:    flw fs5, 100(a1)
-; ILP32D-NEXT:    flw fs6, 104(a1)
-; ILP32D-NEXT:    flw fs7, 108(a1)
-; ILP32D-NEXT:    flw fs8, 124(a1)
-; ILP32D-NEXT:    flw fs9, 120(a1)
-; ILP32D-NEXT:    flw fs10, 116(a1)
-; ILP32D-NEXT:    flw fs11, 112(a1)
-; ILP32D-NEXT:    fsw fs8, 124(a1)
-; ILP32D-NEXT:    fsw fs9, 120(a1)
-; ILP32D-NEXT:    fsw fs10, 116(a1)
-; ILP32D-NEXT:    fsw fs11, 112(a1)
-; ILP32D-NEXT:    fsw fs7, 108(a1)
-; ILP32D-NEXT:    fsw fs6, 104(a1)
-; ILP32D-NEXT:    fsw fs5, 100(a1)
-; ILP32D-NEXT:    fsw fs4, 96(a1)
-; ILP32D-NEXT:    fsw fs3, 92(a1)
-; ILP32D-NEXT:    fsw fs2, 88(a1)
-; ILP32D-NEXT:    fsw fs1, 84(a1)
-; ILP32D-NEXT:    fsw fs0, 80(a1)
-; ILP32D-NEXT:    fsw ft11, 76(a1)
-; ILP32D-NEXT:    fsw ft10, 72(a1)
-; ILP32D-NEXT:    fsw ft9, 68(a1)
-; ILP32D-NEXT:    fsw ft8, 64(a1)
-; ILP32D-NEXT:    fsw fa7, 60(a1)
-; ILP32D-NEXT:    fsw fa6, 56(a1)
-; ILP32D-NEXT:    fsw ft7, 52(a1)
-; ILP32D-NEXT:    fsw ft6, 48(a1)
-; ILP32D-NEXT:    fsw ft5, 44(a1)
-; ILP32D-NEXT:    fsw ft4, 40(a1)
-; ILP32D-NEXT:    fsw ft3, 36(a1)
-; ILP32D-NEXT:    fsw ft2, 32(a1)
-; ILP32D-NEXT:    fsw ft1, 28(a1)
-; ILP32D-NEXT:    fsw ft0, 24(a1)
-; ILP32D-NEXT:    fsw fa0, 20(a1)
-; ILP32D-NEXT:    fsw fa1, 16(a1)
-; ILP32D-NEXT:    fsw fa2, %lo(var+12)(a0)
-; ILP32D-NEXT:    fsw fa3, %lo(var+8)(a0)
-; ILP32D-NEXT:    fsw fa4, %lo(var+4)(a0)
-; ILP32D-NEXT:    fsw fa5, %lo(var)(a0)
+; ILP32D-NEXT:    addi a0, a0, %lo(var)
+; ILP32D-NEXT:    flw fa5, 0(a0)
+; ILP32D-NEXT:    flw fa4, 4(a0)
+; ILP32D-NEXT:    flw fa3, 8(a0)
+; ILP32D-NEXT:    flw fa2, 12(a0)
+; ILP32D-NEXT:    flw fa1, 16(a0)
+; ILP32D-NEXT:    flw fa0, 20(a0)
+; ILP32D-NEXT:    flw ft0, 24(a0)
+; ILP32D-NEXT:    flw ft1, 28(a0)
+; ILP32D-NEXT:    flw ft2, 32(a0)
+; ILP32D-NEXT:    flw ft3, 36(a0)
+; ILP32D-NEXT:    flw ft4, 40(a0)
+; ILP32D-NEXT:    flw ft5, 44(a0)
+; ILP32D-NEXT:    flw ft6, 48(a0)
+; ILP32D-NEXT:    flw ft7, 52(a0)
+; ILP32D-NEXT:    flw fa6, 56(a0)
+; ILP32D-NEXT:    flw fa7, 60(a0)
+; ILP32D-NEXT:    flw ft8, 64(a0)
+; ILP32D-NEXT:    flw ft9, 68(a0)
+; ILP32D-NEXT:    flw ft10, 72(a0)
+; ILP32D-NEXT:    flw ft11, 76(a0)
+; ILP32D-NEXT:    flw fs0, 80(a0)
+; ILP32D-NEXT:    flw fs1, 84(a0)
+; ILP32D-NEXT:    flw fs2, 88(a0)
+; ILP32D-NEXT:    flw fs3, 92(a0)
+; ILP32D-NEXT:    flw fs4, 96(a0)
+; ILP32D-NEXT:    flw fs5, 100(a0)
+; ILP32D-NEXT:    flw fs6, 104(a0)
+; ILP32D-NEXT:    flw fs7, 108(a0)
+; ILP32D-NEXT:    flw fs8, 124(a0)
+; ILP32D-NEXT:    flw fs9, 120(a0)
+; ILP32D-NEXT:    flw fs10, 116(a0)
+; ILP32D-NEXT:    flw fs11, 112(a0)
+; ILP32D-NEXT:    fsw fs8, 124(a0)
+; ILP32D-NEXT:    fsw fs9, 120(a0)
+; ILP32D-NEXT:    fsw fs10, 116(a0)
+; ILP32D-NEXT:    fsw fs11, 112(a0)
+; ILP32D-NEXT:    fsw fs7, 108(a0)
+; ILP32D-NEXT:    fsw fs6, 104(a0)
+; ILP32D-NEXT:    fsw fs5, 100(a0)
+; ILP32D-NEXT:    fsw fs4, 96(a0)
+; ILP32D-NEXT:    fsw fs3, 92(a0)
+; ILP32D-NEXT:    fsw fs2, 88(a0)
+; ILP32D-NEXT:    fsw fs1, 84(a0)
+; ILP32D-NEXT:    fsw fs0, 80(a0)
+; ILP32D-NEXT:    fsw ft11, 76(a0)
+; ILP32D-NEXT:    fsw ft10, 72(a0)
+; ILP32D-NEXT:    fsw ft9, 68(a0)
+; ILP32D-NEXT:    fsw ft8, 64(a0)
+; ILP32D-NEXT:    fsw fa7, 60(a0)
+; ILP32D-NEXT:    fsw fa6, 56(a0)
+; ILP32D-NEXT:    fsw ft7, 52(a0)
+; ILP32D-NEXT:    fsw ft6, 48(a0)
+; ILP32D-NEXT:    fsw ft5, 44(a0)
+; ILP32D-NEXT:    fsw ft4, 40(a0)
+; ILP32D-NEXT:    fsw ft3, 36(a0)
+; ILP32D-NEXT:    fsw ft2, 32(a0)
+; ILP32D-NEXT:    fsw ft1, 28(a0)
+; ILP32D-NEXT:    fsw ft0, 24(a0)
+; ILP32D-NEXT:    fsw fa0, 20(a0)
+; ILP32D-NEXT:    fsw fa1, 16(a0)
+; ILP32D-NEXT:    fsw fa2, 12(a0)
+; ILP32D-NEXT:    fsw fa3, 8(a0)
+; ILP32D-NEXT:    fsw fa4, 4(a0)
+; ILP32D-NEXT:    fsw fa5, 0(a0)
 ; ILP32D-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
@@ -609,71 +609,71 @@ define void @callee() nounwind {
 ; LP64D-NEXT:    fsd fs10, 8(sp) # 8-byte Folded Spill
 ; LP64D-NEXT:    fsd fs11, 0(sp) # 8-byte Folded Spill
 ; LP64D-NEXT:    lui a0, %hi(var)
-; LP64D-NEXT:    flw fa5, %lo(var)(a0)
-; LP64D-NEXT:    flw fa4, %lo(var+4)(a0)
-; LP64D-NEXT:    flw fa3, %lo(var+8)(a0)
-; LP64D-NEXT:    flw fa2, %lo(var+12)(a0)
-; LP64D-NEXT:    addi a1, a0, %lo(var)
-; LP64D-NEXT:    flw fa1, 16(a1)
-; LP64D-NEXT:    flw fa0, 20(a1)
-; LP64D-NEXT:    flw ft0, 24(a1)
-; LP64D-NEXT:    flw ft1, 28(a1)
-; LP64D-NEXT:    flw ft2, 32(a1)
-; LP64D-NEXT:    flw ft3, 36(a1)
-; LP64D-NEXT:    flw ft4, 40(a1)
-; LP64D-NEXT:    flw ft5, 44(a1)
-; LP64D-NEXT:    flw ft6, 48(a1)
-; LP64D-NEXT:    flw ft7, 52(a1)
-; LP64D-NEXT:    flw fa6, 56(a1)
-; LP64D-NEXT:    flw fa7, 60(a1)
-; LP64D-NEXT:    flw ft8, 64(a1)
-; LP64D-NEXT:    flw ft9, 68(a1)
-; LP64D-NEXT:    flw ft10, 72(a1)
-; LP64D-NEXT:    flw ft11, 76(a1)
-; LP64D-NEXT:    flw fs0, 80(a1)
-; LP64D-NEXT:    flw fs1, 84(a1)
-; LP64D-NEXT:    flw fs2, 88(a1)
-; LP64D-NEXT:    flw fs3, 92(a1)
-; LP64D-NEXT:    flw fs4, 96(a1)
-; LP64D-NEXT:    flw fs5, 100(a1)
-; LP64D-NEXT:    flw fs6, 104(a1)
-; LP64D-NEXT:    flw fs7, 108(a1)
-; LP64D-NEXT:    flw fs8, 124(a1)
-; LP64D-NEXT:    flw fs9, 120(a1)
-; LP64D-NEXT:    flw fs10, 116(a1)
-; LP64D-NEXT:    flw fs11, 112(a1)
-; LP64D-NEXT:    fsw fs8, 124(a1)
-; LP64D-NEXT:    fsw fs9, 120(a1)
-; LP64D-NEXT:    fsw fs10, 116(a1)
-; LP64D-NEXT:    fsw fs11, 112(a1)
-; LP64D-NEXT:    fsw fs7, 108(a1)
-; LP64D-NEXT:    fsw fs6, 104(a1)
-; LP64D-NEXT:    fsw fs5, 100(a1)
-; LP64D-NEXT:    fsw fs4, 96(a1)
-; LP64D-NEXT:    fsw fs3, 92(a1)
-; LP64D-NEXT:    fsw fs2, 88(a1)
-; LP64D-NEXT:    fsw fs1, 84(a1)
-; LP64D-NEXT:    fsw fs0, 80(a1)
-; LP64D-NEXT:    fsw ft11, 76(a1)
-; LP64D-NEXT:    fsw ft10, 72(a1)
-; LP64D-NEXT:    fsw ft9, 68(a1)
-; LP64D-NEXT:    fsw ft8, 64(a1)
-; LP64D-NEXT:    fsw fa7, 60(a1)
-; LP64D-NEXT:    fsw fa6, 56(a1)
-; LP64D-NEXT:    fsw ft7, 52(a1)
-; LP64D-NEXT:    fsw ft6, 48(a1)
-; LP64D-NEXT:    fsw ft5, 44(a1)
-; LP64D-NEXT:    fsw ft4, 40(a1)
-; LP64D-NEXT:    fsw ft3, 36(a1)
-; LP64D-NEXT:    fsw ft2, 32(a1)
-; LP64D-NEXT:    fsw ft1, 28(a1)
-; LP64D-NEXT:    fsw ft0, 24(a1)
-; LP64D-NEXT:    fsw fa0, 20(a1)
-; LP64D-NEXT:    fsw fa1, 16(a1)
-; LP64D-NEXT:    fsw fa2, %lo(var+12)(a0)
-; LP64D-NEXT:    fsw fa3, %lo(var+8)(a0)
-; LP64D-NEXT:    fsw fa4, %lo(var+4)(a0)
-; LP64D-NEXT:    fsw fa5, %lo(var)(a0)
+; LP64D-NEXT:    addi a0, a0, %lo(var)
+; LP64D-NEXT:    flw fa5, 0(a0)
+; LP64D-NEXT:    flw fa4, 4(a0)
+; LP64D-NEXT:    flw fa3, 8(a0)
+; LP64D-NEXT:    flw fa2, 12(a0)
+; LP64D-NEXT:    flw fa1, 16(a0)
+; LP64D-NEXT:    flw fa0, 20(a0)
+; LP64D-NEXT:    flw ft0, 24(a0)
+; LP64D-NEXT:    flw ft1, 28(a0)
+; LP64D-NEXT:    flw ft2, 32(a0)
+; LP64D-NEXT:    flw ft3, 36(a0)
+; LP64D-NEXT:    flw ft4, 40(a0)
+; LP64D-NEXT:    flw ft5, 44(a0)
+; LP64D-NEXT:    flw ft6, 48(a0)
+; LP64D-NEXT:    flw ft7, 52(a0)
+; LP64D-NEXT:    flw fa6, 56(a0)
+; LP64D-NEXT:    flw fa7, 60(a0)
+; LP64D-NEXT:    flw ft8, 64(a0)
+; LP64D-NEXT:    flw ft9, 68(a0)
+; LP64D-NEXT:    flw ft10, 72(a0)
+; LP64D-NEXT:    flw ft11, 76(a0)
+; LP64D-NEXT:    flw fs0, 80(a0)
+; LP64D-NEXT:    flw fs1, 84(a0)
+; LP64D-NEXT:    flw fs2, 88(a0)
+; LP64D-NEXT:    flw fs3, 92(a0)
+; LP64D-NEXT:    flw fs4, 96(a0)
+; LP64D-NEXT:    flw fs5, 100(a0)
+; LP64D-NEXT:    flw fs6, 104(a0)
+; LP64D-NEXT:    flw fs7, 108(a0)
+; LP64D-NEXT:    flw fs8, 124(a0)
+; LP64D-NEXT:    flw fs9, 120(a0)
+; LP64D-NEXT:    flw fs10, 116(a0)
+; LP64D-NEXT:    flw fs11, 112(a0)
+; LP64D-NEXT:    fsw fs8, 124(a0)
+; LP64D-NEXT:    fsw fs9, 120(a0)
+; LP64D-NEXT:    fsw fs10, 116(a0)
+; LP64D-NEXT:    fsw fs11, 112(a0)
+; LP64D-NEXT:    fsw fs7, 108(a0)
+; LP64D-NEXT:    fsw fs6, 104(a0)
+; LP64D-NEXT:    fsw fs5, 100(a0)
+; LP64D-NEXT:    fsw fs4, 96(a0)
+; LP64D-NEXT:    fsw fs3, 92(a0)
+; LP64D-NEXT:    fsw fs2, 88(a0)
+; LP64D-NEXT:    fsw fs1, 84(a0)
+; LP64D-NEXT:    fsw fs0, 80(a0)
+; LP64D-NEXT:    fsw ft11, 76(a0)
+; LP64D-NEXT:    fsw ft10, 72(a0)
+; LP64D-NEXT:    fsw ft9, 68(a0)
+; LP64D-NEXT:    fsw ft8, 64(a0)
+; LP64D-NEXT:    fsw fa7, 60(a0)
+; LP64D-NEXT:    fsw fa6, 56(a0)
+; LP64D-NEXT:    fsw ft7, 52(a0)
+; LP64D-NEXT:    fsw ft6, 48(a0)
+; LP64D-NEXT:    fsw ft5, 44(a0)
+; LP64D-NEXT:    fsw ft4, 40(a0)
+; LP64D-NEXT:    fsw ft3, 36(a0)
+; LP64D-NEXT:    fsw ft2, 32(a0)
+; LP64D-NEXT:    fsw ft1, 28(a0)
+; LP64D-NEXT:    fsw ft0, 24(a0)
+; LP64D-NEXT:    fsw fa0, 20(a0)
+; LP64D-NEXT:    fsw fa1, 16(a0)
+; LP64D-NEXT:    fsw fa2, 12(a0)
+; LP64D-NEXT:    fsw fa3, 8(a0)
+; LP64D-NEXT:    fsw fa4, 4(a0)
+; LP64D-NEXT:    fsw fa5, 0(a0)
 ; LP64D-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
@@ -706,571 +706,563 @@ define void @caller() nounwind {
 ; ILP32-NEXT:    addi sp, sp, -144
 ; ILP32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
 ; ILP32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    lui s0, %hi(var)
-; ILP32-NEXT:    flw fa5, %lo(var)(s0)
+; ILP32-NEXT:    lui a0, %hi(var)
+; ILP32-NEXT:    addi s0, a0, %lo(var)
+; ILP32-NEXT:    flw fa5, 0(s0)
+; ILP32-NEXT:    fsw fa5, 132(sp) # 4-byte Folded Spill
+; ILP32-NEXT:    flw fa5, 4(s0)
 ; ILP32-NEXT:    fsw fa5, 128(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, %lo(var+4)(s0)
+; ILP32-NEXT:    flw fa5, 8(s0)
 ; ILP32-NEXT:    fsw fa5, 124(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, %lo(var+8)(s0)
+; ILP32-NEXT:    flw fa5, 12(s0)
 ; ILP32-NEXT:    fsw fa5, 120(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, %lo(var+12)(s0)
+; ILP32-NEXT:    flw fa5, 16(s0)
 ; ILP32-NEXT:    fsw fa5, 116(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    addi s1, s0, %lo(var)
-; ILP32-NEXT:    flw fa5, 16(s1)
+; ILP32-NEXT:    flw fa5, 20(s0)
 ; ILP32-NEXT:    fsw fa5, 112(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 20(s1)
+; ILP32-NEXT:    flw fa5, 24(s0)
 ; ILP32-NEXT:    fsw fa5, 108(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 24(s1)
+; ILP32-NEXT:    flw fa5, 28(s0)
 ; ILP32-NEXT:    fsw fa5, 104(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 28(s1)
+; ILP32-NEXT:    flw fa5, 32(s0)
 ; ILP32-NEXT:    fsw fa5, 100(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 32(s1)
+; ILP32-NEXT:    flw fa5, 36(s0)
 ; ILP32-NEXT:    fsw fa5, 96(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 36(s1)
+; ILP32-NEXT:    flw fa5, 40(s0)
 ; ILP32-NEXT:    fsw fa5, 92(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 40(s1)
+; ILP32-NEXT:    flw fa5, 44(s0)
 ; ILP32-NEXT:    fsw fa5, 88(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 44(s1)
+; ILP32-NEXT:    flw fa5, 48(s0)
 ; ILP32-NEXT:    fsw fa5, 84(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 48(s1)
+; ILP32-NEXT:    flw fa5, 52(s0)
 ; ILP32-NEXT:    fsw fa5, 80(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 52(s1)
+; ILP32-NEXT:    flw fa5, 56(s0)
 ; ILP32-NEXT:    fsw fa5, 76(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 56(s1)
+; ILP32-NEXT:    flw fa5, 60(s0)
 ; ILP32-NEXT:    fsw fa5, 72(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 60(s1)
+; ILP32-NEXT:    flw fa5, 64(s0)
 ; ILP32-NEXT:    fsw fa5, 68(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 64(s1)
+; ILP32-NEXT:    flw fa5, 68(s0)
 ; ILP32-NEXT:    fsw fa5, 64(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 68(s1)
+; ILP32-NEXT:    flw fa5, 72(s0)
 ; ILP32-NEXT:    fsw fa5, 60(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 72(s1)
+; ILP32-NEXT:    flw fa5, 76(s0)
 ; ILP32-NEXT:    fsw fa5, 56(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 76(s1)
+; ILP32-NEXT:    flw fa5, 80(s0)
 ; ILP32-NEXT:    fsw fa5, 52(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 80(s1)
+; ILP32-NEXT:    flw fa5, 84(s0)
 ; ILP32-NEXT:    fsw fa5, 48(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 84(s1)
+; ILP32-NEXT:    flw fa5, 88(s0)
 ; ILP32-NEXT:    fsw fa5, 44(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 88(s1)
+; ILP32-NEXT:    flw fa5, 92(s0)
 ; ILP32-NEXT:    fsw fa5, 40(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 92(s1)
+; ILP32-NEXT:    flw fa5, 96(s0)
 ; ILP32-NEXT:    fsw fa5, 36(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 96(s1)
+; ILP32-NEXT:    flw fa5, 100(s0)
 ; ILP32-NEXT:    fsw fa5, 32(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 100(s1)
+; ILP32-NEXT:    flw fa5, 104(s0)
 ; ILP32-NEXT:    fsw fa5, 28(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 104(s1)
+; ILP32-NEXT:    flw fa5, 108(s0)
 ; ILP32-NEXT:    fsw fa5, 24(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 108(s1)
+; ILP32-NEXT:    flw fa5, 112(s0)
 ; ILP32-NEXT:    fsw fa5, 20(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 112(s1)
+; ILP32-NEXT:    flw fa5, 116(s0)
 ; ILP32-NEXT:    fsw fa5, 16(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 116(s1)
+; ILP32-NEXT:    flw fa5, 120(s0)
 ; ILP32-NEXT:    fsw fa5, 12(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 120(s1)
+; ILP32-NEXT:    flw fa5, 124(s0)
 ; ILP32-NEXT:    fsw fa5, 8(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    flw fa5, 124(s1)
-; ILP32-NEXT:    fsw fa5, 4(sp) # 4-byte Folded Spill
 ; ILP32-NEXT:    call callee
-; ILP32-NEXT:    flw fa5, 4(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 124(s1)
 ; ILP32-NEXT:    flw fa5, 8(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 120(s1)
+; ILP32-NEXT:    fsw fa5, 124(s0)
 ; ILP32-NEXT:    flw fa5, 12(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 116(s1)
+; ILP32-NEXT:    fsw fa5, 120(s0)
 ; ILP32-NEXT:    flw fa5, 16(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 112(s1)
+; ILP32-NEXT:    fsw fa5, 116(s0)
 ; ILP32-NEXT:    flw fa5, 20(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 108(s1)
+; ILP32-NEXT:    fsw fa5, 112(s0)
 ; ILP32-NEXT:    flw fa5, 24(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 104(s1)
+; ILP32-NEXT:    fsw fa5, 108(s0)
 ; ILP32-NEXT:    flw fa5, 28(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 100(s1)
+; ILP32-NEXT:    fsw fa5, 104(s0)
 ; ILP32-NEXT:    flw fa5, 32(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 96(s1)
+; ILP32-NEXT:    fsw fa5, 100(s0)
 ; ILP32-NEXT:    flw fa5, 36(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 92(s1)
+; ILP32-NEXT:    fsw fa5, 96(s0)
 ; ILP32-NEXT:    flw fa5, 40(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 88(s1)
+; ILP32-NEXT:    fsw fa5, 92(s0)
 ; ILP32-NEXT:    flw fa5, 44(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 84(s1)
+; ILP32-NEXT:    fsw fa5, 88(s0)
 ; ILP32-NEXT:    flw fa5, 48(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 80(s1)
+; ILP32-NEXT:    fsw fa5, 84(s0)
 ; ILP32-NEXT:    flw fa5, 52(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 76(s1)
+; ILP32-NEXT:    fsw fa5, 80(s0)
 ; ILP32-NEXT:    flw fa5, 56(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 72(s1)
+; ILP32-NEXT:    fsw fa5, 76(s0)
 ; ILP32-NEXT:    flw fa5, 60(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 68(s1)
+; ILP32-NEXT:    fsw fa5, 72(s0)
 ; ILP32-NEXT:    flw fa5, 64(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 64(s1)
+; ILP32-NEXT:    fsw fa5, 68(s0)
 ; ILP32-NEXT:    flw fa5, 68(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 60(s1)
+; ILP32-NEXT:    fsw fa5, 64(s0)
 ; ILP32-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 56(s1)
+; ILP32-NEXT:    fsw fa5, 60(s0)
 ; ILP32-NEXT:    flw fa5, 76(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 52(s1)
+; ILP32-NEXT:    fsw fa5, 56(s0)
 ; ILP32-NEXT:    flw fa5, 80(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 48(s1)
+; ILP32-NEXT:    fsw fa5, 52(s0)
 ; ILP32-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 44(s1)
+; ILP32-NEXT:    fsw fa5, 48(s0)
 ; ILP32-NEXT:    flw fa5, 88(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 40(s1)
+; ILP32-NEXT:    fsw fa5, 44(s0)
 ; ILP32-NEXT:    flw fa5, 92(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 36(s1)
+; ILP32-NEXT:    fsw fa5, 40(s0)
 ; ILP32-NEXT:    flw fa5, 96(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 32(s1)
+; ILP32-NEXT:    fsw fa5, 36(s0)
 ; ILP32-NEXT:    flw fa5, 100(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 28(s1)
+; ILP32-NEXT:    fsw fa5, 32(s0)
 ; ILP32-NEXT:    flw fa5, 104(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 24(s1)
+; ILP32-NEXT:    fsw fa5, 28(s0)
 ; ILP32-NEXT:    flw fa5, 108(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 20(s1)
+; ILP32-NEXT:    fsw fa5, 24(s0)
 ; ILP32-NEXT:    flw fa5, 112(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, 16(s1)
+; ILP32-NEXT:    fsw fa5, 20(s0)
 ; ILP32-NEXT:    flw fa5, 116(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, %lo(var+12)(s0)
+; ILP32-NEXT:    fsw fa5, 16(s0)
 ; ILP32-NEXT:    flw fa5, 120(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, %lo(var+8)(s0)
+; ILP32-NEXT:    fsw fa5, 12(s0)
 ; ILP32-NEXT:    flw fa5, 124(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, %lo(var+4)(s0)
+; ILP32-NEXT:    fsw fa5, 8(s0)
 ; ILP32-NEXT:    flw fa5, 128(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    fsw fa5, %lo(var)(s0)
+; ILP32-NEXT:    fsw fa5, 4(s0)
+; ILP32-NEXT:    flw fa5, 132(sp) # 4-byte Folded Reload
+; ILP32-NEXT:    fsw fa5, 0(s0)
 ; ILP32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; ILP32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
 ; ILP32-NEXT:    addi sp, sp, 144
 ; ILP32-NEXT:    ret
 ;
 ; ILP32E-LABEL: caller:
 ; ILP32E:       # %bb.0:
-; ILP32E-NEXT:    addi sp, sp, -140
-; ILP32E-NEXT:    sw ra, 136(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    sw s0, 132(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    sw s1, 128(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    lui s0, %hi(var)
-; ILP32E-NEXT:    flw fa5, %lo(var)(s0)
+; ILP32E-NEXT:    addi sp, sp, -136
+; ILP32E-NEXT:    sw ra, 132(sp) # 4-byte Folded Spill
+; ILP32E-NEXT:    sw s0, 128(sp) # 4-byte Folded Spill
+; ILP32E-NEXT:    lui a0, %hi(var)
+; ILP32E-NEXT:    addi s0, a0, %lo(var)
+; ILP32E-NEXT:    flw fa5, 0(s0)
 ; ILP32E-NEXT:    fsw fa5, 124(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, %lo(var+4)(s0)
+; ILP32E-NEXT:    flw fa5, 4(s0)
 ; ILP32E-NEXT:    fsw fa5, 120(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, %lo(var+8)(s0)
+; ILP32E-NEXT:    flw fa5, 8(s0)
 ; ILP32E-NEXT:    fsw fa5, 116(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, %lo(var+12)(s0)
+; ILP32E-NEXT:    flw fa5, 12(s0)
 ; ILP32E-NEXT:    fsw fa5, 112(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    addi s1, s0, %lo(var)
-; ILP32E-NEXT:    flw fa5, 16(s1)
+; ILP32E-NEXT:    flw fa5, 16(s0)
 ; ILP32E-NEXT:    fsw fa5, 108(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 20(s1)
+; ILP32E-NEXT:    flw fa5, 20(s0)
 ; ILP32E-NEXT:    fsw fa5, 104(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 24(s1)
+; ILP32E-NEXT:    flw fa5, 24(s0)
 ; ILP32E-NEXT:    fsw fa5, 100(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 28(s1)
+; ILP32E-NEXT:    flw fa5, 28(s0)
 ; ILP32E-NEXT:    fsw fa5, 96(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 32(s1)
+; ILP32E-NEXT:    flw fa5, 32(s0)
 ; ILP32E-NEXT:    fsw fa5, 92(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 36(s1)
+; ILP32E-NEXT:    flw fa5, 36(s0)
 ; ILP32E-NEXT:    fsw fa5, 88(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 40(s1)
+; ILP32E-NEXT:    flw fa5, 40(s0)
 ; ILP32E-NEXT:    fsw fa5, 84(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 44(s1)
+; ILP32E-NEXT:    flw fa5, 44(s0)
 ; ILP32E-NEXT:    fsw fa5, 80(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 48(s1)
+; ILP32E-NEXT:    flw fa5, 48(s0)
 ; ILP32E-NEXT:    fsw fa5, 76(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 52(s1)
+; ILP32E-NEXT:    flw fa5, 52(s0)
 ; ILP32E-NEXT:    fsw fa5, 72(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 56(s1)
+; ILP32E-NEXT:    flw fa5, 56(s0)
 ; ILP32E-NEXT:    fsw fa5, 68(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 60(s1)
+; ILP32E-NEXT:    flw fa5, 60(s0)
 ; ILP32E-NEXT:    fsw fa5, 64(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 64(s1)
+; ILP32E-NEXT:    flw fa5, 64(s0)
 ; ILP32E-NEXT:    fsw fa5, 60(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 68(s1)
+; ILP32E-NEXT:    flw fa5, 68(s0)
 ; ILP32E-NEXT:    fsw fa5, 56(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 72(s1)
+; ILP32E-NEXT:    flw fa5, 72(s0)
 ; ILP32E-NEXT:    fsw fa5, 52(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 76(s1)
+; ILP32E-NEXT:    flw fa5, 76(s0)
 ; ILP32E-NEXT:    fsw fa5, 48(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 80(s1)
+; ILP32E-NEXT:    flw fa5, 80(s0)
 ; ILP32E-NEXT:    fsw fa5, 44(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 84(s1)
+; ILP32E-NEXT:    flw fa5, 84(s0)
 ; ILP32E-NEXT:    fsw fa5, 40(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 88(s1)
+; ILP32E-NEXT:    flw fa5, 88(s0)
 ; ILP32E-NEXT:    fsw fa5, 36(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 92(s1)
+; ILP32E-NEXT:    flw fa5, 92(s0)
 ; ILP32E-NEXT:    fsw fa5, 32(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 96(s1)
+; ILP32E-NEXT:    flw fa5, 96(s0)
 ; ILP32E-NEXT:    fsw fa5, 28(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 100(s1)
+; ILP32E-NEXT:    flw fa5, 100(s0)
 ; ILP32E-NEXT:    fsw fa5, 24(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 104(s1)
+; ILP32E-NEXT:    flw fa5, 104(s0)
 ; ILP32E-NEXT:    fsw fa5, 20(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 108(s1)
+; ILP32E-NEXT:    flw fa5, 108(s0)
 ; ILP32E-NEXT:    fsw fa5, 16(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 112(s1)
+; ILP32E-NEXT:    flw fa5, 112(s0)
 ; ILP32E-NEXT:    fsw fa5, 12(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 116(s1)
+; ILP32E-NEXT:    flw fa5, 116(s0)
 ; ILP32E-NEXT:    fsw fa5, 8(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 120(s1)
+; ILP32E-NEXT:    flw fa5, 120(s0)
 ; ILP32E-NEXT:    fsw fa5, 4(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    flw fa5, 124(s1)
+; ILP32E-NEXT:    flw fa5, 124(s0)
 ; ILP32E-NEXT:    fsw fa5, 0(sp) # 4-byte Folded Spill
 ; ILP32E-NEXT:    call callee
 ; ILP32E-NEXT:    flw fa5, 0(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 124(s1)
+; ILP32E-NEXT:    fsw fa5, 124(s0)
 ; ILP32E-NEXT:    flw fa5, 4(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 120(s1)
+; ILP32E-NEXT:    fsw fa5, 120(s0)
 ; ILP32E-NEXT:    flw fa5, 8(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 116(s1)
+; ILP32E-NEXT:    fsw fa5, 116(s0)
 ; ILP32E-NEXT:    flw fa5, 12(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 112(s1)
+; ILP32E-NEXT:    fsw fa5, 112(s0)
 ; ILP32E-NEXT:    flw fa5, 16(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 108(s1)
+; ILP32E-NEXT:    fsw fa5, 108(s0)
 ; ILP32E-NEXT:    flw fa5, 20(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 104(s1)
+; ILP32E-NEXT:    fsw fa5, 104(s0)
 ; ILP32E-NEXT:    flw fa5, 24(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 100(s1)
+; ILP32E-NEXT:    fsw fa5, 100(s0)
 ; ILP32E-NEXT:    flw fa5, 28(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 96(s1)
+; ILP32E-NEXT:    fsw fa5, 96(s0)
 ; ILP32E-NEXT:    flw fa5, 32(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 92(s1)
+; ILP32E-NEXT:    fsw fa5, 92(s0)
 ; ILP32E-NEXT:    flw fa5, 36(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 88(s1)
+; ILP32E-NEXT:    fsw fa5, 88(s0)
 ; ILP32E-NEXT:    flw fa5, 40(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 84(s1)
+; ILP32E-NEXT:    fsw fa5, 84(s0)
 ; ILP32E-NEXT:    flw fa5, 44(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 80(s1)
+; ILP32E-NEXT:    fsw fa5, 80(s0)
 ; ILP32E-NEXT:    flw fa5, 48(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 76(s1)
+; ILP32E-NEXT:    fsw fa5, 76(s0)
 ; ILP32E-NEXT:    flw fa5, 52(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 72(s1)
+; ILP32E-NEXT:    fsw fa5, 72(s0)
 ; ILP32E-NEXT:    flw fa5, 56(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 68(s1)
+; ILP32E-NEXT:    fsw fa5, 68(s0)
 ; ILP32E-NEXT:    flw fa5, 60(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 64(s1)
+; ILP32E-NEXT:    fsw fa5, 64(s0)
 ; ILP32E-NEXT:    flw fa5, 64(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 60(s1)
+; ILP32E-NEXT:    fsw fa5, 60(s0)
 ; ILP32E-NEXT:    flw fa5, 68(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 56(s1)
+; ILP32E-NEXT:    fsw fa5, 56(s0)
 ; ILP32E-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 52(s1)
+; ILP32E-NEXT:    fsw fa5, 52(s0)
 ; ILP32E-NEXT:    flw fa5, 76(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 48(s1)
+; ILP32E-NEXT:    fsw fa5, 48(s0)
 ; ILP32E-NEXT:    flw fa5, 80(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 44(s1)
+; ILP32E-NEXT:    fsw fa5, 44(s0)
 ; ILP32E-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 40(s1)
+; ILP32E-NEXT:    fsw fa5, 40(s0)
 ; ILP32E-NEXT:    flw fa5, 88(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 36(s1)
+; ILP32E-NEXT:    fsw fa5, 36(s0)
 ; ILP32E-NEXT:    flw fa5, 92(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 32(s1)
+; ILP32E-NEXT:    fsw fa5, 32(s0)
 ; ILP32E-NEXT:    flw fa5, 96(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 28(s1)
+; ILP32E-NEXT:    fsw fa5, 28(s0)
 ; ILP32E-NEXT:    flw fa5, 100(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 24(s1)
+; ILP32E-NEXT:    fsw fa5, 24(s0)
 ; ILP32E-NEXT:    flw fa5, 104(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 20(s1)
+; ILP32E-NEXT:    fsw fa5, 20(s0)
 ; ILP32E-NEXT:    flw fa5, 108(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, 16(s1)
+; ILP32E-NEXT:    fsw fa5, 16(s0)
 ; ILP32E-NEXT:    flw fa5, 112(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, %lo(var+12)(s0)
+; ILP32E-NEXT:    fsw fa5, 12(s0)
 ; ILP32E-NEXT:    flw fa5, 116(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, %lo(var+8)(s0)
+; ILP32E-NEXT:    fsw fa5, 8(s0)
 ; ILP32E-NEXT:    flw fa5, 120(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, %lo(var+4)(s0)
+; ILP32E-NEXT:    fsw fa5, 4(s0)
 ; ILP32E-NEXT:    flw fa5, 124(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    fsw fa5, %lo(var)(s0)
-; ILP32E-NEXT:    lw ra, 136(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    lw s0, 132(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    lw s1, 128(sp) # 4-byte Folded Reload
-; ILP32E-NEXT:    addi sp, sp, 140
+; ILP32E-NEXT:    fsw fa5, 0(s0)
+; ILP32E-NEXT:    lw ra, 132(sp) # 4-byte Folded Reload
+; ILP32E-NEXT:    lw s0, 128(sp) # 4-byte Folded Reload
+; ILP32E-NEXT:    addi sp, sp, 136
 ; ILP32E-NEXT:    ret
 ;
 ; LP64-LABEL: caller:
 ; LP64:       # %bb.0:
-; LP64-NEXT:    addi sp, sp, -160
-; LP64-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
-; LP64-NEXT:    sd s0, 144(sp) # 8-byte Folded Spill
-; LP64-NEXT:    sd s1, 136(sp) # 8-byte Folded Spill
-; LP64-NEXT:    lui s0, %hi(var)
-; LP64-NEXT:    flw fa5, %lo(var)(s0)
-; LP64-NEXT:    fsw fa5, 132(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, %lo(var+4)(s0)
-; LP64-NEXT:    fsw fa5, 128(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, %lo(var+8)(s0)
+; LP64-NEXT:    addi sp, sp, -144
+; LP64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; LP64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; LP64-NEXT:    lui a0, %hi(var)
+; LP64-NEXT:    addi s0, a0, %lo(var)
+; LP64-NEXT:    flw fa5, 0(s0)
 ; LP64-NEXT:    fsw fa5, 124(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, %lo(var+12)(s0)
+; LP64-NEXT:    flw fa5, 4(s0)
 ; LP64-NEXT:    fsw fa5, 120(sp) # 4-byte Folded Spill
-; LP64-NEXT:    addi s1, s0, %lo(var)
-; LP64-NEXT:    flw fa5, 16(s1)
+; LP64-NEXT:    flw fa5, 8(s0)
 ; LP64-NEXT:    fsw fa5, 116(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 20(s1)
+; LP64-NEXT:    flw fa5, 12(s0)
 ; LP64-NEXT:    fsw fa5, 112(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 24(s1)
+; LP64-NEXT:    flw fa5, 16(s0)
 ; LP64-NEXT:    fsw fa5, 108(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 28(s1)
+; LP64-NEXT:    flw fa5, 20(s0)
 ; LP64-NEXT:    fsw fa5, 104(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 32(s1)
+; LP64-NEXT:    flw fa5, 24(s0)
 ; LP64-NEXT:    fsw fa5, 100(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 36(s1)
+; LP64-NEXT:    flw fa5, 28(s0)
 ; LP64-NEXT:    fsw fa5, 96(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 40(s1)
+; LP64-NEXT:    flw fa5, 32(s0)
 ; LP64-NEXT:    fsw fa5, 92(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 44(s1)
+; LP64-NEXT:    flw fa5, 36(s0)
 ; LP64-NEXT:    fsw fa5, 88(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 48(s1)
+; LP64-NEXT:    flw fa5, 40(s0)
 ; LP64-NEXT:    fsw fa5, 84(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 52(s1)
+; LP64-NEXT:    flw fa5, 44(s0)
 ; LP64-NEXT:    fsw fa5, 80(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 56(s1)
+; LP64-NEXT:    flw fa5, 48(s0)
 ; LP64-NEXT:    fsw fa5, 76(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 60(s1)
+; LP64-NEXT:    flw fa5, 52(s0)
 ; LP64-NEXT:    fsw fa5, 72(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 64(s1)
+; LP64-NEXT:    flw fa5, 56(s0)
 ; LP64-NEXT:    fsw fa5, 68(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 68(s1)
+; LP64-NEXT:    flw fa5, 60(s0)
 ; LP64-NEXT:    fsw fa5, 64(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 72(s1)
+; LP64-NEXT:    flw fa5, 64(s0)
 ; LP64-NEXT:    fsw fa5, 60(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 76(s1)
+; LP64-NEXT:    flw fa5, 68(s0)
 ; LP64-NEXT:    fsw fa5, 56(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 80(s1)
+; LP64-NEXT:    flw fa5, 72(s0)
 ; LP64-NEXT:    fsw fa5, 52(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 84(s1)
+; LP64-NEXT:    flw fa5, 76(s0)
 ; LP64-NEXT:    fsw fa5, 48(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 88(s1)
+; LP64-NEXT:    flw fa5, 80(s0)
 ; LP64-NEXT:    fsw fa5, 44(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 92(s1)
+; LP64-NEXT:    flw fa5, 84(s0)
 ; LP64-NEXT:    fsw fa5, 40(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 96(s1)
+; LP64-NEXT:    flw fa5, 88(s0)
 ; LP64-NEXT:    fsw fa5, 36(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 100(s1)
+; LP64-NEXT:    flw fa5, 92(s0)
 ; LP64-NEXT:    fsw fa5, 32(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 104(s1)
+; LP64-NEXT:    flw fa5, 96(s0)
 ; LP64-NEXT:    fsw fa5, 28(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 108(s1)
+; LP64-NEXT:    flw fa5, 100(s0)
 ; LP64-NEXT:    fsw fa5, 24(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 112(s1)
+; LP64-NEXT:    flw fa5, 104(s0)
 ; LP64-NEXT:    fsw fa5, 20(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 116(s1)
+; LP64-NEXT:    flw fa5, 108(s0)
 ; LP64-NEXT:    fsw fa5, 16(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 120(s1)
+; LP64-NEXT:    flw fa5, 112(s0)
 ; LP64-NEXT:    fsw fa5, 12(sp) # 4-byte Folded Spill
-; LP64-NEXT:    flw fa5, 124(s1)
+; LP64-NEXT:    flw fa5, 116(s0)
 ; LP64-NEXT:    fsw fa5, 8(sp) # 4-byte Folded Spill
+; LP64-NEXT:    flw fa5, 120(s0)
+; LP64-NEXT:    fsw fa5, 4(sp) # 4-byte Folded Spill
+; LP64-NEXT:    flw fa5, 124(s0)
+; LP64-NEXT:    fsw fa5, 0(sp) # 4-byte Folded Spill
 ; LP64-NEXT:    call callee
+; LP64-NEXT:    flw fa5, 0(sp) # 4-byte Folded Reload
+; LP64-NEXT:    fsw fa5, 124(s0)
+; LP64-NEXT:    flw fa5, 4(sp) # 4-byte Folded Reload
+; LP64-NEXT:    fsw fa5, 120(s0)
 ; LP64-NEXT:    flw fa5, 8(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 124(s1)
+; LP64-NEXT:    fsw fa5, 116(s0)
 ; LP64-NEXT:    flw fa5, 12(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 120(s1)
+; LP64-NEXT:    fsw fa5, 112(s0)
 ; LP64-NEXT:    flw fa5, 16(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 116(s1)
+; LP64-NEXT:    fsw fa5, 108(s0)
 ; LP64-NEXT:    flw fa5, 20(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 112(s1)
+; LP64-NEXT:    fsw fa5, 104(s0)
 ; LP64-NEXT:    flw fa5, 24(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 108(s1)
+; LP64-NEXT:    fsw fa5, 100(s0)
 ; LP64-NEXT:    flw fa5, 28(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 104(s1)
+; LP64-NEXT:    fsw fa5, 96(s0)
 ; LP64-NEXT:    flw fa5, 32(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 100(s1)
+; LP64-NEXT:    fsw fa5, 92(s0)
 ; LP64-NEXT:    flw fa5, 36(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 96(s1)
+; LP64-NEXT:    fsw fa5, 88(s0)
 ; LP64-NEXT:    flw fa5, 40(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 92(s1)
+; LP64-NEXT:    fsw fa5, 84(s0)
 ; LP64-NEXT:    flw fa5, 44(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 88(s1)
+; LP64-NEXT:    fsw fa5, 80(s0)
 ; LP64-NEXT:    flw fa5, 48(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 84(s1)
+; LP64-NEXT:    fsw fa5, 76(s0)
 ; LP64-NEXT:    flw fa5, 52(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 80(s1)
+; LP64-NEXT:    fsw fa5, 72(s0)
 ; LP64-NEXT:    flw fa5, 56(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 76(s1)
+; LP64-NEXT:    fsw fa5, 68(s0)
 ; LP64-NEXT:    flw fa5, 60(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 72(s1)
+; LP64-NEXT:    fsw fa5, 64(s0)
 ; LP64-NEXT:    flw fa5, 64(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 68(s1)
+; LP64-NEXT:    fsw fa5, 60(s0)
 ; LP64-NEXT:    flw fa5, 68(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 64(s1)
+; LP64-NEXT:    fsw fa5, 56(s0)
 ; LP64-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 60(s1)
+; LP64-NEXT:    fsw fa5, 52(s0)
 ; LP64-NEXT:    flw fa5, 76(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 56(s1)
+; LP64-NEXT:    fsw fa5, 48(s0)
 ; LP64-NEXT:    flw fa5, 80(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 52(s1)
+; LP64-NEXT:    fsw fa5, 44(s0)
 ; LP64-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 48(s1)
+; LP64-NEXT:    fsw fa5, 40(s0)
 ; LP64-NEXT:    flw fa5, 88(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 44(s1)
+; LP64-NEXT:    fsw fa5, 36(s0)
 ; LP64-NEXT:    flw fa5, 92(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 40(s1)
+; LP64-NEXT:    fsw fa5, 32(s0)
 ; LP64-NEXT:    flw fa5, 96(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 36(s1)
+; LP64-NEXT:    fsw fa5, 28(s0)
 ; LP64-NEXT:    flw fa5, 100(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 32(s1)
+; LP64-NEXT:    fsw fa5, 24(s0)
 ; LP64-NEXT:    flw fa5, 104(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 28(s1)
+; LP64-NEXT:    fsw fa5, 20(s0)
 ; LP64-NEXT:    flw fa5, 108(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 24(s1)
+; LP64-NEXT:    fsw fa5, 16(s0)
 ; LP64-NEXT:    flw fa5, 112(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 20(s1)
+; LP64-NEXT:    fsw fa5, 12(s0)
 ; LP64-NEXT:    flw fa5, 116(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, 16(s1)
+; LP64-NEXT:    fsw fa5, 8(s0)
 ; LP64-NEXT:    flw fa5, 120(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, %lo(var+12)(s0)
+; LP64-NEXT:    fsw fa5, 4(s0)
 ; LP64-NEXT:    flw fa5, 124(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, %lo(var+8)(s0)
-; LP64-NEXT:    flw fa5, 128(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, %lo(var+4)(s0)
-; LP64-NEXT:    flw fa5, 132(sp) # 4-byte Folded Reload
-; LP64-NEXT:    fsw fa5, %lo(var)(s0)
-; LP64-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
-; LP64-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
-; LP64-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
-; LP64-NEXT:    addi sp, sp, 160
+; LP64-NEXT:    fsw fa5, 0(s0)
+; LP64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; LP64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; LP64-NEXT:    addi sp, sp, 144
 ; LP64-NEXT:    ret
 ;
 ; LP64E-LABEL: caller:
 ; LP64E:       # %bb.0:
-; LP64E-NEXT:    addi sp, sp, -152
-; LP64E-NEXT:    sd ra, 144(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    sd s0, 136(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    sd s1, 128(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    lui s0, %hi(var)
-; LP64E-NEXT:    flw fa5, %lo(var)(s0)
+; LP64E-NEXT:    addi sp, sp, -144
+; LP64E-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; LP64E-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; LP64E-NEXT:    lui a0, %hi(var)
+; LP64E-NEXT:    addi s0, a0, %lo(var)
+; LP64E-NEXT:    flw fa5, 0(s0)
 ; LP64E-NEXT:    fsw fa5, 124(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, %lo(var+4)(s0)
+; LP64E-NEXT:    flw fa5, 4(s0)
 ; LP64E-NEXT:    fsw fa5, 120(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, %lo(var+8)(s0)
+; LP64E-NEXT:    flw fa5, 8(s0)
 ; LP64E-NEXT:    fsw fa5, 116(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, %lo(var+12)(s0)
+; LP64E-NEXT:    flw fa5, 12(s0)
 ; LP64E-NEXT:    fsw fa5, 112(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    addi s1, s0, %lo(var)
-; LP64E-NEXT:    flw fa5, 16(s1)
+; LP64E-NEXT:    flw fa5, 16(s0)
 ; LP64E-NEXT:    fsw fa5, 108(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 20(s1)
+; LP64E-NEXT:    flw fa5, 20(s0)
 ; LP64E-NEXT:    fsw fa5, 104(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 24(s1)
+; LP64E-NEXT:    flw fa5, 24(s0)
 ; LP64E-NEXT:    fsw fa5, 100(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 28(s1)
+; LP64E-NEXT:    flw fa5, 28(s0)
 ; LP64E-NEXT:    fsw fa5, 96(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 32(s1)
+; LP64E-NEXT:    flw fa5, 32(s0)
 ; LP64E-NEXT:    fsw fa5, 92(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 36(s1)
+; LP64E-NEXT:    flw fa5, 36(s0)
 ; LP64E-NEXT:    fsw fa5, 88(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 40(s1)
+; LP64E-NEXT:    flw fa5, 40(s0)
 ; LP64E-NEXT:    fsw fa5, 84(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 44(s1)
+; LP64E-NEXT:    flw fa5, 44(s0)
 ; LP64E-NEXT:    fsw fa5, 80(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 48(s1)
+; LP64E-NEXT:    flw fa5, 48(s0)
 ; LP64E-NEXT:    fsw fa5, 76(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 52(s1)
+; LP64E-NEXT:    flw fa5, 52(s0)
 ; LP64E-NEXT:    fsw fa5, 72(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 56(s1)
+; LP64E-NEXT:    flw fa5, 56(s0)
 ; LP64E-NEXT:    fsw fa5, 68(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 60(s1)
+; LP64E-NEXT:    flw fa5, 60(s0)
 ; LP64E-NEXT:    fsw fa5, 64(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 64(s1)
+; LP64E-NEXT:    flw fa5, 64(s0)
 ; LP64E-NEXT:    fsw fa5, 60(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 68(s1)
+; LP64E-NEXT:    flw fa5, 68(s0)
 ; LP64E-NEXT:    fsw fa5, 56(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 72(s1)
+; LP64E-NEXT:    flw fa5, 72(s0)
 ; LP64E-NEXT:    fsw fa5, 52(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 76(s1)
+; LP64E-NEXT:    flw fa5, 76(s0)
 ; LP64E-NEXT:    fsw fa5, 48(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 80(s1)
+; LP64E-NEXT:    flw fa5, 80(s0)
 ; LP64E-NEXT:    fsw fa5, 44(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 84(s1)
+; LP64E-NEXT:    flw fa5, 84(s0)
 ; LP64E-NEXT:    fsw fa5, 40(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 88(s1)
+; LP64E-NEXT:    flw fa5, 88(s0)
 ; LP64E-NEXT:    fsw fa5, 36(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 92(s1)
+; LP64E-NEXT:    flw fa5, 92(s0)
 ; LP64E-NEXT:    fsw fa5, 32(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 96(s1)
+; LP64E-NEXT:    flw fa5, 96(s0)
 ; LP64E-NEXT:    fsw fa5, 28(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 100(s1)
+; LP64E-NEXT:    flw fa5, 100(s0)
 ; LP64E-NEXT:    fsw fa5, 24(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 104(s1)
+; LP64E-NEXT:    flw fa5, 104(s0)
 ; LP64E-NEXT:    fsw fa5, 20(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 108(s1)
+; LP64E-NEXT:    flw fa5, 108(s0)
 ; LP64E-NEXT:    fsw fa5, 16(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 112(s1)
+; LP64E-NEXT:    flw fa5, 112(s0)
 ; LP64E-NEXT:    fsw fa5, 12(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 116(s1)
+; LP64E-NEXT:    flw fa5, 116(s0)
 ; LP64E-NEXT:    fsw fa5, 8(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 120(s1)
+; LP64E-NEXT:    flw fa5, 120(s0)
 ; LP64E-NEXT:    fsw fa5, 4(sp) # 4-byte Folded Spill
-; LP64E-NEXT:    flw fa5, 124(s1)
+; LP64E-NEXT:    flw fa5, 124(s0)
 ; LP64E-NEXT:    fsw fa5, 0(sp) # 4-byte Folded Spill
 ; LP64E-NEXT:    call callee
 ; LP64E-NEXT:    flw fa5, 0(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 124(s1)
+; LP64E-NEXT:    fsw fa5, 124(s0)
 ; LP64E-NEXT:    flw fa5, 4(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 120(s1)
+; LP64E-NEXT:    fsw fa5, 120(s0)
 ; LP64E-NEXT:    flw fa5, 8(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 116(s1)
+; LP64E-NEXT:    fsw fa5, 116(s0)
 ; LP64E-NEXT:    flw fa5, 12(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 112(s1)
+; LP64E-NEXT:    fsw fa5, 112(s0)
 ; LP64E-NEXT:    flw fa5, 16(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 108(s1)
+; LP64E-NEXT:    fsw fa5, 108(s0)
 ; LP64E-NEXT:    flw fa5, 20(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 104(s1)
+; LP64E-NEXT:    fsw fa5, 104(s0)
 ; LP64E-NEXT:    flw fa5, 24(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 100(s1)
+; LP64E-NEXT:    fsw fa5, 100(s0)
 ; LP64E-NEXT:    flw fa5, 28(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 96(s1)
+; LP64E-NEXT:    fsw fa5, 96(s0)
 ; LP64E-NEXT:    flw fa5, 32(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 92(s1)
+; LP64E-NEXT:    fsw fa5, 92(s0)
 ; LP64E-NEXT:    flw fa5, 36(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 88(s1)
+; LP64E-NEXT:    fsw fa5, 88(s0)
 ; LP64E-NEXT:    flw fa5, 40(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 84(s1)
+; LP64E-NEXT:    fsw fa5, 84(s0)
 ; LP64E-NEXT:    flw fa5, 44(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 80(s1)
+; LP64E-NEXT:    fsw fa5, 80(s0)
 ; LP64E-NEXT:    flw fa5, 48(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 76(s1)
+; LP64E-NEXT:    fsw fa5, 76(s0)
 ; LP64E-NEXT:    flw fa5, 52(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 72(s1)
+; LP64E-NEXT:    fsw fa5, 72(s0)
 ; LP64E-NEXT:    flw fa5, 56(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 68(s1)
+; LP64E-NEXT:    fsw fa5, 68(s0)
 ; LP64E-NEXT:    flw fa5, 60(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 64(s1)
+; LP64E-NEXT:    fsw fa5, 64(s0)
 ; LP64E-NEXT:    flw fa5, 64(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 60(s1)
+; LP64E-NEXT:    fsw fa5, 60(s0)
 ; LP64E-NEXT:    flw fa5, 68(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 56(s1)
+; LP64E-NEXT:    fsw fa5, 56(s0)
 ; LP64E-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 52(s1)
+; LP64E-NEXT:    fsw fa5, 52(s0)
 ; LP64E-NEXT:    flw fa5, 76(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 48(s1)
+; LP64E-NEXT:    fsw fa5, 48(s0)
 ; LP64E-NEXT:    flw fa5, 80(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 44(s1)
+; LP64E-NEXT:    fsw fa5, 44(s0)
 ; LP64E-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 40(s1)
+; LP64E-NEXT:    fsw fa5, 40(s0)
 ; LP64E-NEXT:    flw fa5, 88(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 36(s1)
+; LP64E-NEXT:    fsw fa5, 36(s0)
 ; LP64E-NEXT:    flw fa5, 92(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 32(s1)
+; LP64E-NEXT:    fsw fa5, 32(s0)
 ; LP64E-NEXT:    flw fa5, 96(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 28(s1)
+; LP64E-NEXT:    fsw fa5, 28(s0)
 ; LP64E-NEXT:    flw fa5, 100(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 24(s1)
+; LP64E-NEXT:    fsw fa5, 24(s0)
 ; LP64E-NEXT:    flw fa5, 104(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 20(s1)
+; LP64E-NEXT:    fsw fa5, 20(s0)
 ; LP64E-NEXT:    flw fa5, 108(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, 16(s1)
+; LP64E-NEXT:    fsw fa5, 16(s0)
 ; LP64E-NEXT:    flw fa5, 112(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, %lo(var+12)(s0)
+; LP64E-NEXT:    fsw fa5, 12(s0)
 ; LP64E-NEXT:    flw fa5, 116(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, %lo(var+8)(s0)
+; LP64E-NEXT:    fsw fa5, 8(s0)
 ; LP64E-NEXT:    flw fa5, 120(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, %lo(var+4)(s0)
+; LP64E-NEXT:    fsw fa5, 4(s0)
 ; LP64E-NEXT:    flw fa5, 124(sp) # 4-byte Folded Reload
-; LP64E-NEXT:    fsw fa5, %lo(var)(s0)
-; LP64E-NEXT:    ld ra, 144(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    ld s0, 136(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    ld s1, 128(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    addi sp, sp, 152
+; LP64E-NEXT:    fsw fa5, 0(s0)
+; LP64E-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; LP64E-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; LP64E-NEXT:    addi sp, sp, 144
 ; LP64E-NEXT:    ret
 ;
 ; ILP32F-LABEL: caller:
@@ -1278,285 +1270,281 @@ define void @caller() nounwind {
 ; ILP32F-NEXT:    addi sp, sp, -144
 ; ILP32F-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
 ; ILP32F-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs0, 128(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs1, 124(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs2, 120(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs3, 116(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs4, 112(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs5, 108(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs6, 104(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs7, 100(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs8, 96(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs9, 92(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs10, 88(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    fsw fs11, 84(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    lui s0, %hi(var)
-; ILP32F-NEXT:    flw fa5, %lo(var)(s0)
+; ILP32F-NEXT:    fsw fs0, 132(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs1, 128(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs2, 124(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs3, 120(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs4, 116(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs5, 112(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs6, 108(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs7, 104(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs8, 100(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs9, 96(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs10, 92(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    fsw fs11, 88(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    lui a0, %hi(var)
+; ILP32F-NEXT:    addi s0, a0, %lo(var)
+; ILP32F-NEXT:    flw fa5, 0(s0)
+; ILP32F-NEXT:    fsw fa5, 84(sp) # 4-byte Folded Spill
+; ILP32F-NEXT:    flw fa5, 4(s0)
 ; ILP32F-NEXT:    fsw fa5, 80(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, %lo(var+4)(s0)
+; ILP32F-NEXT:    flw fa5, 8(s0)
 ; ILP32F-NEXT:    fsw fa5, 76(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, %lo(var+8)(s0)
+; ILP32F-NEXT:    flw fa5, 12(s0)
 ; ILP32F-NEXT:    fsw fa5, 72(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, %lo(var+12)(s0)
+; ILP32F-NEXT:    flw fa5, 16(s0)
 ; ILP32F-NEXT:    fsw fa5, 68(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    addi s1, s0, %lo(var)
-; ILP32F-NEXT:    flw fa5, 16(s1)
+; ILP32F-NEXT:    flw fa5, 20(s0)
 ; ILP32F-NEXT:    fsw fa5, 64(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 20(s1)
+; ILP32F-NEXT:    flw fa5, 24(s0)
 ; ILP32F-NEXT:    fsw fa5, 60(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 24(s1)
+; ILP32F-NEXT:    flw fa5, 28(s0)
 ; ILP32F-NEXT:    fsw fa5, 56(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 28(s1)
+; ILP32F-NEXT:    flw fa5, 32(s0)
 ; ILP32F-NEXT:    fsw fa5, 52(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 32(s1)
+; ILP32F-NEXT:    flw fa5, 36(s0)
 ; ILP32F-NEXT:    fsw fa5, 48(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 36(s1)
+; ILP32F-NEXT:    flw fa5, 40(s0)
 ; ILP32F-NEXT:    fsw fa5, 44(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 40(s1)
+; ILP32F-NEXT:    flw fa5, 44(s0)
 ; ILP32F-NEXT:    fsw fa5, 40(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 44(s1)
+; ILP32F-NEXT:    flw fa5, 48(s0)
 ; ILP32F-NEXT:    fsw fa5, 36(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 48(s1)
+; ILP32F-NEXT:    flw fa5, 52(s0)
 ; ILP32F-NEXT:    fsw fa5, 32(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 52(s1)
+; ILP32F-NEXT:    flw fa5, 56(s0)
 ; ILP32F-NEXT:    fsw fa5, 28(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 56(s1)
+; ILP32F-NEXT:    flw fa5, 60(s0)
 ; ILP32F-NEXT:    fsw fa5, 24(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 60(s1)
+; ILP32F-NEXT:    flw fa5, 64(s0)
 ; ILP32F-NEXT:    fsw fa5, 20(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 64(s1)
+; ILP32F-NEXT:    flw fa5, 68(s0)
 ; ILP32F-NEXT:    fsw fa5, 16(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 68(s1)
+; ILP32F-NEXT:    flw fa5, 72(s0)
 ; ILP32F-NEXT:    fsw fa5, 12(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 72(s1)
+; ILP32F-NEXT:    flw fa5, 76(s0)
 ; ILP32F-NEXT:    fsw fa5, 8(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fa5, 76(s1)
-; ILP32F-NEXT:    fsw fa5, 4(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    flw fs8, 80(s1)
-; ILP32F-NEXT:    flw fs9, 84(s1)
-; ILP32F-NEXT:    flw fs10, 88(s1)
-; ILP32F-NEXT:    flw fs11, 92(s1)
-; ILP32F-NEXT:    flw fs0, 96(s1)
-; ILP32F-NEXT:    flw fs1, 100(s1)
-; ILP32F-NEXT:    flw fs2, 104(s1)
-; ILP32F-NEXT:    flw fs3, 108(s1)
-; ILP32F-NEXT:    flw fs4, 112(s1)
-; ILP32F-NEXT:    flw fs5, 116(s1)
-; ILP32F-NEXT:    flw fs6, 120(s1)
-; ILP32F-NEXT:    flw fs7, 124(s1)
+; ILP32F-NEXT:    flw fs8, 80(s0)
+; ILP32F-NEXT:    flw fs9, 84(s0)
+; ILP32F-NEXT:    flw fs10, 88(s0)
+; ILP32F-NEXT:    flw fs11, 92(s0)
+; ILP32F-NEXT:    flw fs0, 96(s0)
+; ILP32F-NEXT:    flw fs1, 100(s0)
+; ILP32F-NEXT:    flw fs2, 104(s0)
+; ILP32F-NEXT:    flw fs3, 108(s0)
+; ILP32F-NEXT:    flw fs4, 112(s0)
+; ILP32F-NEXT:    flw fs5, 116(s0)
+; ILP32F-NEXT:    flw fs6, 120(s0)
+; ILP32F-NEXT:    flw fs7, 124(s0)
 ; ILP32F-NEXT:    call callee
-; ILP32F-NEXT:    fsw fs7, 124(s1)
-; ILP32F-NEXT:    fsw fs6, 120(s1)
-; ILP32F-NEXT:    fsw fs5, 116(s1)
-; ILP32F-NEXT:    fsw fs4, 112(s1)
-; ILP32F-NEXT:    fsw fs3, 108(s1)
-; ILP32F-NEXT:    fsw fs2, 104(s1)
-; ILP32F-NEXT:    fsw fs1, 100(s1)
-; ILP32F-NEXT:    fsw fs0, 96(s1)
-; ILP32F-NEXT:    fsw fs11, 92(s1)
-; ILP32F-NEXT:    fsw fs10, 88(s1)
-; ILP32F-NEXT:    fsw fs9, 84(s1)
-; ILP32F-NEXT:    fsw fs8, 80(s1)
-; ILP32F-NEXT:    flw fa5, 4(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 76(s1)
+; ILP32F-NEXT:    fsw fs7, 124(s0)
+; ILP32F-NEXT:    fsw fs6, 120(s0)
+; ILP32F-NEXT:    fsw fs5, 116(s0)
+; ILP32F-NEXT:    fsw fs4, 112(s0)
+; ILP32F-NEXT:    fsw fs3, 108(s0)
+; ILP32F-NEXT:    fsw fs2, 104(s0)
+; ILP32F-NEXT:    fsw fs1, 100(s0)
+; ILP32F-NEXT:    fsw fs0, 96(s0)
+; ILP32F-NEXT:    fsw fs11, 92(s0)
+; ILP32F-NEXT:    fsw fs10, 88(s0)
+; ILP32F-NEXT:    fsw fs9, 84(s0)
+; ILP32F-NEXT:    fsw fs8, 80(s0)
 ; ILP32F-NEXT:    flw fa5, 8(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 72(s1)
+; ILP32F-NEXT:    fsw fa5, 76(s0)
 ; ILP32F-NEXT:    flw fa5, 12(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 68(s1)
+; ILP32F-NEXT:    fsw fa5, 72(s0)
 ; ILP32F-NEXT:    flw fa5, 16(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 64(s1)
+; ILP32F-NEXT:    fsw fa5, 68(s0)
 ; ILP32F-NEXT:    flw fa5, 20(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 60(s1)
+; ILP32F-NEXT:    fsw fa5, 64(s0)
 ; ILP32F-NEXT:    flw fa5, 24(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 56(s1)
+; ILP32F-NEXT:    fsw fa5, 60(s0)
 ; ILP32F-NEXT:    flw fa5, 28(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 52(s1)
+; ILP32F-NEXT:    fsw fa5, 56(s0)
 ; ILP32F-NEXT:    flw fa5, 32(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 48(s1)
+; ILP32F-NEXT:    fsw fa5, 52(s0)
 ; ILP32F-NEXT:    flw fa5, 36(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 44(s1)
+; ILP32F-NEXT:    fsw fa5, 48(s0)
 ; ILP32F-NEXT:    flw fa5, 40(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 40(s1)
+; ILP32F-NEXT:    fsw fa5, 44(s0)
 ; ILP32F-NEXT:    flw fa5, 44(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 36(s1)
+; ILP32F-NEXT:    fsw fa5, 40(s0)
 ; ILP32F-NEXT:    flw fa5, 48(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 32(s1)
+; ILP32F-NEXT:    fsw fa5, 36(s0)
 ; ILP32F-NEXT:    flw fa5, 52(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 28(s1)
+; ILP32F-NEXT:    fsw fa5, 32(s0)
 ; ILP32F-NEXT:    flw fa5, 56(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 24(s1)
+; ILP32F-NEXT:    fsw fa5, 28(s0)
 ; ILP32F-NEXT:    flw fa5, 60(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 20(s1)
+; ILP32F-NEXT:    fsw fa5, 24(s0)
 ; ILP32F-NEXT:    flw fa5, 64(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, 16(s1)
+; ILP32F-NEXT:    fsw fa5, 20(s0)
 ; ILP32F-NEXT:    flw fa5, 68(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, %lo(var+12)(s0)
+; ILP32F-NEXT:    fsw fa5, 16(s0)
 ; ILP32F-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, %lo(var+8)(s0)
+; ILP32F-NEXT:    fsw fa5, 12(s0)
 ; ILP32F-NEXT:    flw fa5, 76(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, %lo(var+4)(s0)
+; ILP32F-NEXT:    fsw fa5, 8(s0)
 ; ILP32F-NEXT:    flw fa5, 80(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    fsw fa5, %lo(var)(s0)
+; ILP32F-NEXT:    fsw fa5, 4(s0)
+; ILP32F-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    fsw fa5, 0(s0)
 ; ILP32F-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; ILP32F-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs0, 128(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs1, 124(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs2, 120(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs3, 116(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs4, 112(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs5, 108(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs6, 104(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs7, 100(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs8, 96(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs9, 92(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs10, 88(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs11, 84(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs0, 132(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs1, 128(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs2, 124(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs3, 120(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs4, 116(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs5, 112(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs6, 108(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs7, 104(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs8, 100(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs9, 96(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs10, 92(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs11, 88(sp) # 4-byte Folded Reload
 ; ILP32F-NEXT:    addi sp, sp, 144
 ; ILP32F-NEXT:    ret
 ;
 ; LP64F-LABEL: caller:
 ; LP64F:       # %bb.0:
-; LP64F-NEXT:    addi sp, sp, -160
-; LP64F-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
-; LP64F-NEXT:    sd s0, 144(sp) # 8-byte Folded Spill
-; LP64F-NEXT:    sd s1, 136(sp) # 8-byte Folded Spill
-; LP64F-NEXT:    fsw fs0, 132(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs1, 128(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs2, 124(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs3, 120(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs4, 116(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs5, 112(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs6, 108(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs7, 104(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs8, 100(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs9, 96(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs10, 92(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    fsw fs11, 88(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    lui s0, %hi(var)
-; LP64F-NEXT:    flw fa5, %lo(var)(s0)
-; LP64F-NEXT:    fsw fa5, 84(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, %lo(var+4)(s0)
-; LP64F-NEXT:    fsw fa5, 80(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, %lo(var+8)(s0)
+; LP64F-NEXT:    addi sp, sp, -144
+; LP64F-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; LP64F-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; LP64F-NEXT:    fsw fs0, 124(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs1, 120(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs2, 116(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs3, 112(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs4, 108(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs5, 104(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs6, 100(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs7, 96(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs8, 92(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs9, 88(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs10, 84(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    fsw fs11, 80(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    lui a0, %hi(var)
+; LP64F-NEXT:    addi s0, a0, %lo(var)
+; LP64F-NEXT:    flw fa5, 0(s0)
 ; LP64F-NEXT:    fsw fa5, 76(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, %lo(var+12)(s0)
+; LP64F-NEXT:    flw fa5, 4(s0)
 ; LP64F-NEXT:    fsw fa5, 72(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    addi s1, s0, %lo(var)
-; LP64F-NEXT:    flw fa5, 16(s1)
+; LP64F-NEXT:    flw fa5, 8(s0)
 ; LP64F-NEXT:    fsw fa5, 68(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 20(s1)
+; LP64F-NEXT:    flw fa5, 12(s0)
 ; LP64F-NEXT:    fsw fa5, 64(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 24(s1)
+; LP64F-NEXT:    flw fa5, 16(s0)
 ; LP64F-NEXT:    fsw fa5, 60(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 28(s1)
+; LP64F-NEXT:    flw fa5, 20(s0)
 ; LP64F-NEXT:    fsw fa5, 56(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 32(s1)
+; LP64F-NEXT:    flw fa5, 24(s0)
 ; LP64F-NEXT:    fsw fa5, 52(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 36(s1)
+; LP64F-NEXT:    flw fa5, 28(s0)
 ; LP64F-NEXT:    fsw fa5, 48(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 40(s1)
+; LP64F-NEXT:    flw fa5, 32(s0)
 ; LP64F-NEXT:    fsw fa5, 44(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 44(s1)
+; LP64F-NEXT:    flw fa5, 36(s0)
 ; LP64F-NEXT:    fsw fa5, 40(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 48(s1)
+; LP64F-NEXT:    flw fa5, 40(s0)
 ; LP64F-NEXT:    fsw fa5, 36(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 52(s1)
+; LP64F-NEXT:    flw fa5, 44(s0)
 ; LP64F-NEXT:    fsw fa5, 32(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 56(s1)
+; LP64F-NEXT:    flw fa5, 48(s0)
 ; LP64F-NEXT:    fsw fa5, 28(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 60(s1)
+; LP64F-NEXT:    flw fa5, 52(s0)
 ; LP64F-NEXT:    fsw fa5, 24(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 64(s1)
+; LP64F-NEXT:    flw fa5, 56(s0)
 ; LP64F-NEXT:    fsw fa5, 20(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 68(s1)
+; LP64F-NEXT:    flw fa5, 60(s0)
 ; LP64F-NEXT:    fsw fa5, 16(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 72(s1)
+; LP64F-NEXT:    flw fa5, 64(s0)
 ; LP64F-NEXT:    fsw fa5, 12(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fa5, 76(s1)
+; LP64F-NEXT:    flw fa5, 68(s0)
 ; LP64F-NEXT:    fsw fa5, 8(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    flw fs8, 80(s1)
-; LP64F-NEXT:    flw fs9, 84(s1)
-; LP64F-NEXT:    flw fs10, 88(s1)
-; LP64F-NEXT:    flw fs11, 92(s1)
-; LP64F-NEXT:    flw fs0, 96(s1)
-; LP64F-NEXT:    flw fs1, 100(s1)
-; LP64F-NEXT:    flw fs2, 104(s1)
-; LP64F-NEXT:    flw fs3, 108(s1)
-; LP64F-NEXT:    flw fs4, 112(s1)
-; LP64F-NEXT:    flw fs5, 116(s1)
-; LP64F-NEXT:    flw fs6, 120(s1)
-; LP64F-NEXT:    flw fs7, 124(s1)
+; LP64F-NEXT:    flw fa5, 72(s0)
+; LP64F-NEXT:    fsw fa5, 4(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    flw fa5, 76(s0)
+; LP64F-NEXT:    fsw fa5, 0(sp) # 4-byte Folded Spill
+; LP64F-NEXT:    flw fs8, 80(s0)
+; LP64F-NEXT:    flw fs9, 84(s0)
+; LP64F-NEXT:    flw fs10, 88(s0)
+; LP64F-NEXT:    flw fs11, 92(s0)
+; LP64F-NEXT:    flw fs0, 96(s0)
+; LP64F-NEXT:    flw fs1, 100(s0)
+; LP64F-NEXT:    flw fs2, 104(s0)
+; LP64F-NEXT:    flw fs3, 108(s0)
+; LP64F-NEXT:    flw fs4, 112(s0)
+; LP64F-NEXT:    flw fs5, 116(s0)
+; LP64F-NEXT:    flw fs6, 120(s0)
+; LP64F-NEXT:    flw fs7, 124(s0)
 ; LP64F-NEXT:    call callee
-; LP64F-NEXT:    fsw fs7, 124(s1)
-; LP64F-NEXT:    fsw fs6, 120(s1)
-; LP64F-NEXT:    fsw fs5, 116(s1)
-; LP64F-NEXT:    fsw fs4, 112(s1)
-; LP64F-NEXT:    fsw fs3, 108(s1)
-; LP64F-NEXT:    fsw fs2, 104(s1)
-; LP64F-NEXT:    fsw fs1, 100(s1)
-; LP64F-NEXT:    fsw fs0, 96(s1)
-; LP64F-NEXT:    fsw fs11, 92(s1)
-; LP64F-NEXT:    fsw fs10, 88(s1)
-; LP64F-NEXT:    fsw fs9, 84(s1)
-; LP64F-NEXT:    fsw fs8, 80(s1)
+; LP64F-NEXT:    fsw fs7, 124(s0)
+; LP64F-NEXT:    fsw fs6, 120(s0)
+; LP64F-NEXT:    fsw fs5, 116(s0)
+; LP64F-NEXT:    fsw fs4, 112(s0)
+; LP64F-NEXT:    fsw fs3, 108(s0)
+; LP64F-NEXT:    fsw fs2, 104(s0)
+; LP64F-NEXT:    fsw fs1, 100(s0)
+; LP64F-NEXT:    fsw fs0, 96(s0)
+; LP64F-NEXT:    fsw fs11, 92(s0)
+; LP64F-NEXT:    fsw fs10, 88(s0)
+; LP64F-NEXT:    fsw fs9, 84(s0)
+; LP64F-NEXT:    fsw fs8, 80(s0)
+; LP64F-NEXT:    flw fa5, 0(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    fsw fa5, 76(s0)
+; LP64F-NEXT:    flw fa5, 4(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    fsw fa5, 72(s0)
 ; LP64F-NEXT:    flw fa5, 8(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 76(s1)
+; LP64F-NEXT:    fsw fa5, 68(s0)
 ; LP64F-NEXT:    flw fa5, 12(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 72(s1)
+; LP64F-NEXT:    fsw fa5, 64(s0)
 ; LP64F-NEXT:    flw fa5, 16(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 68(s1)
+; LP64F-NEXT:    fsw fa5, 60(s0)
 ; LP64F-NEXT:    flw fa5, 20(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 64(s1)
+; LP64F-NEXT:    fsw fa5, 56(s0)
 ; LP64F-NEXT:    flw fa5, 24(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 60(s1)
+; LP64F-NEXT:    fsw fa5, 52(s0)
 ; LP64F-NEXT:    flw fa5, 28(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 56(s1)
+; LP64F-NEXT:    fsw fa5, 48(s0)
 ; LP64F-NEXT:    flw fa5, 32(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 52(s1)
+; LP64F-NEXT:    fsw fa5, 44(s0)
 ; LP64F-NEXT:    flw fa5, 36(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 48(s1)
+; LP64F-NEXT:    fsw fa5, 40(s0)
 ; LP64F-NEXT:    flw fa5, 40(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 44(s1)
+; LP64F-NEXT:    fsw fa5, 36(s0)
 ; LP64F-NEXT:    flw fa5, 44(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 40(s1)
+; LP64F-NEXT:    fsw fa5, 32(s0)
 ; LP64F-NEXT:    flw fa5, 48(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 36(s1)
+; LP64F-NEXT:    fsw fa5, 28(s0)
 ; LP64F-NEXT:    flw fa5, 52(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 32(s1)
+; LP64F-NEXT:    fsw fa5, 24(s0)
 ; LP64F-NEXT:    flw fa5, 56(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 28(s1)
+; LP64F-NEXT:    fsw fa5, 20(s0)
 ; LP64F-NEXT:    flw fa5, 60(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 24(s1)
+; LP64F-NEXT:    fsw fa5, 16(s0)
 ; LP64F-NEXT:    flw fa5, 64(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 20(s1)
+; LP64F-NEXT:    fsw fa5, 12(s0)
 ; LP64F-NEXT:    flw fa5, 68(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, 16(s1)
+; LP64F-NEXT:    fsw fa5, 8(s0)
 ; LP64F-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, %lo(var+12)(s0)
+; LP64F-NEXT:    fsw fa5, 4(s0)
 ; LP64F-NEXT:    flw fa5, 76(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, %lo(var+8)(s0)
-; LP64F-NEXT:    flw fa5, 80(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, %lo(var+4)(s0)
-; LP64F-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    fsw fa5, %lo(var)(s0)
-; LP64F-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
-; LP64F-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
-; LP64F-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
-; LP64F-NEXT:    flw fs0, 132(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs1, 128(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs2, 124(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs3, 120(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs4, 116(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs5, 112(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs6, 108(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs7, 104(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs8, 100(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs9, 96(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs10, 92(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs11, 88(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    addi sp, sp, 160
+; LP64F-NEXT:    fsw fa5, 0(s0)
+; LP64F-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; LP64F-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; LP64F-NEXT:    flw fs0, 124(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs1, 120(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs2, 116(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs3, 112(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs4, 108(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs5, 104(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs6, 100(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs7, 96(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs8, 92(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs9, 88(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs10, 84(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs11, 80(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    addi sp, sp, 144
 ; LP64F-NEXT:    ret
 ;
 ; ILP32D-LABEL: caller:
@@ -1564,285 +1552,281 @@ define void @caller() nounwind {
 ; ILP32D-NEXT:    addi sp, sp, -192
 ; ILP32D-NEXT:    sw ra, 188(sp) # 4-byte Folded Spill
 ; ILP32D-NEXT:    sw s0, 184(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    sw s1, 180(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    fsd fs0, 168(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs1, 160(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs2, 152(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs3, 144(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs4, 136(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs5, 128(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs6, 120(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs7, 112(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs8, 104(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs9, 96(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs10, 88(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs11, 80(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    lui s0, %hi(var)
-; ILP32D-NEXT:    flw fa5, %lo(var)(s0)
+; ILP32D-NEXT:    fsd fs0, 176(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs1, 168(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs2, 160(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs3, 152(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs4, 144(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs5, 136(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs6, 128(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs7, 120(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs8, 112(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs9, 104(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs10, 96(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs11, 88(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    lui a0, %hi(var)
+; ILP32D-NEXT:    addi s0, a0, %lo(var)
+; ILP32D-NEXT:    flw fa5, 0(s0)
+; ILP32D-NEXT:    fsw fa5, 84(sp) # 4-byte Folded Spill
+; ILP32D-NEXT:    flw fa5, 4(s0)
+; ILP32D-NEXT:    fsw fa5, 80(sp) # 4-byte Folded Spill
+; ILP32D-NEXT:    flw fa5, 8(s0)
 ; ILP32D-NEXT:    fsw fa5, 76(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, %lo(var+4)(s0)
+; ILP32D-NEXT:    flw fa5, 12(s0)
 ; ILP32D-NEXT:    fsw fa5, 72(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, %lo(var+8)(s0)
+; ILP32D-NEXT:    flw fa5, 16(s0)
 ; ILP32D-NEXT:    fsw fa5, 68(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, %lo(var+12)(s0)
+; ILP32D-NEXT:    flw fa5, 20(s0)
 ; ILP32D-NEXT:    fsw fa5, 64(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    addi s1, s0, %lo(var)
-; ILP32D-NEXT:    flw fa5, 16(s1)
+; ILP32D-NEXT:    flw fa5, 24(s0)
 ; ILP32D-NEXT:    fsw fa5, 60(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 20(s1)
+; ILP32D-NEXT:    flw fa5, 28(s0)
 ; ILP32D-NEXT:    fsw fa5, 56(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 24(s1)
+; ILP32D-NEXT:    flw fa5, 32(s0)
 ; ILP32D-NEXT:    fsw fa5, 52(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 28(s1)
+; ILP32D-NEXT:    flw fa5, 36(s0)
 ; ILP32D-NEXT:    fsw fa5, 48(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 32(s1)
+; ILP32D-NEXT:    flw fa5, 40(s0)
 ; ILP32D-NEXT:    fsw fa5, 44(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 36(s1)
+; ILP32D-NEXT:    flw fa5, 44(s0)
 ; ILP32D-NEXT:    fsw fa5, 40(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 40(s1)
+; ILP32D-NEXT:    flw fa5, 48(s0)
 ; ILP32D-NEXT:    fsw fa5, 36(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 44(s1)
+; ILP32D-NEXT:    flw fa5, 52(s0)
 ; ILP32D-NEXT:    fsw fa5, 32(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 48(s1)
+; ILP32D-NEXT:    flw fa5, 56(s0)
 ; ILP32D-NEXT:    fsw fa5, 28(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 52(s1)
+; ILP32D-NEXT:    flw fa5, 60(s0)
 ; ILP32D-NEXT:    fsw fa5, 24(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 56(s1)
+; ILP32D-NEXT:    flw fa5, 64(s0)
 ; ILP32D-NEXT:    fsw fa5, 20(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 60(s1)
+; ILP32D-NEXT:    flw fa5, 68(s0)
 ; ILP32D-NEXT:    fsw fa5, 16(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 64(s1)
+; ILP32D-NEXT:    flw fa5, 72(s0)
 ; ILP32D-NEXT:    fsw fa5, 12(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 68(s1)
+; ILP32D-NEXT:    flw fa5, 76(s0)
 ; ILP32D-NEXT:    fsw fa5, 8(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 72(s1)
-; ILP32D-NEXT:    fsw fa5, 4(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fa5, 76(s1)
-; ILP32D-NEXT:    fsw fa5, 0(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    flw fs8, 80(s1)
-; ILP32D-NEXT:    flw fs9, 84(s1)
-; ILP32D-NEXT:    flw fs10, 88(s1)
-; ILP32D-NEXT:    flw fs11, 92(s1)
-; ILP32D-NEXT:    flw fs0, 96(s1)
-; ILP32D-NEXT:    flw fs1, 100(s1)
-; ILP32D-NEXT:    flw fs2, 104(s1)
-; ILP32D-NEXT:    flw fs3, 108(s1)
-; ILP32D-NEXT:    flw fs4, 112(s1)
-; ILP32D-NEXT:    flw fs5, 116(s1)
-; ILP32D-NEXT:    flw fs6, 120(s1)
-; ILP32D-NEXT:    flw fs7, 124(s1)
+; ILP32D-NEXT:    flw fs8, 80(s0)
+; ILP32D-NEXT:    flw fs9, 84(s0)
+; ILP32D-NEXT:    flw fs10, 88(s0)
+; ILP32D-NEXT:    flw fs11, 92(s0)
+; ILP32D-NEXT:    flw fs0, 96(s0)
+; ILP32D-NEXT:    flw fs1, 100(s0)
+; ILP32D-NEXT:    flw fs2, 104(s0)
+; ILP32D-NEXT:    flw fs3, 108(s0)
+; ILP32D-NEXT:    flw fs4, 112(s0)
+; ILP32D-NEXT:    flw fs5, 116(s0)
+; ILP32D-NEXT:    flw fs6, 120(s0)
+; ILP32D-NEXT:    flw fs7, 124(s0)
 ; ILP32D-NEXT:    call callee
-; ILP32D-NEXT:    fsw fs7, 124(s1)
-; ILP32D-NEXT:    fsw fs6, 120(s1)
-; ILP32D-NEXT:    fsw fs5, 116(s1)
-; ILP32D-NEXT:    fsw fs4, 112(s1)
-; ILP32D-NEXT:    fsw fs3, 108(s1)
-; ILP32D-NEXT:    fsw fs2, 104(s1)
-; ILP32D-NEXT:    fsw fs1, 100(s1)
-; ILP32D-NEXT:    fsw fs0, 96(s1)
-; ILP32D-NEXT:    fsw fs11, 92(s1)
-; ILP32D-NEXT:    fsw fs10, 88(s1)
-; ILP32D-NEXT:    fsw fs9, 84(s1)
-; ILP32D-NEXT:    fsw fs8, 80(s1)
-; ILP32D-NEXT:    flw fa5, 0(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 76(s1)
-; ILP32D-NEXT:    flw fa5, 4(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 72(s1)
+; ILP32D-NEXT:    fsw fs7, 124(s0)
+; ILP32D-NEXT:    fsw fs6, 120(s0)
+; ILP32D-NEXT:    fsw fs5, 116(s0)
+; ILP32D-NEXT:    fsw fs4, 112(s0)
+; ILP32D-NEXT:    fsw fs3, 108(s0)
+; ILP32D-NEXT:    fsw fs2, 104(s0)
+; ILP32D-NEXT:    fsw fs1, 100(s0)
+; ILP32D-NEXT:    fsw fs0, 96(s0)
+; ILP32D-NEXT:    fsw fs11, 92(s0)
+; ILP32D-NEXT:    fsw fs10, 88(s0)
+; ILP32D-NEXT:    fsw fs9, 84(s0)
+; ILP32D-NEXT:    fsw fs8, 80(s0)
 ; ILP32D-NEXT:    flw fa5, 8(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 68(s1)
+; ILP32D-NEXT:    fsw fa5, 76(s0)
 ; ILP32D-NEXT:    flw fa5, 12(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 64(s1)
+; ILP32D-NEXT:    fsw fa5, 72(s0)
 ; ILP32D-NEXT:    flw fa5, 16(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 60(s1)
+; ILP32D-NEXT:    fsw fa5, 68(s0)
 ; ILP32D-NEXT:    flw fa5, 20(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 56(s1)
+; ILP32D-NEXT:    fsw fa5, 64(s0)
 ; ILP32D-NEXT:    flw fa5, 24(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 52(s1)
+; ILP32D-NEXT:    fsw fa5, 60(s0)
 ; ILP32D-NEXT:    flw fa5, 28(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 48(s1)
+; ILP32D-NEXT:    fsw fa5, 56(s0)
 ; ILP32D-NEXT:    flw fa5, 32(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 44(s1)
+; ILP32D-NEXT:    fsw fa5, 52(s0)
 ; ILP32D-NEXT:    flw fa5, 36(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 40(s1)
+; ILP32D-NEXT:    fsw fa5, 48(s0)
 ; ILP32D-NEXT:    flw fa5, 40(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 36(s1)
+; ILP32D-NEXT:    fsw fa5, 44(s0)
 ; ILP32D-NEXT:    flw fa5, 44(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 32(s1)
+; ILP32D-NEXT:    fsw fa5, 40(s0)
 ; ILP32D-NEXT:    flw fa5, 48(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 28(s1)
+; ILP32D-NEXT:    fsw fa5, 36(s0)
 ; ILP32D-NEXT:    flw fa5, 52(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 24(s1)
+; ILP32D-NEXT:    fsw fa5, 32(s0)
 ; ILP32D-NEXT:    flw fa5, 56(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 20(s1)
+; ILP32D-NEXT:    fsw fa5, 28(s0)
 ; ILP32D-NEXT:    flw fa5, 60(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, 16(s1)
+; ILP32D-NEXT:    fsw fa5, 24(s0)
 ; ILP32D-NEXT:    flw fa5, 64(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, %lo(var+12)(s0)
+; ILP32D-NEXT:    fsw fa5, 20(s0)
 ; ILP32D-NEXT:    flw fa5, 68(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, %lo(var+8)(s0)
+; ILP32D-NEXT:    fsw fa5, 16(s0)
 ; ILP32D-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, %lo(var+4)(s0)
+; ILP32D-NEXT:    fsw fa5, 12(s0)
 ; ILP32D-NEXT:    flw fa5, 76(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fsw fa5, %lo(var)(s0)
+; ILP32D-NEXT:    fsw fa5, 8(s0)
+; ILP32D-NEXT:    flw fa5, 80(sp) # 4-byte Folded Reload
+; ILP32D-NEXT:    fsw fa5, 4(s0)
+; ILP32D-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
+; ILP32D-NEXT:    fsw fa5, 0(s0)
 ; ILP32D-NEXT:    lw ra, 188(sp) # 4-byte Folded Reload
 ; ILP32D-NEXT:    lw s0, 184(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    lw s1, 180(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fld fs0, 168(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs1, 160(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs2, 152(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs3, 144(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs4, 136(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs5, 128(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs6, 120(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs7, 112(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs8, 104(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs9, 96(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs10, 88(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs11, 80(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs0, 176(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs1, 168(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs2, 160(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs3, 152(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs4, 144(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs5, 136(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs6, 128(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs7, 120(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs8, 112(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs9, 104(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs10, 96(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs11, 88(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    addi sp, sp, 192
 ; ILP32D-NEXT:    ret
 ;
 ; LP64D-LABEL: caller:
 ; LP64D:       # %bb.0:
-; LP64D-NEXT:    addi sp, sp, -208
-; LP64D-NEXT:    sd ra, 200(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    sd s0, 192(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    sd s1, 184(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs0, 176(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs1, 168(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs2, 160(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs3, 152(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs4, 144(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs5, 136(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs6, 128(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs7, 120(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs8, 112(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs9, 104(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs10, 96(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs11, 88(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    lui s0, %hi(var)
-; LP64D-NEXT:    flw fa5, %lo(var)(s0)
-; LP64D-NEXT:    fsw fa5, 84(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, %lo(var+4)(s0)
-; LP64D-NEXT:    fsw fa5, 80(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, %lo(var+8)(s0)
+; LP64D-NEXT:    addi sp, sp, -192
+; LP64D-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs0, 168(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs1, 160(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs2, 152(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs3, 144(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs4, 136(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs5, 128(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs6, 120(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs7, 112(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs8, 104(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs9, 96(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs10, 88(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs11, 80(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    lui a0, %hi(var)
+; LP64D-NEXT:    addi s0, a0, %lo(var)
+; LP64D-NEXT:    flw fa5, 0(s0)
 ; LP64D-NEXT:    fsw fa5, 76(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, %lo(var+12)(s0)
+; LP64D-NEXT:    flw fa5, 4(s0)
 ; LP64D-NEXT:    fsw fa5, 72(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    addi s1, s0, %lo(var)
-; LP64D-NEXT:    flw fa5, 16(s1)
+; LP64D-NEXT:    flw fa5, 8(s0)
 ; LP64D-NEXT:    fsw fa5, 68(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 20(s1)
+; LP64D-NEXT:    flw fa5, 12(s0)
 ; LP64D-NEXT:    fsw fa5, 64(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 24(s1)
+; LP64D-NEXT:    flw fa5, 16(s0)
 ; LP64D-NEXT:    fsw fa5, 60(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 28(s1)
+; LP64D-NEXT:    flw fa5, 20(s0)
 ; LP64D-NEXT:    fsw fa5, 56(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 32(s1)
+; LP64D-NEXT:    flw fa5, 24(s0)
 ; LP64D-NEXT:    fsw fa5, 52(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 36(s1)
+; LP64D-NEXT:    flw fa5, 28(s0)
 ; LP64D-NEXT:    fsw fa5, 48(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 40(s1)
+; LP64D-NEXT:    flw fa5, 32(s0)
 ; LP64D-NEXT:    fsw fa5, 44(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 44(s1)
+; LP64D-NEXT:    flw fa5, 36(s0)
 ; LP64D-NEXT:    fsw fa5, 40(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 48(s1)
+; LP64D-NEXT:    flw fa5, 40(s0)
 ; LP64D-NEXT:    fsw fa5, 36(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 52(s1)
+; LP64D-NEXT:    flw fa5, 44(s0)
 ; LP64D-NEXT:    fsw fa5, 32(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 56(s1)
+; LP64D-NEXT:    flw fa5, 48(s0)
 ; LP64D-NEXT:    fsw fa5, 28(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 60(s1)
+; LP64D-NEXT:    flw fa5, 52(s0)
 ; LP64D-NEXT:    fsw fa5, 24(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 64(s1)
+; LP64D-NEXT:    flw fa5, 56(s0)
 ; LP64D-NEXT:    fsw fa5, 20(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 68(s1)
+; LP64D-NEXT:    flw fa5, 60(s0)
 ; LP64D-NEXT:    fsw fa5, 16(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 72(s1)
+; LP64D-NEXT:    flw fa5, 64(s0)
 ; LP64D-NEXT:    fsw fa5, 12(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fa5, 76(s1)
+; LP64D-NEXT:    flw fa5, 68(s0)
 ; LP64D-NEXT:    fsw fa5, 8(sp) # 4-byte Folded Spill
-; LP64D-NEXT:    flw fs8, 80(s1)
-; LP64D-NEXT:    flw fs9, 84(s1)
-; LP64D-NEXT:    flw fs10, 88(s1)
-; LP64D-NEXT:    flw fs11, 92(s1)
-; LP64D-NEXT:    flw fs0, 96(s1)
-; LP64D-NEXT:    flw fs1, 100(s1)
-; LP64D-NEXT:    flw fs2, 104(s1)
-; LP64D-NEXT:    flw fs3, 108(s1)
-; LP64D-NEXT:    flw fs4, 112(s1)
-; LP64D-NEXT:    flw fs5, 116(s1)
-; LP64D-NEXT:    flw fs6, 120(s1)
-; LP64D-NEXT:    flw fs7, 124(s1)
+; LP64D-NEXT:    flw fa5, 72(s0)
+; LP64D-NEXT:    fsw fa5, 4(sp) # 4-byte Folded Spill
+; LP64D-NEXT:    flw fa5, 76(s0)
+; LP64D-NEXT:    fsw fa5, 0(sp) # 4-byte Folded Spill
+; LP64D-NEXT:    flw fs8, 80(s0)
+; LP64D-NEXT:    flw fs9, 84(s0)
+; LP64D-NEXT:    flw fs10, 88(s0)
+; LP64D-NEXT:    flw fs11, 92(s0)
+; LP64D-NEXT:    flw fs0, 96(s0)
+; LP64D-NEXT:    flw fs1, 100(s0)
+; LP64D-NEXT:    flw fs2, 104(s0)
+; LP64D-NEXT:    flw fs3, 108(s0)
+; LP64D-NEXT:    flw fs4, 112(s0)
+; LP64D-NEXT:    flw fs5, 116(s0)
+; LP64D-NEXT:    flw fs6, 120(s0)
+; LP64D-NEXT:    flw fs7, 124(s0)
 ; LP64D-NEXT:    call callee
-; LP64D-NEXT:    fsw fs7, 124(s1)
-; LP64D-NEXT:    fsw fs6, 120(s1)
-; LP64D-NEXT:    fsw fs5, 116(s1)
-; LP64D-NEXT:    fsw fs4, 112(s1)
-; LP64D-NEXT:    fsw fs3, 108(s1)
-; LP64D-NEXT:    fsw fs2, 104(s1)
-; LP64D-NEXT:    fsw fs1, 100(s1)
-; LP64D-NEXT:    fsw fs0, 96(s1)
-; LP64D-NEXT:    fsw fs11, 92(s1)
-; LP64D-NEXT:    fsw fs10, 88(s1)
-; LP64D-NEXT:    fsw fs9, 84(s1)
-; LP64D-NEXT:    fsw fs8, 80(s1)
+; LP64D-NEXT:    fsw fs7, 124(s0)
+; LP64D-NEXT:    fsw fs6, 120(s0)
+; LP64D-NEXT:    fsw fs5, 116(s0)
+; LP64D-NEXT:    fsw fs4, 112(s0)
+; LP64D-NEXT:    fsw fs3, 108(s0)
+; LP64D-NEXT:    fsw fs2, 104(s0)
+; LP64D-NEXT:    fsw fs1, 100(s0)
+; LP64D-NEXT:    fsw fs0, 96(s0)
+; LP64D-NEXT:    fsw fs11, 92(s0)
+; LP64D-NEXT:    fsw fs10, 88(s0)
+; LP64D-NEXT:    fsw fs9, 84(s0)
+; LP64D-NEXT:    fsw fs8, 80(s0)
+; LP64D-NEXT:    flw fa5, 0(sp) # 4-byte Folded Reload
+; LP64D-NEXT:    fsw fa5, 76(s0)
+; LP64D-NEXT:    flw fa5, 4(sp) # 4-byte Folded Reload
+; LP64D-NEXT:    fsw fa5, 72(s0)
 ; LP64D-NEXT:    flw fa5, 8(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 76(s1)
+; LP64D-NEXT:    fsw fa5, 68(s0)
 ; LP64D-NEXT:    flw fa5, 12(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 72(s1)
+; LP64D-NEXT:    fsw fa5, 64(s0)
 ; LP64D-NEXT:    flw fa5, 16(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 68(s1)
+; LP64D-NEXT:    fsw fa5, 60(s0)
 ; LP64D-NEXT:    flw fa5, 20(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 64(s1)
+; LP64D-NEXT:    fsw fa5, 56(s0)
 ; LP64D-NEXT:    flw fa5, 24(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 60(s1)
+; LP64D-NEXT:    fsw fa5, 52(s0)
 ; LP64D-NEXT:    flw fa5, 28(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 56(s1)
+; LP64D-NEXT:    fsw fa5, 48(s0)
 ; LP64D-NEXT:    flw fa5, 32(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 52(s1)
+; LP64D-NEXT:    fsw fa5, 44(s0)
 ; LP64D-NEXT:    flw fa5, 36(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 48(s1)
+; LP64D-NEXT:    fsw fa5, 40(s0)
 ; LP64D-NEXT:    flw fa5, 40(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 44(s1)
+; LP64D-NEXT:    fsw fa5, 36(s0)
 ; LP64D-NEXT:    flw fa5, 44(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 40(s1)
+; LP64D-NEXT:    fsw fa5, 32(s0)
 ; LP64D-NEXT:    flw fa5, 48(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 36(s1)
+; LP64D-NEXT:    fsw fa5, 28(s0)
 ; LP64D-NEXT:    flw fa5, 52(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 32(s1)
+; LP64D-NEXT:    fsw fa5, 24(s0)
 ; LP64D-NEXT:    flw fa5, 56(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 28(s1)
+; LP64D-NEXT:    fsw fa5, 20(s0)
 ; LP64D-NEXT:    flw fa5, 60(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 24(s1)
+; LP64D-NEXT:    fsw fa5, 16(s0)
 ; LP64D-NEXT:    flw fa5, 64(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 20(s1)
+; LP64D-NEXT:    fsw fa5, 12(s0)
 ; LP64D-NEXT:    flw fa5, 68(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, 16(s1)
+; LP64D-NEXT:    fsw fa5, 8(s0)
 ; LP64D-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, %lo(var+12)(s0)
+; LP64D-NEXT:    fsw fa5, 4(s0)
 ; LP64D-NEXT:    flw fa5, 76(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, %lo(var+8)(s0)
-; LP64D-NEXT:    flw fa5, 80(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, %lo(var+4)(s0)
-; LP64D-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
-; LP64D-NEXT:    fsw fa5, %lo(var)(s0)
-; LP64D-NEXT:    ld ra, 200(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    ld s0, 192(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    ld s1, 184(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs0, 176(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs1, 168(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs2, 160(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs3, 152(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs4, 144(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs5, 136(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs6, 128(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs7, 120(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs8, 112(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs9, 104(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs10, 96(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs11, 88(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    addi sp, sp, 208
+; LP64D-NEXT:    fsw fa5, 0(s0)
+; LP64D-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs0, 168(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs1, 160(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs2, 152(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs3, 144(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs4, 136(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs5, 128(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs6, 120(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs7, 112(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs8, 104(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs9, 96(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs10, 88(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs11, 80(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    addi sp, sp, 192
 ; LP64D-NEXT:    ret
   %val = load [32 x float], ptr @var
   call void @callee()
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
index 38e3c2d9256cd..0b761e354d442 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
@@ -22,211 +22,211 @@ define void @callee() nounwind {
 ; ILP32-LABEL: callee:
 ; ILP32:       # %bb.0:
 ; ILP32-NEXT:    lui a0, %hi(var)
-; ILP32-NEXT:    fld fa5, %lo(var)(a0)
-; ILP32-NEXT:    fld fa4, %lo(var+8)(a0)
-; ILP32-NEXT:    addi a1, a0, %lo(var)
-; ILP32-NEXT:    fld fa3, 16(a1)
-; ILP32-NEXT:    fld fa2, 24(a1)
-; ILP32-NEXT:    fld fa1, 32(a1)
-; ILP32-NEXT:    fld fa0, 40(a1)
-; ILP32-NEXT:    fld ft0, 48(a1)
-; ILP32-NEXT:    fld ft1, 56(a1)
-; ILP32-NEXT:    fld ft2, 64(a1)
-; ILP32-NEXT:    fld ft3, 72(a1)
-; ILP32-NEXT:    fld ft4, 80(a1)
-; ILP32-NEXT:    fld ft5, 88(a1)
-; ILP32-NEXT:    fld ft6, 96(a1)
-; ILP32-NEXT:    fld ft7, 104(a1)
-; ILP32-NEXT:    fld fa6, 112(a1)
-; ILP32-NEXT:    fld fa7, 120(a1)
-; ILP32-NEXT:    fld ft8, 128(a1)
-; ILP32-NEXT:    fld ft9, 136(a1)
-; ILP32-NEXT:    fld ft10, 144(a1)
-; ILP32-NEXT:    fld ft11, 152(a1)
-; ILP32-NEXT:    fld fs0, 160(a1)
-; ILP32-NEXT:    fld fs1, 168(a1)
-; ILP32-NEXT:    fld fs2, 176(a1)
-; ILP32-NEXT:    fld fs3, 184(a1)
-; ILP32-NEXT:    fld fs4, 192(a1)
-; ILP32-NEXT:    fld fs5, 200(a1)
-; ILP32-NEXT:    fld fs6, 208(a1)
-; ILP32-NEXT:    fld fs7, 216(a1)
-; ILP32-NEXT:    fld fs8, 248(a1)
-; ILP32-NEXT:    fld fs9, 240(a1)
-; ILP32-NEXT:    fld fs10, 232(a1)
-; ILP32-NEXT:    fld fs11, 224(a1)
-; ILP32-NEXT:    fsd fs8, 248(a1)
-; ILP32-NEXT:    fsd fs9, 240(a1)
-; ILP32-NEXT:    fsd fs10, 232(a1)
-; ILP32-NEXT:    fsd fs11, 224(a1)
-; ILP32-NEXT:    fsd fs7, 216(a1)
-; ILP32-NEXT:    fsd fs6, 208(a1)
-; ILP32-NEXT:    fsd fs5, 200(a1)
-; ILP32-NEXT:    fsd fs4, 192(a1)
-; ILP32-NEXT:    fsd fs3, 184(a1)
-; ILP32-NEXT:    fsd fs2, 176(a1)
-; ILP32-NEXT:    fsd fs1, 168(a1)
-; ILP32-NEXT:    fsd fs0, 160(a1)
-; ILP32-NEXT:    fsd ft11, 152(a1)
-; ILP32-NEXT:    fsd ft10, 144(a1)
-; ILP32-NEXT:    fsd ft9, 136(a1)
-; ILP32-NEXT:    fsd ft8, 128(a1)
-; ILP32-NEXT:    fsd fa7, 120(a1)
-; ILP32-NEXT:    fsd fa6, 112(a1)
-; ILP32-NEXT:    fsd ft7, 104(a1)
-; ILP32-NEXT:    fsd ft6, 96(a1)
-; ILP32-NEXT:    fsd ft5, 88(a1)
-; ILP32-NEXT:    fsd ft4, 80(a1)
-; ILP32-NEXT:    fsd ft3, 72(a1)
-; ILP32-NEXT:    fsd ft2, 64(a1)
-; ILP32-NEXT:    fsd ft1, 56(a1)
-; ILP32-NEXT:    fsd ft0, 48(a1)
-; ILP32-NEXT:    fsd fa0, 40(a1)
-; ILP32-NEXT:    fsd fa1, 32(a1)
-; ILP32-NEXT:    fsd fa2, 24(a1)
-; ILP32-NEXT:    fsd fa3, 16(a1)
-; ILP32-NEXT:    fsd fa4, %lo(var+8)(a0)
-; ILP32-NEXT:    fsd fa5, %lo(var)(a0)
+; ILP32-NEXT:    addi a0, a0, %lo(var)
+; ILP32-NEXT:    fld fa5, 0(a0)
+; ILP32-NEXT:    fld fa4, 8(a0)
+; ILP32-NEXT:    fld fa3, 16(a0)
+; ILP32-NEXT:    fld fa2, 24(a0)
+; ILP32-NEXT:    fld fa1, 32(a0)
+; ILP32-NEXT:    fld fa0, 40(a0)
+; ILP32-NEXT:    fld ft0, 48(a0)
+; ILP32-NEXT:    fld ft1, 56(a0)
+; ILP32-NEXT:    fld ft2, 64(a0)
+; ILP32-NEXT:    fld ft3, 72(a0)
+; ILP32-NEXT:    fld ft4, 80(a0)
+; ILP32-NEXT:    fld ft5, 88(a0)
+; ILP32-NEXT:    fld ft6, 96(a0)
+; ILP32-NEXT:    fld ft7, 104(a0)
+; ILP32-NEXT:    fld fa6, 112(a0)
+; ILP32-NEXT:    fld fa7, 120(a0)
+; ILP32-NEXT:    fld ft8, 128(a0)
+; ILP32-NEXT:    fld ft9, 136(a0)
+; ILP32-NEXT:    fld ft10, 144(a0)
+; ILP32-NEXT:    fld ft11, 152(a0)
+; ILP32-NEXT:    fld fs0, 160(a0)
+; ILP32-NEXT:    fld fs1, 168(a0)
+; ILP32-NEXT:    fld fs2, 176(a0)
+; ILP32-NEXT:    fld fs3, 184(a0)
+; ILP32-NEXT:    fld fs4, 192(a0)
+; ILP32-NEXT:    fld fs5, 200(a0)
+; ILP32-NEXT:    fld fs6, 208(a0)
+; ILP32-NEXT:    fld fs7, 216(a0)
+; ILP32-NEXT:    fld fs8, 248(a0)
+; ILP32-NEXT:    fld fs9, 240(a0)
+; ILP32-NEXT:    fld fs10, 232(a0)
+; ILP32-NEXT:    fld fs11, 224(a0)
+; ILP32-NEXT:    fsd fs8, 248(a0)
+; ILP32-NEXT:    fsd fs9, 240(a0)
+; ILP32-NEXT:    fsd fs10, 232(a0)
+; ILP32-NEXT:    fsd fs11, 224(a0)
+; ILP32-NEXT:    fsd fs7, 216(a0)
+; ILP32-NEXT:    fsd fs6, 208(a0)
+; ILP32-NEXT:    fsd fs5, 200(a0)
+; ILP32-NEXT:    fsd fs4, 192(a0)
+; ILP32-NEXT:    fsd fs3, 184(a0)
+; ILP32-NEXT:    fsd fs2, 176(a0)
+; ILP32-NEXT:    fsd fs1, 168(a0)
+; ILP32-NEXT:    fsd fs0, 160(a0)
+; ILP32-NEXT:    fsd ft11, 152(a0)
+; ILP32-NEXT:    fsd ft10, 144(a0)
+; ILP32-NEXT:    fsd ft9, 136(a0)
+; ILP32-NEXT:    fsd ft8, 128(a0)
+; ILP32-NEXT:    fsd fa7, 120(a0)
+; ILP32-NEXT:    fsd fa6, 112(a0)
+; ILP32-NEXT:    fsd ft7, 104(a0)
+; ILP32-NEXT:    fsd ft6, 96(a0)
+; ILP32-NEXT:    fsd ft5, 88(a0)
+; ILP32-NEXT:    fsd ft4, 80(a0)
+; ILP32-NEXT:    fsd ft3, 72(a0)
+; ILP32-NEXT:    fsd ft2, 64(a0)
+; ILP32-NEXT:    fsd ft1, 56(a0)
+; ILP32-NEXT:    fsd ft0, 48(a0)
+; ILP32-NEXT:    fsd fa0, 40(a0)
+; ILP32-NEXT:    fsd fa1, 32(a0)
+; ILP32-NEXT:    fsd fa2, 24(a0)
+; ILP32-NEXT:    fsd fa3, 16(a0)
+; ILP32-NEXT:    fsd fa4, 8(a0)
+; ILP32-NEXT:    fsd fa5, 0(a0)
 ; ILP32-NEXT:    ret
 ;
 ; LP64-LABEL: callee:
 ; LP64:       # %bb.0:
 ; LP64-NEXT:    lui a0, %hi(var)
-; LP64-NEXT:    fld fa5, %lo(var)(a0)
-; LP64-NEXT:    fld fa4, %lo(var+8)(a0)
-; LP64-NEXT:    addi a1, a0, %lo(var)
-; LP64-NEXT:    fld fa3, 16(a1)
-; LP64-NEXT:    fld fa2, 24(a1)
-; LP64-NEXT:    fld fa1, 32(a1)
-; LP64-NEXT:    fld fa0, 40(a1)
-; LP64-NEXT:    fld ft0, 48(a1)
-; LP64-NEXT:    fld ft1, 56(a1)
-; LP64-NEXT:    fld ft2, 64(a1)
-; LP64-NEXT:    fld ft3, 72(a1)
-; LP64-NEXT:    fld ft4, 80(a1)
-; LP64-NEXT:    fld ft5, 88(a1)
-; LP64-NEXT:    fld ft6, 96(a1)
-; LP64-NEXT:    fld ft7, 104(a1)
-; LP64-NEXT:    fld fa6, 112(a1)
-; LP64-NEXT:    fld fa7, 120(a1)
-; LP64-NEXT:    fld ft8, 128(a1)
-; LP64-NEXT:    fld ft9, 136(a1)
-; LP64-NEXT:    fld ft10, 144(a1)
-; LP64-NEXT:    fld ft11, 152(a1)
-; LP64-NEXT:    fld fs0, 160(a1)
-; LP64-NEXT:    fld fs1, 168(a1)
-; LP64-NEXT:    fld fs2, 176(a1)
-; LP64-NEXT:    fld fs3, 184(a1)
-; LP64-NEXT:    fld fs4, 192(a1)
-; LP64-NEXT:    fld fs5, 200(a1)
-; LP64-NEXT:    fld fs6, 208(a1)
-; LP64-NEXT:    fld fs7, 216(a1)
-; LP64-NEXT:    fld fs8, 248(a1)
-; LP64-NEXT:    fld fs9, 240(a1)
-; LP64-NEXT:    fld fs10, 232(a1)
-; LP64-NEXT:    fld fs11, 224(a1)
-; LP64-NEXT:    fsd fs8, 248(a1)
-; LP64-NEXT:    fsd fs9, 240(a1)
-; LP64-NEXT:    fsd fs10, 232(a1)
-; LP64-NEXT:    fsd fs11, 224(a1)
-; LP64-NEXT:    fsd fs7, 216(a1)
-; LP64-NEXT:    fsd fs6, 208(a1)
-; LP64-NEXT:    fsd fs5, 200(a1)
-; LP64-NEXT:    fsd fs4, 192(a1)
-; LP64-NEXT:    fsd fs3, 184(a1)
-; LP64-NEXT:    fsd fs2, 176(a1)
-; LP64-NEXT:    fsd fs1, 168(a1)
-; LP64-NEXT:    fsd fs0, 160(a1)
-; LP64-NEXT:    fsd ft11, 152(a1)
-; LP64-NEXT:    fsd ft10, 144(a1)
-; LP64-NEXT:    fsd ft9, 136(a1)
-; LP64-NEXT:    fsd ft8, 128(a1)
-; LP64-NEXT:    fsd fa7, 120(a1)
-; LP64-NEXT:    fsd fa6, 112(a1)
-; LP64-NEXT:    fsd ft7, 104(a1)
-; LP64-NEXT:    fsd ft6, 96(a1)
-; LP64-NEXT:    fsd ft5, 88(a1)
-; LP64-NEXT:    fsd ft4, 80(a1)
-; LP64-NEXT:    fsd ft3, 72(a1)
-; LP64-NEXT:    fsd ft2, 64(a1)
-; LP64-NEXT:    fsd ft1, 56(a1)
-; LP64-NEXT:    fsd ft0, 48(a1)
-; LP64-NEXT:    fsd fa0, 40(a1)
-; LP64-NEXT:    fsd fa1, 32(a1)
-; LP64-NEXT:    fsd fa2, 24(a1)
-; LP64-NEXT:    fsd fa3, 16(a1)
-; LP64-NEXT:    fsd fa4, %lo(var+8)(a0)
-; LP64-NEXT:    fsd fa5, %lo(var)(a0)
+; LP64-NEXT:    addi a0, a0, %lo(var)
+; LP64-NEXT:    fld fa5, 0(a0)
+; LP64-NEXT:    fld fa4, 8(a0)
+; LP64-NEXT:    fld fa3, 16(a0)
+; LP64-NEXT:    fld fa2, 24(a0)
+; LP64-NEXT:    fld fa1, 32(a0)
+; LP64-NEXT:    fld fa0, 40(a0)
+; LP64-NEXT:    fld ft0, 48(a0)
+; LP64-NEXT:    fld ft1, 56(a0)
+; LP64-NEXT:    fld ft2, 64(a0)
+; LP64-NEXT:    fld ft3, 72(a0)
+; LP64-NEXT:    fld ft4, 80(a0)
+; LP64-NEXT:    fld ft5, 88(a0)
+; LP64-NEXT:    fld ft6, 96(a0)
+; LP64-NEXT:    fld ft7, 104(a0)
+; LP64-NEXT:    fld fa6, 112(a0)
+; LP64-NEXT:    fld fa7, 120(a0)
+; LP64-NEXT:    fld ft8, 128(a0)
+; LP64-NEXT:    fld ft9, 136(a0)
+; LP64-NEXT:    fld ft10, 144(a0)
+; LP64-NEXT:    fld ft11, 152(a0)
+; LP64-NEXT:    fld fs0, 160(a0)
+; LP64-NEXT:    fld fs1, 168(a0)
+; LP64-NEXT:    fld fs2, 176(a0)
+; LP64-NEXT:    fld fs3, 184(a0)
+; LP64-NEXT:    fld fs4, 192(a0)
+; LP64-NEXT:    fld fs5, 200(a0)
+; LP64-NEXT:    fld fs6, 208(a0)
+; LP64-NEXT:    fld fs7, 216(a0)
+; LP64-NEXT:    fld fs8, 248(a0)
+; LP64-NEXT:    fld fs9, 240(a0)
+; LP64-NEXT:    fld fs10, 232(a0)
+; LP64-NEXT:    fld fs11, 224(a0)
+; LP64-NEXT:    fsd fs8, 248(a0)
+; LP64-NEXT:    fsd fs9, 240(a0)
+; LP64-NEXT:    fsd fs10, 232(a0)
+; LP64-NEXT:    fsd fs11, 224(a0)
+; LP64-NEXT:    fsd fs7, 216(a0)
+; LP64-NEXT:    fsd fs6, 208(a0)
+; LP64-NEXT:    fsd fs5, 200(a0)
+; LP64-NEXT:    fsd fs4, 192(a0)
+; LP64-NEXT:    fsd fs3, 184(a0)
+; LP64-NEXT:    fsd fs2, 176(a0)
+; LP64-NEXT:    fsd fs1, 168(a0)
+; LP64-NEXT:    fsd fs0, 160(a0)
+; LP64-NEXT:    fsd ft11, 152(a0)
+; LP64-NEXT:    fsd ft10, 144(a0)
+; LP64-NEXT:    fsd ft9, 136(a0)
+; LP64-NEXT:    fsd ft8, 128(a0)
+; LP64-NEXT:    fsd fa7, 120(a0)
+; LP64-NEXT:    fsd fa6, 112(a0)
+; LP64-NEXT:    fsd ft7, 104(a0)
+; LP64-NEXT:    fsd ft6, 96(a0)
+; LP64-NEXT:    fsd ft5, 88(a0)
+; LP64-NEXT:    fsd ft4, 80(a0)
+; LP64-NEXT:    fsd ft3, 72(a0)
+; LP64-NEXT:    fsd ft2, 64(a0)
+; LP64-NEXT:    fsd ft1, 56(a0)
+; LP64-NEXT:    fsd ft0, 48(a0)
+; LP64-NEXT:    fsd fa0, 40(a0)
+; LP64-NEXT:    fsd fa1, 32(a0)
+; LP64-NEXT:    fsd fa2, 24(a0)
+; LP64-NEXT:    fsd fa3, 16(a0)
+; LP64-NEXT:    fsd fa4, 8(a0)
+; LP64-NEXT:    fsd fa5, 0(a0)
 ; LP64-NEXT:    ret
 ;
 ; LP64E-LABEL: callee:
 ; LP64E:       # %bb.0:
 ; LP64E-NEXT:    lui a0, %hi(var)
-; LP64E-NEXT:    fld fa5, %lo(var)(a0)
-; LP64E-NEXT:    fld fa4, %lo(var+8)(a0)
-; LP64E-NEXT:    addi a1, a0, %lo(var)
-; LP64E-NEXT:    fld fa3, 16(a1)
-; LP64E-NEXT:    fld fa2, 24(a1)
-; LP64E-NEXT:    fld fa1, 32(a1)
-; LP64E-NEXT:    fld fa0, 40(a1)
-; LP64E-NEXT:    fld ft0, 48(a1)
-; LP64E-NEXT:    fld ft1, 56(a1)
-; LP64E-NEXT:    fld ft2, 64(a1)
-; LP64E-NEXT:    fld ft3, 72(a1)
-; LP64E-NEXT:    fld ft4, 80(a1)
-; LP64E-NEXT:    fld ft5, 88(a1)
-; LP64E-NEXT:    fld ft6, 96(a1)
-; LP64E-NEXT:    fld ft7, 104(a1)
-; LP64E-NEXT:    fld fa6, 112(a1)
-; LP64E-NEXT:    fld fa7, 120(a1)
-; LP64E-NEXT:    fld ft8, 128(a1)
-; LP64E-NEXT:    fld ft9, 136(a1)
-; LP64E-NEXT:    fld ft10, 144(a1)
-; LP64E-NEXT:    fld ft11, 152(a1)
-; LP64E-NEXT:    fld fs0, 160(a1)
-; LP64E-NEXT:    fld fs1, 168(a1)
-; LP64E-NEXT:    fld fs2, 176(a1)
-; LP64E-NEXT:    fld fs3, 184(a1)
-; LP64E-NEXT:    fld fs4, 192(a1)
-; LP64E-NEXT:    fld fs5, 200(a1)
-; LP64E-NEXT:    fld fs6, 208(a1)
-; LP64E-NEXT:    fld fs7, 216(a1)
-; LP64E-NEXT:    fld fs8, 248(a1)
-; LP64E-NEXT:    fld fs9, 240(a1)
-; LP64E-NEXT:    fld fs10, 232(a1)
-; LP64E-NEXT:    fld fs11, 224(a1)
-; LP64E-NEXT:    fsd fs8, 248(a1)
-; LP64E-NEXT:    fsd fs9, 240(a1)
-; LP64E-NEXT:    fsd fs10, 232(a1)
-; LP64E-NEXT:    fsd fs11, 224(a1)
-; LP64E-NEXT:    fsd fs7, 216(a1)
-; LP64E-NEXT:    fsd fs6, 208(a1)
-; LP64E-NEXT:    fsd fs5, 200(a1)
-; LP64E-NEXT:    fsd fs4, 192(a1)
-; LP64E-NEXT:    fsd fs3, 184(a1)
-; LP64E-NEXT:    fsd fs2, 176(a1)
-; LP64E-NEXT:    fsd fs1, 168(a1)
-; LP64E-NEXT:    fsd fs0, 160(a1)
-; LP64E-NEXT:    fsd ft11, 152(a1)
-; LP64E-NEXT:    fsd ft10, 144(a1)
-; LP64E-NEXT:    fsd ft9, 136(a1)
-; LP64E-NEXT:    fsd ft8, 128(a1)
-; LP64E-NEXT:    fsd fa7, 120(a1)
-; LP64E-NEXT:    fsd fa6, 112(a1)
-; LP64E-NEXT:    fsd ft7, 104(a1)
-; LP64E-NEXT:    fsd ft6, 96(a1)
-; LP64E-NEXT:    fsd ft5, 88(a1)
-; LP64E-NEXT:    fsd ft4, 80(a1)
-; LP64E-NEXT:    fsd ft3, 72(a1)
-; LP64E-NEXT:    fsd ft2, 64(a1)
-; LP64E-NEXT:    fsd ft1, 56(a1)
-; LP64E-NEXT:    fsd ft0, 48(a1)
-; LP64E-NEXT:    fsd fa0, 40(a1)
-; LP64E-NEXT:    fsd fa1, 32(a1)
-; LP64E-NEXT:    fsd fa2, 24(a1)
-; LP64E-NEXT:    fsd fa3, 16(a1)
-; LP64E-NEXT:    fsd fa4, %lo(var+8)(a0)
-; LP64E-NEXT:    fsd fa5, %lo(var)(a0)
+; LP64E-NEXT:    addi a0, a0, %lo(var)
+; LP64E-NEXT:    fld fa5, 0(a0)
+; LP64E-NEXT:    fld fa4, 8(a0)
+; LP64E-NEXT:    fld fa3, 16(a0)
+; LP64E-NEXT:    fld fa2, 24(a0)
+; LP64E-NEXT:    fld fa1, 32(a0)
+; LP64E-NEXT:    fld fa0, 40(a0)
+; LP64E-NEXT:    fld ft0, 48(a0)
+; LP64E-NEXT:    fld ft1, 56(a0)
+; LP64E-NEXT:    fld ft2, 64(a0)
+; LP64E-NEXT:    fld ft3, 72(a0)
+; LP64E-NEXT:    fld ft4, 80(a0)
+; LP64E-NEXT:    fld ft5, 88(a0)
+; LP64E-NEXT:    fld ft6, 96(a0)
+; LP64E-NEXT:    fld ft7, 104(a0)
+; LP64E-NEXT:    fld fa6, 112(a0)
+; LP64E-NEXT:    fld fa7, 120(a0)
+; LP64E-NEXT:    fld ft8, 128(a0)
+; LP64E-NEXT:    fld ft9, 136(a0)
+; LP64E-NEXT:    fld ft10, 144(a0)
+; LP64E-NEXT:    fld ft11, 152(a0)
+; LP64E-NEXT:    fld fs0, 160(a0)
+; LP64E-NEXT:    fld fs1, 168(a0)
+; LP64E-NEXT:    fld fs2, 176(a0)
+; LP64E-NEXT:    fld fs3, 184(a0)
+; LP64E-NEXT:    fld fs4, 192(a0)
+; LP64E-NEXT:    fld fs5, 200(a0)
+; LP64E-NEXT:    fld fs6, 208(a0)
+; LP64E-NEXT:    fld fs7, 216(a0)
+; LP64E-NEXT:    fld fs8, 248(a0)
+; LP64E-NEXT:    fld fs9, 240(a0)
+; LP64E-NEXT:    fld fs10, 232(a0)
+; LP64E-NEXT:    fld fs11, 224(a0)
+; LP64E-NEXT:    fsd fs8, 248(a0)
+; LP64E-NEXT:    fsd fs9, 240(a0)
+; LP64E-NEXT:    fsd fs10, 232(a0)
+; LP64E-NEXT:    fsd fs11, 224(a0)
+; LP64E-NEXT:    fsd fs7, 216(a0)
+; LP64E-NEXT:    fsd fs6, 208(a0)
+; LP64E-NEXT:    fsd fs5, 200(a0)
+; LP64E-NEXT:    fsd fs4, 192(a0)
+; LP64E-NEXT:    fsd fs3, 184(a0)
+; LP64E-NEXT:    fsd fs2, 176(a0)
+; LP64E-NEXT:    fsd fs1, 168(a0)
+; LP64E-NEXT:    fsd fs0, 160(a0)
+; LP64E-NEXT:    fsd ft11, 152(a0)
+; LP64E-NEXT:    fsd ft10, 144(a0)
+; LP64E-NEXT:    fsd ft9, 136(a0)
+; LP64E-NEXT:    fsd ft8, 128(a0)
+; LP64E-NEXT:    fsd fa7, 120(a0)
+; LP64E-NEXT:    fsd fa6, 112(a0)
+; LP64E-NEXT:    fsd ft7, 104(a0)
+; LP64E-NEXT:    fsd ft6, 96(a0)
+; LP64E-NEXT:    fsd ft5, 88(a0)
+; LP64E-NEXT:    fsd ft4, 80(a0)
+; LP64E-NEXT:    fsd ft3, 72(a0)
+; LP64E-NEXT:    fsd ft2, 64(a0)
+; LP64E-NEXT:    fsd ft1, 56(a0)
+; LP64E-NEXT:    fsd ft0, 48(a0)
+; LP64E-NEXT:    fsd fa0, 40(a0)
+; LP64E-NEXT:    fsd fa1, 32(a0)
+; LP64E-NEXT:    fsd fa2, 24(a0)
+; LP64E-NEXT:    fsd fa3, 16(a0)
+; LP64E-NEXT:    fsd fa4, 8(a0)
+; LP64E-NEXT:    fsd fa5, 0(a0)
 ; LP64E-NEXT:    ret
 ;
 ; ILP32D-LABEL: callee:
@@ -245,71 +245,71 @@ define void @callee() nounwind {
 ; ILP32D-NEXT:    fsd fs10, 8(sp) # 8-byte Folded Spill
 ; ILP32D-NEXT:    fsd fs11, 0(sp) # 8-byte Folded Spill
 ; ILP32D-NEXT:    lui a0, %hi(var)
-; ILP32D-NEXT:    fld fa5, %lo(var)(a0)
-; ILP32D-NEXT:    fld fa4, %lo(var+8)(a0)
-; ILP32D-NEXT:    addi a1, a0, %lo(var)
-; ILP32D-NEXT:    fld fa3, 16(a1)
-; ILP32D-NEXT:    fld fa2, 24(a1)
-; ILP32D-NEXT:    fld fa1, 32(a1)
-; ILP32D-NEXT:    fld fa0, 40(a1)
-; ILP32D-NEXT:    fld ft0, 48(a1)
-; ILP32D-NEXT:    fld ft1, 56(a1)
-; ILP32D-NEXT:    fld ft2, 64(a1)
-; ILP32D-NEXT:    fld ft3, 72(a1)
-; ILP32D-NEXT:    fld ft4, 80(a1)
-; ILP32D-NEXT:    fld ft5, 88(a1)
-; ILP32D-NEXT:    fld ft6, 96(a1)
-; ILP32D-NEXT:    fld ft7, 104(a1)
-; ILP32D-NEXT:    fld fa6, 112(a1)
-; ILP32D-NEXT:    fld fa7, 120(a1)
-; ILP32D-NEXT:    fld ft8, 128(a1)
-; ILP32D-NEXT:    fld ft9, 136(a1)
-; ILP32D-NEXT:    fld ft10, 144(a1)
-; ILP32D-NEXT:    fld ft11, 152(a1)
-; ILP32D-NEXT:    fld fs0, 160(a1)
-; ILP32D-NEXT:    fld fs1, 168(a1)
-; ILP32D-NEXT:    fld fs2, 176(a1)
-; ILP32D-NEXT:    fld fs3, 184(a1)
-; ILP32D-NEXT:    fld fs4, 192(a1)
-; ILP32D-NEXT:    fld fs5, 200(a1)
-; ILP32D-NEXT:    fld fs6, 208(a1)
-; ILP32D-NEXT:    fld fs7, 216(a1)
-; ILP32D-NEXT:    fld fs8, 248(a1)
-; ILP32D-NEXT:    fld fs9, 240(a1)
-; ILP32D-NEXT:    fld fs10, 232(a1)
-; ILP32D-NEXT:    fld fs11, 224(a1)
-; ILP32D-NEXT:    fsd fs8, 248(a1)
-; ILP32D-NEXT:    fsd fs9, 240(a1)
-; ILP32D-NEXT:    fsd fs10, 232(a1)
-; ILP32D-NEXT:    fsd fs11, 224(a1)
-; ILP32D-NEXT:    fsd fs7, 216(a1)
-; ILP32D-NEXT:    fsd fs6, 208(a1)
-; ILP32D-NEXT:    fsd fs5, 200(a1)
-; ILP32D-NEXT:    fsd fs4, 192(a1)
-; ILP32D-NEXT:    fsd fs3, 184(a1)
-; ILP32D-NEXT:    fsd fs2, 176(a1)
-; ILP32D-NEXT:    fsd fs1, 168(a1)
-; ILP32D-NEXT:    fsd fs0, 160(a1)
-; ILP32D-NEXT:    fsd ft11, 152(a1)
-; ILP32D-NEXT:    fsd ft10, 144(a1)
-; ILP32D-NEXT:    fsd ft9, 136(a1)
-; ILP32D-NEXT:    fsd ft8, 128(a1)
-; ILP32D-NEXT:    fsd fa7, 120(a1)
-; ILP32D-NEXT:    fsd fa6, 112(a1)
-; ILP32D-NEXT:    fsd ft7, 104(a1)
-; ILP32D-NEXT:    fsd ft6, 96(a1)
-; ILP32D-NEXT:    fsd ft5, 88(a1)
-; ILP32D-NEXT:    fsd ft4, 80(a1)
-; ILP32D-NEXT:    fsd ft3, 72(a1)
-; ILP32D-NEXT:    fsd ft2, 64(a1)
-; ILP32D-NEXT:    fsd ft1, 56(a1)
-; ILP32D-NEXT:    fsd ft0, 48(a1)
-; ILP32D-NEXT:    fsd fa0, 40(a1)
-; ILP32D-NEXT:    fsd fa1, 32(a1)
-; ILP32D-NEXT:    fsd fa2, 24(a1)
-; ILP32D-NEXT:    fsd fa3, 16(a1)
-; ILP32D-NEXT:    fsd fa4, %lo(var+8)(a0)
-; ILP32D-NEXT:    fsd fa5, %lo(var)(a0)
+; ILP32D-NEXT:    addi a0, a0, %lo(var)
+; ILP32D-NEXT:    fld fa5, 0(a0)
+; ILP32D-NEXT:    fld fa4, 8(a0)
+; ILP32D-NEXT:    fld fa3, 16(a0)
+; ILP32D-NEXT:    fld fa2, 24(a0)
+; ILP32D-NEXT:    fld fa1, 32(a0)
+; ILP32D-NEXT:    fld fa0, 40(a0)
+; ILP32D-NEXT:    fld ft0, 48(a0)
+; ILP32D-NEXT:    fld ft1, 56(a0)
+; ILP32D-NEXT:    fld ft2, 64(a0)
+; ILP32D-NEXT:    fld ft3, 72(a0)
+; ILP32D-NEXT:    fld ft4, 80(a0)
+; ILP32D-NEXT:    fld ft5, 88(a0)
+; ILP32D-NEXT:    fld ft6, 96(a0)
+; ILP32D-NEXT:    fld ft7, 104(a0)
+; ILP32D-NEXT:    fld fa6, 112(a0)
+; ILP32D-NEXT:    fld fa7, 120(a0)
+; ILP32D-NEXT:    fld ft8, 128(a0)
+; ILP32D-NEXT:    fld ft9, 136(a0)
+; ILP32D-NEXT:    fld ft10, 144(a0)
+; ILP32D-NEXT:    fld ft11, 152(a0)
+; ILP32D-NEXT:    fld fs0, 160(a0)
+; ILP32D-NEXT:    fld fs1, 168(a0)
+; ILP32D-NEXT:    fld fs2, 176(a0)
+; ILP32D-NEXT:    fld fs3, 184(a0)
+; ILP32D-NEXT:    fld fs4, 192(a0)
+; ILP32D-NEXT:    fld fs5, 200(a0)
+; ILP32D-NEXT:    fld fs6, 208(a0)
+; ILP32D-NEXT:    fld fs7, 216(a0)
+; ILP32D-NEXT:    fld fs8, 248(a0)
+; ILP32D-NEXT:    fld fs9, 240(a0)
+; ILP32D-NEXT:    fld fs10, 232(a0)
+; ILP32D-NEXT:    fld fs11, 224(a0)
+; ILP32D-NEXT:    fsd fs8, 248(a0)
+; ILP32D-NEXT:    fsd fs9, 240(a0)
+; ILP32D-NEXT:    fsd fs10, 232(a0)
+; ILP32D-NEXT:    fsd fs11, 224(a0)
+; ILP32D-NEXT:    fsd fs7, 216(a0)
+; ILP32D-NEXT:    fsd fs6, 208(a0)
+; ILP32D-NEXT:    fsd fs5, 200(a0)
+; ILP32D-NEXT:    fsd fs4, 192(a0)
+; ILP32D-NEXT:    fsd fs3, 184(a0)
+; ILP32D-NEXT:    fsd fs2, 176(a0)
+; ILP32D-NEXT:    fsd fs1, 168(a0)
+; ILP32D-NEXT:    fsd fs0, 160(a0)
+; ILP32D-NEXT:    fsd ft11, 152(a0)
+; ILP32D-NEXT:    fsd ft10, 144(a0)
+; ILP32D-NEXT:    fsd ft9, 136(a0)
+; ILP32D-NEXT:    fsd ft8, 128(a0)
+; ILP32D-NEXT:    fsd fa7, 120(a0)
+; ILP32D-NEXT:    fsd fa6, 112(a0)
+; ILP32D-NEXT:    fsd ft7, 104(a0)
+; ILP32D-NEXT:    fsd ft6, 96(a0)
+; ILP32D-NEXT:    fsd ft5, 88(a0)
+; ILP32D-NEXT:    fsd ft4, 80(a0)
+; ILP32D-NEXT:    fsd ft3, 72(a0)
+; ILP32D-NEXT:    fsd ft2, 64(a0)
+; ILP32D-NEXT:    fsd ft1, 56(a0)
+; ILP32D-NEXT:    fsd ft0, 48(a0)
+; ILP32D-NEXT:    fsd fa0, 40(a0)
+; ILP32D-NEXT:    fsd fa1, 32(a0)
+; ILP32D-NEXT:    fsd fa2, 24(a0)
+; ILP32D-NEXT:    fsd fa3, 16(a0)
+; ILP32D-NEXT:    fsd fa4, 8(a0)
+; ILP32D-NEXT:    fsd fa5, 0(a0)
 ; ILP32D-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
@@ -341,71 +341,71 @@ define void @callee() nounwind {
 ; LP64D-NEXT:    fsd fs10, 8(sp) # 8-byte Folded Spill
 ; LP64D-NEXT:    fsd fs11, 0(sp) # 8-byte Folded Spill
 ; LP64D-NEXT:    lui a0, %hi(var)
-; LP64D-NEXT:    fld fa5, %lo(var)(a0)
-; LP64D-NEXT:    fld fa4, %lo(var+8)(a0)
-; LP64D-NEXT:    addi a1, a0, %lo(var)
-; LP64D-NEXT:    fld fa3, 16(a1)
-; LP64D-NEXT:    fld fa2, 24(a1)
-; LP64D-NEXT:    fld fa1, 32(a1)
-; LP64D-NEXT:    fld fa0, 40(a1)
-; LP64D-NEXT:    fld ft0, 48(a1)
-; LP64D-NEXT:    fld ft1, 56(a1)
-; LP64D-NEXT:    fld ft2, 64(a1)
-; LP64D-NEXT:    fld ft3, 72(a1)
-; LP64D-NEXT:    fld ft4, 80(a1)
-; LP64D-NEXT:    fld ft5, 88(a1)
-; LP64D-NEXT:    fld ft6, 96(a1)
-; LP64D-NEXT:    fld ft7, 104(a1)
-; LP64D-NEXT:    fld fa6, 112(a1)
-; LP64D-NEXT:    fld fa7, 120(a1)
-; LP64D-NEXT:    fld ft8, 128(a1)
-; LP64D-NEXT:    fld ft9, 136(a1)
-; LP64D-NEXT:    fld ft10, 144(a1)
-; LP64D-NEXT:    fld ft11, 152(a1)
-; LP64D-NEXT:    fld fs0, 160(a1)
-; LP64D-NEXT:    fld fs1, 168(a1)
-; LP64D-NEXT:    fld fs2, 176(a1)
-; LP64D-NEXT:    fld fs3, 184(a1)
-; LP64D-NEXT:    fld fs4, 192(a1)
-; LP64D-NEXT:    fld fs5, 200(a1)
-; LP64D-NEXT:    fld fs6, 208(a1)
-; LP64D-NEXT:    fld fs7, 216(a1)
-; LP64D-NEXT:    fld fs8, 248(a1)
-; LP64D-NEXT:    fld fs9, 240(a1)
-; LP64D-NEXT:    fld fs10, 232(a1)
-; LP64D-NEXT:    fld fs11, 224(a1)
-; LP64D-NEXT:    fsd fs8, 248(a1)
-; LP64D-NEXT:    fsd fs9, 240(a1)
-; LP64D-NEXT:    fsd fs10, 232(a1)
-; LP64D-NEXT:    fsd fs11, 224(a1)
-; LP64D-NEXT:    fsd fs7, 216(a1)
-; LP64D-NEXT:    fsd fs6, 208(a1)
-; LP64D-NEXT:    fsd fs5, 200(a1)
-; LP64D-NEXT:    fsd fs4, 192(a1)
-; LP64D-NEXT:    fsd fs3, 184(a1)
-; LP64D-NEXT:    fsd fs2, 176(a1)
-; LP64D-NEXT:    fsd fs1, 168(a1)
-; LP64D-NEXT:    fsd fs0, 160(a1)
-; LP64D-NEXT:    fsd ft11, 152(a1)
-; LP64D-NEXT:    fsd ft10, 144(a1)
-; LP64D-NEXT:    fsd ft9, 136(a1)
-; LP64D-NEXT:    fsd ft8, 128(a1)
-; LP64D-NEXT:    fsd fa7, 120(a1)
-; LP64D-NEXT:    fsd fa6, 112(a1)
-; LP64D-NEXT:    fsd ft7, 104(a1)
-; LP64D-NEXT:    fsd ft6, 96(a1)
-; LP64D-NEXT:    fsd ft5, 88(a1)
-; LP64D-NEXT:    fsd ft4, 80(a1)
-; LP64D-NEXT:    fsd ft3, 72(a1)
-; LP64D-NEXT:    fsd ft2, 64(a1)
-; LP64D-NEXT:    fsd ft1, 56(a1)
-; LP64D-NEXT:    fsd ft0, 48(a1)
-; LP64D-NEXT:    fsd fa0, 40(a1)
-; LP64D-NEXT:    fsd fa1, 32(a1)
-; LP64D-NEXT:    fsd fa2, 24(a1)
-; LP64D-NEXT:    fsd fa3, 16(a1)
-; LP64D-NEXT:    fsd fa4, %lo(var+8)(a0)
-; LP64D-NEXT:    fsd fa5, %lo(var)(a0)
+; LP64D-NEXT:    addi a0, a0, %lo(var)
+; LP64D-NEXT:    fld fa5, 0(a0)
+; LP64D-NEXT:    fld fa4, 8(a0)
+; LP64D-NEXT:    fld fa3, 16(a0)
+; LP64D-NEXT:    fld fa2, 24(a0)
+; LP64D-NEXT:    fld fa1, 32(a0)
+; LP64D-NEXT:    fld fa0, 40(a0)
+; LP64D-NEXT:    fld ft0, 48(a0)
+; LP64D-NEXT:    fld ft1, 56(a0)
+; LP64D-NEXT:    fld ft2, 64(a0)
+; LP64D-NEXT:    fld ft3, 72(a0)
+; LP64D-NEXT:    fld ft4, 80(a0)
+; LP64D-NEXT:    fld ft5, 88(a0)
+; LP64D-NEXT:    fld ft6, 96(a0)
+; LP64D-NEXT:    fld ft7, 104(a0)
+; LP64D-NEXT:    fld fa6, 112(a0)
+; LP64D-NEXT:    fld fa7, 120(a0)
+; LP64D-NEXT:    fld ft8, 128(a0)
+; LP64D-NEXT:    fld ft9, 136(a0)
+; LP64D-NEXT:    fld ft10, 144(a0)
+; LP64D-NEXT:    fld ft11, 152(a0)
+; LP64D-NEXT:    fld fs0, 160(a0)
+; LP64D-NEXT:    fld fs1, 168(a0)
+; LP64D-NEXT:    fld fs2, 176(a0)
+; LP64D-NEXT:    fld fs3, 184(a0)
+; LP64D-NEXT:    fld fs4, 192(a0)
+; LP64D-NEXT:    fld fs5, 200(a0)
+; LP64D-NEXT:    fld fs6, 208(a0)
+; LP64D-NEXT:    fld fs7, 216(a0)
+; LP64D-NEXT:    fld fs8, 248(a0)
+; LP64D-NEXT:    fld fs9, 240(a0)
+; LP64D-NEXT:    fld fs10, 232(a0)
+; LP64D-NEXT:    fld fs11, 224(a0)
+; LP64D-NEXT:    fsd fs8, 248(a0)
+; LP64D-NEXT:    fsd fs9, 240(a0)
+; LP64D-NEXT:    fsd fs10, 232(a0)
+; LP64D-NEXT:    fsd fs11, 224(a0)
+; LP64D-NEXT:    fsd fs7, 216(a0)
+; LP64D-NEXT:    fsd fs6, 208(a0)
+; LP64D-NEXT:    fsd fs5, 200(a0)
+; LP64D-NEXT:    fsd fs4, 192(a0)
+; LP64D-NEXT:    fsd fs3, 184(a0)
+; LP64D-NEXT:    fsd fs2, 176(a0)
+; LP64D-NEXT:    fsd fs1, 168(a0)
+; LP64D-NEXT:    fsd fs0, 160(a0)
+; LP64D-NEXT:    fsd ft11, 152(a0)
+; LP64D-NEXT:    fsd ft10, 144(a0)
+; LP64D-NEXT:    fsd ft9, 136(a0)
+; LP64D-NEXT:    fsd ft8, 128(a0)
+; LP64D-NEXT:    fsd fa7, 120(a0)
+; LP64D-NEXT:    fsd fa6, 112(a0)
+; LP64D-NEXT:    fsd ft7, 104(a0)
+; LP64D-NEXT:    fsd ft6, 96(a0)
+; LP64D-NEXT:    fsd ft5, 88(a0)
+; LP64D-NEXT:    fsd ft4, 80(a0)
+; LP64D-NEXT:    fsd ft3, 72(a0)
+; LP64D-NEXT:    fsd ft2, 64(a0)
+; LP64D-NEXT:    fsd ft1, 56(a0)
+; LP64D-NEXT:    fsd ft0, 48(a0)
+; LP64D-NEXT:    fsd fa0, 40(a0)
+; LP64D-NEXT:    fsd fa1, 32(a0)
+; LP64D-NEXT:    fsd fa2, 24(a0)
+; LP64D-NEXT:    fsd fa3, 16(a0)
+; LP64D-NEXT:    fsd fa4, 8(a0)
+; LP64D-NEXT:    fsd fa5, 0(a0)
 ; LP64D-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
@@ -438,428 +438,422 @@ define void @caller() nounwind {
 ; ILP32-NEXT:    addi sp, sp, -272
 ; ILP32-NEXT:    sw ra, 268(sp) # 4-byte Folded Spill
 ; ILP32-NEXT:    sw s0, 264(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    sw s1, 260(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    lui s0, %hi(var)
-; ILP32-NEXT:    fld fa5, %lo(var)(s0)
+; ILP32-NEXT:    lui a0, %hi(var)
+; ILP32-NEXT:    addi s0, a0, %lo(var)
+; ILP32-NEXT:    fld fa5, 0(s0)
+; ILP32-NEXT:    fsd fa5, 256(sp) # 8-byte Folded Spill
+; ILP32-NEXT:    fld fa5, 8(s0)
 ; ILP32-NEXT:    fsd fa5, 248(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, %lo(var+8)(s0)
+; ILP32-NEXT:    fld fa5, 16(s0)
 ; ILP32-NEXT:    fsd fa5, 240(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    addi s1, s0, %lo(var)
-; ILP32-NEXT:    fld fa5, 16(s1)
+; ILP32-NEXT:    fld fa5, 24(s0)
 ; ILP32-NEXT:    fsd fa5, 232(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 24(s1)
+; ILP32-NEXT:    fld fa5, 32(s0)
 ; ILP32-NEXT:    fsd fa5, 224(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 32(s1)
+; ILP32-NEXT:    fld fa5, 40(s0)
 ; ILP32-NEXT:    fsd fa5, 216(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 40(s1)
+; ILP32-NEXT:    fld fa5, 48(s0)
 ; ILP32-NEXT:    fsd fa5, 208(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 48(s1)
+; ILP32-NEXT:    fld fa5, 56(s0)
 ; ILP32-NEXT:    fsd fa5, 200(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 56(s1)
+; ILP32-NEXT:    fld fa5, 64(s0)
 ; ILP32-NEXT:    fsd fa5, 192(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 64(s1)
+; ILP32-NEXT:    fld fa5, 72(s0)
 ; ILP32-NEXT:    fsd fa5, 184(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 72(s1)
+; ILP32-NEXT:    fld fa5, 80(s0)
 ; ILP32-NEXT:    fsd fa5, 176(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 80(s1)
+; ILP32-NEXT:    fld fa5, 88(s0)
 ; ILP32-NEXT:    fsd fa5, 168(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 88(s1)
+; ILP32-NEXT:    fld fa5, 96(s0)
 ; ILP32-NEXT:    fsd fa5, 160(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 96(s1)
+; ILP32-NEXT:    fld fa5, 104(s0)
 ; ILP32-NEXT:    fsd fa5, 152(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 104(s1)
+; ILP32-NEXT:    fld fa5, 112(s0)
 ; ILP32-NEXT:    fsd fa5, 144(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 112(s1)
+; ILP32-NEXT:    fld fa5, 120(s0)
 ; ILP32-NEXT:    fsd fa5, 136(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 120(s1)
+; ILP32-NEXT:    fld fa5, 128(s0)
 ; ILP32-NEXT:    fsd fa5, 128(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 128(s1)
+; ILP32-NEXT:    fld fa5, 136(s0)
 ; ILP32-NEXT:    fsd fa5, 120(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 136(s1)
+; ILP32-NEXT:    fld fa5, 144(s0)
 ; ILP32-NEXT:    fsd fa5, 112(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 144(s1)
+; ILP32-NEXT:    fld fa5, 152(s0)
 ; ILP32-NEXT:    fsd fa5, 104(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 152(s1)
+; ILP32-NEXT:    fld fa5, 160(s0)
 ; ILP32-NEXT:    fsd fa5, 96(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 160(s1)
+; ILP32-NEXT:    fld fa5, 168(s0)
 ; ILP32-NEXT:    fsd fa5, 88(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 168(s1)
+; ILP32-NEXT:    fld fa5, 176(s0)
 ; ILP32-NEXT:    fsd fa5, 80(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 176(s1)
+; ILP32-NEXT:    fld fa5, 184(s0)
 ; ILP32-NEXT:    fsd fa5, 72(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 184(s1)
+; ILP32-NEXT:    fld fa5, 192(s0)
 ; ILP32-NEXT:    fsd fa5, 64(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 192(s1)
+; ILP32-NEXT:    fld fa5, 200(s0)
 ; ILP32-NEXT:    fsd fa5, 56(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 200(s1)
+; ILP32-NEXT:    fld fa5, 208(s0)
 ; ILP32-NEXT:    fsd fa5, 48(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 208(s1)
+; ILP32-NEXT:    fld fa5, 216(s0)
 ; ILP32-NEXT:    fsd fa5, 40(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 216(s1)
+; ILP32-NEXT:    fld fa5, 224(s0)
 ; ILP32-NEXT:    fsd fa5, 32(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 224(s1)
+; ILP32-NEXT:    fld fa5, 232(s0)
 ; ILP32-NEXT:    fsd fa5, 24(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 232(s1)
+; ILP32-NEXT:    fld fa5, 240(s0)
 ; ILP32-NEXT:    fsd fa5, 16(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 240(s1)
+; ILP32-NEXT:    fld fa5, 248(s0)
 ; ILP32-NEXT:    fsd fa5, 8(sp) # 8-byte Folded Spill
-; ILP32-NEXT:    fld fa5, 248(s1)
-; ILP32-NEXT:    fsd fa5, 0(sp) # 8-byte Folded Spill
 ; ILP32-NEXT:    call callee
-; ILP32-NEXT:    fld fa5, 0(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 248(s1)
 ; ILP32-NEXT:    fld fa5, 8(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 240(s1)
+; ILP32-NEXT:    fsd fa5, 248(s0)
 ; ILP32-NEXT:    fld fa5, 16(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 232(s1)
+; ILP32-NEXT:    fsd fa5, 240(s0)
 ; ILP32-NEXT:    fld fa5, 24(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 224(s1)
+; ILP32-NEXT:    fsd fa5, 232(s0)
 ; ILP32-NEXT:    fld fa5, 32(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 216(s1)
+; ILP32-NEXT:    fsd fa5, 224(s0)
 ; ILP32-NEXT:    fld fa5, 40(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 208(s1)
+; ILP32-NEXT:    fsd fa5, 216(s0)
 ; ILP32-NEXT:    fld fa5, 48(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 200(s1)
+; ILP32-NEXT:    fsd fa5, 208(s0)
 ; ILP32-NEXT:    fld fa5, 56(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 192(s1)
+; ILP32-NEXT:    fsd fa5, 200(s0)
 ; ILP32-NEXT:    fld fa5, 64(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 184(s1)
+; ILP32-NEXT:    fsd fa5, 192(s0)
 ; ILP32-NEXT:    fld fa5, 72(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 176(s1)
+; ILP32-NEXT:    fsd fa5, 184(s0)
 ; ILP32-NEXT:    fld fa5, 80(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 168(s1)
+; ILP32-NEXT:    fsd fa5, 176(s0)
 ; ILP32-NEXT:    fld fa5, 88(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 160(s1)
+; ILP32-NEXT:    fsd fa5, 168(s0)
 ; ILP32-NEXT:    fld fa5, 96(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 152(s1)
+; ILP32-NEXT:    fsd fa5, 160(s0)
 ; ILP32-NEXT:    fld fa5, 104(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 144(s1)
+; ILP32-NEXT:    fsd fa5, 152(s0)
 ; ILP32-NEXT:    fld fa5, 112(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 136(s1)
+; ILP32-NEXT:    fsd fa5, 144(s0)
 ; ILP32-NEXT:    fld fa5, 120(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 128(s1)
+; ILP32-NEXT:    fsd fa5, 136(s0)
 ; ILP32-NEXT:    fld fa5, 128(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 120(s1)
+; ILP32-NEXT:    fsd fa5, 128(s0)
 ; ILP32-NEXT:    fld fa5, 136(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 112(s1)
+; ILP32-NEXT:    fsd fa5, 120(s0)
 ; ILP32-NEXT:    fld fa5, 144(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 104(s1)
+; ILP32-NEXT:    fsd fa5, 112(s0)
 ; ILP32-NEXT:    fld fa5, 152(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 96(s1)
+; ILP32-NEXT:    fsd fa5, 104(s0)
 ; ILP32-NEXT:    fld fa5, 160(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 88(s1)
+; ILP32-NEXT:    fsd fa5, 96(s0)
 ; ILP32-NEXT:    fld fa5, 168(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 80(s1)
+; ILP32-NEXT:    fsd fa5, 88(s0)
 ; ILP32-NEXT:    fld fa5, 176(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 72(s1)
+; ILP32-NEXT:    fsd fa5, 80(s0)
 ; ILP32-NEXT:    fld fa5, 184(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 64(s1)
+; ILP32-NEXT:    fsd fa5, 72(s0)
 ; ILP32-NEXT:    fld fa5, 192(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 56(s1)
+; ILP32-NEXT:    fsd fa5, 64(s0)
 ; ILP32-NEXT:    fld fa5, 200(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 48(s1)
+; ILP32-NEXT:    fsd fa5, 56(s0)
 ; ILP32-NEXT:    fld fa5, 208(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 40(s1)
+; ILP32-NEXT:    fsd fa5, 48(s0)
 ; ILP32-NEXT:    fld fa5, 216(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 32(s1)
+; ILP32-NEXT:    fsd fa5, 40(s0)
 ; ILP32-NEXT:    fld fa5, 224(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 24(s1)
+; ILP32-NEXT:    fsd fa5, 32(s0)
 ; ILP32-NEXT:    fld fa5, 232(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, 16(s1)
+; ILP32-NEXT:    fsd fa5, 24(s0)
 ; ILP32-NEXT:    fld fa5, 240(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, %lo(var+8)(s0)
+; ILP32-NEXT:    fsd fa5, 16(s0)
 ; ILP32-NEXT:    fld fa5, 248(sp) # 8-byte Folded Reload
-; ILP32-NEXT:    fsd fa5, %lo(var)(s0)
+; ILP32-NEXT:    fsd fa5, 8(s0)
+; ILP32-NEXT:    fld fa5, 256(sp) # 8-byte Folded Reload
+; ILP32-NEXT:    fsd fa5, 0(s0)
 ; ILP32-NEXT:    lw ra, 268(sp) # 4-byte Folded Reload
 ; ILP32-NEXT:    lw s0, 264(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    lw s1, 260(sp) # 4-byte Folded Reload
 ; ILP32-NEXT:    addi sp, sp, 272
 ; ILP32-NEXT:    ret
 ;
 ; LP64-LABEL: caller:
 ; LP64:       # %bb.0:
-; LP64-NEXT:    addi sp, sp, -288
-; LP64-NEXT:    sd ra, 280(sp) # 8-byte Folded Spill
-; LP64-NEXT:    sd s0, 272(sp) # 8-byte Folded Spill
-; LP64-NEXT:    sd s1, 264(sp) # 8-byte Folded Spill
-; LP64-NEXT:    lui s0, %hi(var)
-; LP64-NEXT:    fld fa5, %lo(var)(s0)
-; LP64-NEXT:    fsd fa5, 256(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, %lo(var+8)(s0)
+; LP64-NEXT:    addi sp, sp, -272
+; LP64-NEXT:    sd ra, 264(sp) # 8-byte Folded Spill
+; LP64-NEXT:    sd s0, 256(sp) # 8-byte Folded Spill
+; LP64-NEXT:    lui a0, %hi(var)
+; LP64-NEXT:    addi s0, a0, %lo(var)
+; LP64-NEXT:    fld fa5, 0(s0)
 ; LP64-NEXT:    fsd fa5, 248(sp) # 8-byte Folded Spill
-; LP64-NEXT:    addi s1, s0, %lo(var)
-; LP64-NEXT:    fld fa5, 16(s1)
+; LP64-NEXT:    fld fa5, 8(s0)
 ; LP64-NEXT:    fsd fa5, 240(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 24(s1)
+; LP64-NEXT:    fld fa5, 16(s0)
 ; LP64-NEXT:    fsd fa5, 232(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 32(s1)
+; LP64-NEXT:    fld fa5, 24(s0)
 ; LP64-NEXT:    fsd fa5, 224(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 40(s1)
+; LP64-NEXT:    fld fa5, 32(s0)
 ; LP64-NEXT:    fsd fa5, 216(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 48(s1)
+; LP64-NEXT:    fld fa5, 40(s0)
 ; LP64-NEXT:    fsd fa5, 208(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 56(s1)
+; LP64-NEXT:    fld fa5, 48(s0)
 ; LP64-NEXT:    fsd fa5, 200(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 64(s1)
+; LP64-NEXT:    fld fa5, 56(s0)
 ; LP64-NEXT:    fsd fa5, 192(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 72(s1)
+; LP64-NEXT:    fld fa5, 64(s0)
 ; LP64-NEXT:    fsd fa5, 184(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 80(s1)
+; LP64-NEXT:    fld fa5, 72(s0)
 ; LP64-NEXT:    fsd fa5, 176(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 88(s1)
+; LP64-NEXT:    fld fa5, 80(s0)
 ; LP64-NEXT:    fsd fa5, 168(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 96(s1)
+; LP64-NEXT:    fld fa5, 88(s0)
 ; LP64-NEXT:    fsd fa5, 160(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 104(s1)
+; LP64-NEXT:    fld fa5, 96(s0)
 ; LP64-NEXT:    fsd fa5, 152(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 112(s1)
+; LP64-NEXT:    fld fa5, 104(s0)
 ; LP64-NEXT:    fsd fa5, 144(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 120(s1)
+; LP64-NEXT:    fld fa5, 112(s0)
 ; LP64-NEXT:    fsd fa5, 136(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 128(s1)
+; LP64-NEXT:    fld fa5, 120(s0)
 ; LP64-NEXT:    fsd fa5, 128(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 136(s1)
+; LP64-NEXT:    fld fa5, 128(s0)
 ; LP64-NEXT:    fsd fa5, 120(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 144(s1)
+; LP64-NEXT:    fld fa5, 136(s0)
 ; LP64-NEXT:    fsd fa5, 112(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 152(s1)
+; LP64-NEXT:    fld fa5, 144(s0)
 ; LP64-NEXT:    fsd fa5, 104(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 160(s1)
+; LP64-NEXT:    fld fa5, 152(s0)
 ; LP64-NEXT:    fsd fa5, 96(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 168(s1)
+; LP64-NEXT:    fld fa5, 160(s0)
 ; LP64-NEXT:    fsd fa5, 88(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 176(s1)
+; LP64-NEXT:    fld fa5, 168(s0)
 ; LP64-NEXT:    fsd fa5, 80(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 184(s1)
+; LP64-NEXT:    fld fa5, 176(s0)
 ; LP64-NEXT:    fsd fa5, 72(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 192(s1)
+; LP64-NEXT:    fld fa5, 184(s0)
 ; LP64-NEXT:    fsd fa5, 64(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 200(s1)
+; LP64-NEXT:    fld fa5, 192(s0)
 ; LP64-NEXT:    fsd fa5, 56(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 208(s1)
+; LP64-NEXT:    fld fa5, 200(s0)
 ; LP64-NEXT:    fsd fa5, 48(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 216(s1)
+; LP64-NEXT:    fld fa5, 208(s0)
 ; LP64-NEXT:    fsd fa5, 40(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 224(s1)
+; LP64-NEXT:    fld fa5, 216(s0)
 ; LP64-NEXT:    fsd fa5, 32(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 232(s1)
+; LP64-NEXT:    fld fa5, 224(s0)
 ; LP64-NEXT:    fsd fa5, 24(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 240(s1)
+; LP64-NEXT:    fld fa5, 232(s0)
 ; LP64-NEXT:    fsd fa5, 16(sp) # 8-byte Folded Spill
-; LP64-NEXT:    fld fa5, 248(s1)
+; LP64-NEXT:    fld fa5, 240(s0)
 ; LP64-NEXT:    fsd fa5, 8(sp) # 8-byte Folded Spill
+; LP64-NEXT:    fld fa5, 248(s0)
+; LP64-NEXT:    fsd fa5, 0(sp) # 8-byte Folded Spill
 ; LP64-NEXT:    call callee
+; LP64-NEXT:    fld fa5, 0(sp) # 8-byte Folded Reload
+; LP64-NEXT:    fsd fa5, 248(s0)
 ; LP64-NEXT:    fld fa5, 8(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 248(s1)
+; LP64-NEXT:    fsd fa5, 240(s0)
 ; LP64-NEXT:    fld fa5, 16(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 240(s1)
+; LP64-NEXT:    fsd fa5, 232(s0)
 ; LP64-NEXT:    fld fa5, 24(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 232(s1)
+; LP64-NEXT:    fsd fa5, 224(s0)
 ; LP64-NEXT:    fld fa5, 32(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 224(s1)
+; LP64-NEXT:    fsd fa5, 216(s0)
 ; LP64-NEXT:    fld fa5, 40(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 216(s1)
+; LP64-NEXT:    fsd fa5, 208(s0)
 ; LP64-NEXT:    fld fa5, 48(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 208(s1)
+; LP64-NEXT:    fsd fa5, 200(s0)
 ; LP64-NEXT:    fld fa5, 56(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 200(s1)
+; LP64-NEXT:    fsd fa5, 192(s0)
 ; LP64-NEXT:    fld fa5, 64(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 192(s1)
+; LP64-NEXT:    fsd fa5, 184(s0)
 ; LP64-NEXT:    fld fa5, 72(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 184(s1)
+; LP64-NEXT:    fsd fa5, 176(s0)
 ; LP64-NEXT:    fld fa5, 80(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 176(s1)
+; LP64-NEXT:    fsd fa5, 168(s0)
 ; LP64-NEXT:    fld fa5, 88(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 168(s1)
+; LP64-NEXT:    fsd fa5, 160(s0)
 ; LP64-NEXT:    fld fa5, 96(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 160(s1)
+; LP64-NEXT:    fsd fa5, 152(s0)
 ; LP64-NEXT:    fld fa5, 104(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 152(s1)
+; LP64-NEXT:    fsd fa5, 144(s0)
 ; LP64-NEXT:    fld fa5, 112(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 144(s1)
+; LP64-NEXT:    fsd fa5, 136(s0)
 ; LP64-NEXT:    fld fa5, 120(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 136(s1)
+; LP64-NEXT:    fsd fa5, 128(s0)
 ; LP64-NEXT:    fld fa5, 128(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 128(s1)
+; LP64-NEXT:    fsd fa5, 120(s0)
 ; LP64-NEXT:    fld fa5, 136(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 120(s1)
+; LP64-NEXT:    fsd fa5, 112(s0)
 ; LP64-NEXT:    fld fa5, 144(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 112(s1)
+; LP64-NEXT:    fsd fa5, 104(s0)
 ; LP64-NEXT:    fld fa5, 152(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 104(s1)
+; LP64-NEXT:    fsd fa5, 96(s0)
 ; LP64-NEXT:    fld fa5, 160(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 96(s1)
+; LP64-NEXT:    fsd fa5, 88(s0)
 ; LP64-NEXT:    fld fa5, 168(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 88(s1)
+; LP64-NEXT:    fsd fa5, 80(s0)
 ; LP64-NEXT:    fld fa5, 176(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 80(s1)
+; LP64-NEXT:    fsd fa5, 72(s0)
 ; LP64-NEXT:    fld fa5, 184(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 72(s1)
+; LP64-NEXT:    fsd fa5, 64(s0)
 ; LP64-NEXT:    fld fa5, 192(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 64(s1)
+; LP64-NEXT:    fsd fa5, 56(s0)
 ; LP64-NEXT:    fld fa5, 200(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 56(s1)
+; LP64-NEXT:    fsd fa5, 48(s0)
 ; LP64-NEXT:    fld fa5, 208(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 48(s1)
+; LP64-NEXT:    fsd fa5, 40(s0)
 ; LP64-NEXT:    fld fa5, 216(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 40(s1)
+; LP64-NEXT:    fsd fa5, 32(s0)
 ; LP64-NEXT:    fld fa5, 224(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 32(s1)
+; LP64-NEXT:    fsd fa5, 24(s0)
 ; LP64-NEXT:    fld fa5, 232(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 24(s1)
+; LP64-NEXT:    fsd fa5, 16(s0)
 ; LP64-NEXT:    fld fa5, 240(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, 16(s1)
+; LP64-NEXT:    fsd fa5, 8(s0)
 ; LP64-NEXT:    fld fa5, 248(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, %lo(var+8)(s0)
-; LP64-NEXT:    fld fa5, 256(sp) # 8-byte Folded Reload
-; LP64-NEXT:    fsd fa5, %lo(var)(s0)
-; LP64-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
-; LP64-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
-; LP64-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
-; LP64-NEXT:    addi sp, sp, 288
+; LP64-NEXT:    fsd fa5, 0(s0)
+; LP64-NEXT:    ld ra, 264(sp) # 8-byte Folded Reload
+; LP64-NEXT:    ld s0, 256(sp) # 8-byte Folded Reload
+; LP64-NEXT:    addi sp, sp, 272
 ; LP64-NEXT:    ret
 ;
 ; LP64E-LABEL: caller:
 ; LP64E:       # %bb.0:
-; LP64E-NEXT:    addi sp, sp, -280
-; LP64E-NEXT:    sd ra, 272(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    sd s0, 264(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    sd s1, 256(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    lui s0, %hi(var)
-; LP64E-NEXT:    fld fa5, %lo(var)(s0)
+; LP64E-NEXT:    addi sp, sp, -272
+; LP64E-NEXT:    sd ra, 264(sp) # 8-byte Folded Spill
+; LP64E-NEXT:    sd s0, 256(sp) # 8-byte Folded Spill
+; LP64E-NEXT:    lui a0, %hi(var)
+; LP64E-NEXT:    addi s0, a0, %lo(var)
+; LP64E-NEXT:    fld fa5, 0(s0)
 ; LP64E-NEXT:    fsd fa5, 248(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, %lo(var+8)(s0)
+; LP64E-NEXT:    fld fa5, 8(s0)
 ; LP64E-NEXT:    fsd fa5, 240(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    addi s1, s0, %lo(var)
-; LP64E-NEXT:    fld fa5, 16(s1)
+; LP64E-NEXT:    fld fa5, 16(s0)
 ; LP64E-NEXT:    fsd fa5, 232(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 24(s1)
+; LP64E-NEXT:    fld fa5, 24(s0)
 ; LP64E-NEXT:    fsd fa5, 224(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 32(s1)
+; LP64E-NEXT:    fld fa5, 32(s0)
 ; LP64E-NEXT:    fsd fa5, 216(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 40(s1)
+; LP64E-NEXT:    fld fa5, 40(s0)
 ; LP64E-NEXT:    fsd fa5, 208(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 48(s1)
+; LP64E-NEXT:    fld fa5, 48(s0)
 ; LP64E-NEXT:    fsd fa5, 200(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 56(s1)
+; LP64E-NEXT:    fld fa5, 56(s0)
 ; LP64E-NEXT:    fsd fa5, 192(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 64(s1)
+; LP64E-NEXT:    fld fa5, 64(s0)
 ; LP64E-NEXT:    fsd fa5, 184(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 72(s1)
+; LP64E-NEXT:    fld fa5, 72(s0)
 ; LP64E-NEXT:    fsd fa5, 176(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 80(s1)
+; LP64E-NEXT:    fld fa5, 80(s0)
 ; LP64E-NEXT:    fsd fa5, 168(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 88(s1)
+; LP64E-NEXT:    fld fa5, 88(s0)
 ; LP64E-NEXT:    fsd fa5, 160(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 96(s1)
+; LP64E-NEXT:    fld fa5, 96(s0)
 ; LP64E-NEXT:    fsd fa5, 152(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 104(s1)
+; LP64E-NEXT:    fld fa5, 104(s0)
 ; LP64E-NEXT:    fsd fa5, 144(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 112(s1)
+; LP64E-NEXT:    fld fa5, 112(s0)
 ; LP64E-NEXT:    fsd fa5, 136(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 120(s1)
+; LP64E-NEXT:    fld fa5, 120(s0)
 ; LP64E-NEXT:    fsd fa5, 128(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 128(s1)
+; LP64E-NEXT:    fld fa5, 128(s0)
 ; LP64E-NEXT:    fsd fa5, 120(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 136(s1)
+; LP64E-NEXT:    fld fa5, 136(s0)
 ; LP64E-NEXT:    fsd fa5, 112(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 144(s1)
+; LP64E-NEXT:    fld fa5, 144(s0)
 ; LP64E-NEXT:    fsd fa5, 104(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 152(s1)
+; LP64E-NEXT:    fld fa5, 152(s0)
 ; LP64E-NEXT:    fsd fa5, 96(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 160(s1)
+; LP64E-NEXT:    fld fa5, 160(s0)
 ; LP64E-NEXT:    fsd fa5, 88(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 168(s1)
+; LP64E-NEXT:    fld fa5, 168(s0)
 ; LP64E-NEXT:    fsd fa5, 80(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 176(s1)
+; LP64E-NEXT:    fld fa5, 176(s0)
 ; LP64E-NEXT:    fsd fa5, 72(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 184(s1)
+; LP64E-NEXT:    fld fa5, 184(s0)
 ; LP64E-NEXT:    fsd fa5, 64(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 192(s1)
+; LP64E-NEXT:    fld fa5, 192(s0)
 ; LP64E-NEXT:    fsd fa5, 56(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 200(s1)
+; LP64E-NEXT:    fld fa5, 200(s0)
 ; LP64E-NEXT:    fsd fa5, 48(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 208(s1)
+; LP64E-NEXT:    fld fa5, 208(s0)
 ; LP64E-NEXT:    fsd fa5, 40(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 216(s1)
+; LP64E-NEXT:    fld fa5, 216(s0)
 ; LP64E-NEXT:    fsd fa5, 32(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 224(s1)
+; LP64E-NEXT:    fld fa5, 224(s0)
 ; LP64E-NEXT:    fsd fa5, 24(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 232(s1)
+; LP64E-NEXT:    fld fa5, 232(s0)
 ; LP64E-NEXT:    fsd fa5, 16(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 240(s1)
+; LP64E-NEXT:    fld fa5, 240(s0)
 ; LP64E-NEXT:    fsd fa5, 8(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    fld fa5, 248(s1)
+; LP64E-NEXT:    fld fa5, 248(s0)
 ; LP64E-NEXT:    fsd fa5, 0(sp) # 8-byte Folded Spill
 ; LP64E-NEXT:    call callee
 ; LP64E-NEXT:    fld fa5, 0(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 248(s1)
+; LP64E-NEXT:    fsd fa5, 248(s0)
 ; LP64E-NEXT:    fld fa5, 8(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 240(s1)
+; LP64E-NEXT:    fsd fa5, 240(s0)
 ; LP64E-NEXT:    fld fa5, 16(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 232(s1)
+; LP64E-NEXT:    fsd fa5, 232(s0)
 ; LP64E-NEXT:    fld fa5, 24(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 224(s1)
+; LP64E-NEXT:    fsd fa5, 224(s0)
 ; LP64E-NEXT:    fld fa5, 32(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 216(s1)
+; LP64E-NEXT:    fsd fa5, 216(s0)
 ; LP64E-NEXT:    fld fa5, 40(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 208(s1)
+; LP64E-NEXT:    fsd fa5, 208(s0)
 ; LP64E-NEXT:    fld fa5, 48(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 200(s1)
+; LP64E-NEXT:    fsd fa5, 200(s0)
 ; LP64E-NEXT:    fld fa5, 56(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 192(s1)
+; LP64E-NEXT:    fsd fa5, 192(s0)
 ; LP64E-NEXT:    fld fa5, 64(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 184(s1)
+; LP64E-NEXT:    fsd fa5, 184(s0)
 ; LP64E-NEXT:    fld fa5, 72(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 176(s1)
+; LP64E-NEXT:    fsd fa5, 176(s0)
 ; LP64E-NEXT:    fld fa5, 80(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 168(s1)
+; LP64E-NEXT:    fsd fa5, 168(s0)
 ; LP64E-NEXT:    fld fa5, 88(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 160(s1)
+; LP64E-NEXT:    fsd fa5, 160(s0)
 ; LP64E-NEXT:    fld fa5, 96(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 152(s1)
+; LP64E-NEXT:    fsd fa5, 152(s0)
 ; LP64E-NEXT:    fld fa5, 104(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 144(s1)
+; LP64E-NEXT:    fsd fa5, 144(s0)
 ; LP64E-NEXT:    fld fa5, 112(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 136(s1)
+; LP64E-NEXT:    fsd fa5, 136(s0)
 ; LP64E-NEXT:    fld fa5, 120(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 128(s1)
+; LP64E-NEXT:    fsd fa5, 128(s0)
 ; LP64E-NEXT:    fld fa5, 128(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 120(s1)
+; LP64E-NEXT:    fsd fa5, 120(s0)
 ; LP64E-NEXT:    fld fa5, 136(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 112(s1)
+; LP64E-NEXT:    fsd fa5, 112(s0)
 ; LP64E-NEXT:    fld fa5, 144(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 104(s1)
+; LP64E-NEXT:    fsd fa5, 104(s0)
 ; LP64E-NEXT:    fld fa5, 152(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 96(s1)
+; LP64E-NEXT:    fsd fa5, 96(s0)
 ; LP64E-NEXT:    fld fa5, 160(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 88(s1)
+; LP64E-NEXT:    fsd fa5, 88(s0)
 ; LP64E-NEXT:    fld fa5, 168(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 80(s1)
+; LP64E-NEXT:    fsd fa5, 80(s0)
 ; LP64E-NEXT:    fld fa5, 176(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 72(s1)
+; LP64E-NEXT:    fsd fa5, 72(s0)
 ; LP64E-NEXT:    fld fa5, 184(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 64(s1)
+; LP64E-NEXT:    fsd fa5, 64(s0)
 ; LP64E-NEXT:    fld fa5, 192(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 56(s1)
+; LP64E-NEXT:    fsd fa5, 56(s0)
 ; LP64E-NEXT:    fld fa5, 200(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 48(s1)
+; LP64E-NEXT:    fsd fa5, 48(s0)
 ; LP64E-NEXT:    fld fa5, 208(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 40(s1)
+; LP64E-NEXT:    fsd fa5, 40(s0)
 ; LP64E-NEXT:    fld fa5, 216(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 32(s1)
+; LP64E-NEXT:    fsd fa5, 32(s0)
 ; LP64E-NEXT:    fld fa5, 224(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 24(s1)
+; LP64E-NEXT:    fsd fa5, 24(s0)
 ; LP64E-NEXT:    fld fa5, 232(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, 16(s1)
+; LP64E-NEXT:    fsd fa5, 16(s0)
 ; LP64E-NEXT:    fld fa5, 240(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, %lo(var+8)(s0)
+; LP64E-NEXT:    fsd fa5, 8(s0)
 ; LP64E-NEXT:    fld fa5, 248(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    fsd fa5, %lo(var)(s0)
-; LP64E-NEXT:    ld ra, 272(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    ld s0, 264(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    ld s1, 256(sp) # 8-byte Folded Reload
-; LP64E-NEXT:    addi sp, sp, 280
+; LP64E-NEXT:    fsd fa5, 0(s0)
+; LP64E-NEXT:    ld ra, 264(sp) # 8-byte Folded Reload
+; LP64E-NEXT:    ld s0, 256(sp) # 8-byte Folded Reload
+; LP64E-NEXT:    addi sp, sp, 272
 ; LP64E-NEXT:    ret
 ;
 ; ILP32D-LABEL: caller:
@@ -867,285 +861,281 @@ define void @caller() nounwind {
 ; ILP32D-NEXT:    addi sp, sp, -272
 ; ILP32D-NEXT:    sw ra, 268(sp) # 4-byte Folded Spill
 ; ILP32D-NEXT:    sw s0, 264(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    sw s1, 260(sp) # 4-byte Folded Spill
-; ILP32D-NEXT:    fsd fs0, 248(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs1, 240(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs2, 232(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs3, 224(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs4, 216(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs5, 208(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs6, 200(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs7, 192(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs8, 184(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs9, 176(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs10, 168(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fsd fs11, 160(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    lui s0, %hi(var)
-; ILP32D-NEXT:    fld fa5, %lo(var)(s0)
+; ILP32D-NEXT:    fsd fs0, 256(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs1, 248(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs2, 240(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs3, 232(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs4, 224(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs5, 216(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs6, 208(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs7, 200(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs8, 192(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs9, 184(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs10, 176(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fsd fs11, 168(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    lui a0, %hi(var)
+; ILP32D-NEXT:    addi s0, a0, %lo(var)
+; ILP32D-NEXT:    fld fa5, 0(s0)
+; ILP32D-NEXT:    fsd fa5, 160(sp) # 8-byte Folded Spill
+; ILP32D-NEXT:    fld fa5, 8(s0)
 ; ILP32D-NEXT:    fsd fa5, 152(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, %lo(var+8)(s0)
+; ILP32D-NEXT:    fld fa5, 16(s0)
 ; ILP32D-NEXT:    fsd fa5, 144(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    addi s1, s0, %lo(var)
-; ILP32D-NEXT:    fld fa5, 16(s1)
+; ILP32D-NEXT:    fld fa5, 24(s0)
 ; ILP32D-NEXT:    fsd fa5, 136(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 24(s1)
+; ILP32D-NEXT:    fld fa5, 32(s0)
 ; ILP32D-NEXT:    fsd fa5, 128(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 32(s1)
+; ILP32D-NEXT:    fld fa5, 40(s0)
 ; ILP32D-NEXT:    fsd fa5, 120(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 40(s1)
+; ILP32D-NEXT:    fld fa5, 48(s0)
 ; ILP32D-NEXT:    fsd fa5, 112(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 48(s1)
+; ILP32D-NEXT:    fld fa5, 56(s0)
 ; ILP32D-NEXT:    fsd fa5, 104(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 56(s1)
+; ILP32D-NEXT:    fld fa5, 64(s0)
 ; ILP32D-NEXT:    fsd fa5, 96(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 64(s1)
+; ILP32D-NEXT:    fld fa5, 72(s0)
 ; ILP32D-NEXT:    fsd fa5, 88(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 72(s1)
+; ILP32D-NEXT:    fld fa5, 80(s0)
 ; ILP32D-NEXT:    fsd fa5, 80(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 80(s1)
+; ILP32D-NEXT:    fld fa5, 88(s0)
 ; ILP32D-NEXT:    fsd fa5, 72(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 88(s1)
+; ILP32D-NEXT:    fld fa5, 96(s0)
 ; ILP32D-NEXT:    fsd fa5, 64(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 96(s1)
+; ILP32D-NEXT:    fld fa5, 104(s0)
 ; ILP32D-NEXT:    fsd fa5, 56(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 104(s1)
+; ILP32D-NEXT:    fld fa5, 112(s0)
 ; ILP32D-NEXT:    fsd fa5, 48(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 112(s1)
+; ILP32D-NEXT:    fld fa5, 120(s0)
 ; ILP32D-NEXT:    fsd fa5, 40(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 120(s1)
+; ILP32D-NEXT:    fld fa5, 128(s0)
 ; ILP32D-NEXT:    fsd fa5, 32(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 128(s1)
+; ILP32D-NEXT:    fld fa5, 136(s0)
 ; ILP32D-NEXT:    fsd fa5, 24(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 136(s1)
+; ILP32D-NEXT:    fld fa5, 144(s0)
 ; ILP32D-NEXT:    fsd fa5, 16(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 144(s1)
+; ILP32D-NEXT:    fld fa5, 152(s0)
 ; ILP32D-NEXT:    fsd fa5, 8(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fa5, 152(s1)
-; ILP32D-NEXT:    fsd fa5, 0(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    fld fs8, 160(s1)
-; ILP32D-NEXT:    fld fs9, 168(s1)
-; ILP32D-NEXT:    fld fs10, 176(s1)
-; ILP32D-NEXT:    fld fs11, 184(s1)
-; ILP32D-NEXT:    fld fs0, 192(s1)
-; ILP32D-NEXT:    fld fs1, 200(s1)
-; ILP32D-NEXT:    fld fs2, 208(s1)
-; ILP32D-NEXT:    fld fs3, 216(s1)
-; ILP32D-NEXT:    fld fs4, 224(s1)
-; ILP32D-NEXT:    fld fs5, 232(s1)
-; ILP32D-NEXT:    fld fs6, 240(s1)
-; ILP32D-NEXT:    fld fs7, 248(s1)
+; ILP32D-NEXT:    fld fs8, 160(s0)
+; ILP32D-NEXT:    fld fs9, 168(s0)
+; ILP32D-NEXT:    fld fs10, 176(s0)
+; ILP32D-NEXT:    fld fs11, 184(s0)
+; ILP32D-NEXT:    fld fs0, 192(s0)
+; ILP32D-NEXT:    fld fs1, 200(s0)
+; ILP32D-NEXT:    fld fs2, 208(s0)
+; ILP32D-NEXT:    fld fs3, 216(s0)
+; ILP32D-NEXT:    fld fs4, 224(s0)
+; ILP32D-NEXT:    fld fs5, 232(s0)
+; ILP32D-NEXT:    fld fs6, 240(s0)
+; ILP32D-NEXT:    fld fs7, 248(s0)
 ; ILP32D-NEXT:    call callee
-; ILP32D-NEXT:    fsd fs7, 248(s1)
-; ILP32D-NEXT:    fsd fs6, 240(s1)
-; ILP32D-NEXT:    fsd fs5, 232(s1)
-; ILP32D-NEXT:    fsd fs4, 224(s1)
-; ILP32D-NEXT:    fsd fs3, 216(s1)
-; ILP32D-NEXT:    fsd fs2, 208(s1)
-; ILP32D-NEXT:    fsd fs1, 200(s1)
-; ILP32D-NEXT:    fsd fs0, 192(s1)
-; ILP32D-NEXT:    fsd fs11, 184(s1)
-; ILP32D-NEXT:    fsd fs10, 176(s1)
-; ILP32D-NEXT:    fsd fs9, 168(s1)
-; ILP32D-NEXT:    fsd fs8, 160(s1)
-; ILP32D-NEXT:    fld fa5, 0(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 152(s1)
+; ILP32D-NEXT:    fsd fs7, 248(s0)
+; ILP32D-NEXT:    fsd fs6, 240(s0)
+; ILP32D-NEXT:    fsd fs5, 232(s0)
+; ILP32D-NEXT:    fsd fs4, 224(s0)
+; ILP32D-NEXT:    fsd fs3, 216(s0)
+; ILP32D-NEXT:    fsd fs2, 208(s0)
+; ILP32D-NEXT:    fsd fs1, 200(s0)
+; ILP32D-NEXT:    fsd fs0, 192(s0)
+; ILP32D-NEXT:    fsd fs11, 184(s0)
+; ILP32D-NEXT:    fsd fs10, 176(s0)
+; ILP32D-NEXT:    fsd fs9, 168(s0)
+; ILP32D-NEXT:    fsd fs8, 160(s0)
 ; ILP32D-NEXT:    fld fa5, 8(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 144(s1)
+; ILP32D-NEXT:    fsd fa5, 152(s0)
 ; ILP32D-NEXT:    fld fa5, 16(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 136(s1)
+; ILP32D-NEXT:    fsd fa5, 144(s0)
 ; ILP32D-NEXT:    fld fa5, 24(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 128(s1)
+; ILP32D-NEXT:    fsd fa5, 136(s0)
 ; ILP32D-NEXT:    fld fa5, 32(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 120(s1)
+; ILP32D-NEXT:    fsd fa5, 128(s0)
 ; ILP32D-NEXT:    fld fa5, 40(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 112(s1)
+; ILP32D-NEXT:    fsd fa5, 120(s0)
 ; ILP32D-NEXT:    fld fa5, 48(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 104(s1)
+; ILP32D-NEXT:    fsd fa5, 112(s0)
 ; ILP32D-NEXT:    fld fa5, 56(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 96(s1)
+; ILP32D-NEXT:    fsd fa5, 104(s0)
 ; ILP32D-NEXT:    fld fa5, 64(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 88(s1)
+; ILP32D-NEXT:    fsd fa5, 96(s0)
 ; ILP32D-NEXT:    fld fa5, 72(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 80(s1)
+; ILP32D-NEXT:    fsd fa5, 88(s0)
 ; ILP32D-NEXT:    fld fa5, 80(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 72(s1)
+; ILP32D-NEXT:    fsd fa5, 80(s0)
 ; ILP32D-NEXT:    fld fa5, 88(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 64(s1)
+; ILP32D-NEXT:    fsd fa5, 72(s0)
 ; ILP32D-NEXT:    fld fa5, 96(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 56(s1)
+; ILP32D-NEXT:    fsd fa5, 64(s0)
 ; ILP32D-NEXT:    fld fa5, 104(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 48(s1)
+; ILP32D-NEXT:    fsd fa5, 56(s0)
 ; ILP32D-NEXT:    fld fa5, 112(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 40(s1)
+; ILP32D-NEXT:    fsd fa5, 48(s0)
 ; ILP32D-NEXT:    fld fa5, 120(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 32(s1)
+; ILP32D-NEXT:    fsd fa5, 40(s0)
 ; ILP32D-NEXT:    fld fa5, 128(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 24(s1)
+; ILP32D-NEXT:    fsd fa5, 32(s0)
 ; ILP32D-NEXT:    fld fa5, 136(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, 16(s1)
+; ILP32D-NEXT:    fsd fa5, 24(s0)
 ; ILP32D-NEXT:    fld fa5, 144(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, %lo(var+8)(s0)
+; ILP32D-NEXT:    fsd fa5, 16(s0)
 ; ILP32D-NEXT:    fld fa5, 152(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fsd fa5, %lo(var)(s0)
+; ILP32D-NEXT:    fsd fa5, 8(s0)
+; ILP32D-NEXT:    fld fa5, 160(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fsd fa5, 0(s0)
 ; ILP32D-NEXT:    lw ra, 268(sp) # 4-byte Folded Reload
 ; ILP32D-NEXT:    lw s0, 264(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    lw s1, 260(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    fld fs0, 248(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs1, 240(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs2, 232(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs3, 224(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs4, 216(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs5, 208(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs6, 200(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs7, 192(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs8, 184(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs9, 176(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs10, 168(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs11, 160(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs0, 256(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs1, 248(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs2, 240(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs3, 232(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs4, 224(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs5, 216(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs6, 208(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs7, 200(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs8, 192(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs9, 184(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs10, 176(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs11, 168(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    addi sp, sp, 272
 ; ILP32D-NEXT:    ret
 ;
 ; LP64D-LABEL: caller:
 ; LP64D:       # %bb.0:
-; LP64D-NEXT:    addi sp, sp, -288
-; LP64D-NEXT:    sd ra, 280(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    sd s0, 272(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    sd s1, 264(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs0, 256(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs1, 248(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs2, 240(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs3, 232(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs4, 224(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs5, 216(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs6, 208(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs7, 200(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs8, 192(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs9, 184(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs10, 176(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fsd fs11, 168(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    lui s0, %hi(var)
-; LP64D-NEXT:    fld fa5, %lo(var)(s0)
-; LP64D-NEXT:    fsd fa5, 160(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, %lo(var+8)(s0)
+; LP64D-NEXT:    addi sp, sp, -272
+; LP64D-NEXT:    sd ra, 264(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    sd s0, 256(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs0, 248(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs1, 240(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs2, 232(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs3, 224(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs4, 216(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs5, 208(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs6, 200(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs7, 192(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs8, 184(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs9, 176(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs10, 168(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fsd fs11, 160(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    lui a0, %hi(var)
+; LP64D-NEXT:    addi s0, a0, %lo(var)
+; LP64D-NEXT:    fld fa5, 0(s0)
 ; LP64D-NEXT:    fsd fa5, 152(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    addi s1, s0, %lo(var)
-; LP64D-NEXT:    fld fa5, 16(s1)
+; LP64D-NEXT:    fld fa5, 8(s0)
 ; LP64D-NEXT:    fsd fa5, 144(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 24(s1)
+; LP64D-NEXT:    fld fa5, 16(s0)
 ; LP64D-NEXT:    fsd fa5, 136(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 32(s1)
+; LP64D-NEXT:    fld fa5, 24(s0)
 ; LP64D-NEXT:    fsd fa5, 128(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 40(s1)
+; LP64D-NEXT:    fld fa5, 32(s0)
 ; LP64D-NEXT:    fsd fa5, 120(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 48(s1)
+; LP64D-NEXT:    fld fa5, 40(s0)
 ; LP64D-NEXT:    fsd fa5, 112(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 56(s1)
+; LP64D-NEXT:    fld fa5, 48(s0)
 ; LP64D-NEXT:    fsd fa5, 104(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 64(s1)
+; LP64D-NEXT:    fld fa5, 56(s0)
 ; LP64D-NEXT:    fsd fa5, 96(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 72(s1)
+; LP64D-NEXT:    fld fa5, 64(s0)
 ; LP64D-NEXT:    fsd fa5, 88(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 80(s1)
+; LP64D-NEXT:    fld fa5, 72(s0)
 ; LP64D-NEXT:    fsd fa5, 80(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 88(s1)
+; LP64D-NEXT:    fld fa5, 80(s0)
 ; LP64D-NEXT:    fsd fa5, 72(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 96(s1)
+; LP64D-NEXT:    fld fa5, 88(s0)
 ; LP64D-NEXT:    fsd fa5, 64(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 104(s1)
+; LP64D-NEXT:    fld fa5, 96(s0)
 ; LP64D-NEXT:    fsd fa5, 56(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 112(s1)
+; LP64D-NEXT:    fld fa5, 104(s0)
 ; LP64D-NEXT:    fsd fa5, 48(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 120(s1)
+; LP64D-NEXT:    fld fa5, 112(s0)
 ; LP64D-NEXT:    fsd fa5, 40(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 128(s1)
+; LP64D-NEXT:    fld fa5, 120(s0)
 ; LP64D-NEXT:    fsd fa5, 32(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 136(s1)
+; LP64D-NEXT:    fld fa5, 128(s0)
 ; LP64D-NEXT:    fsd fa5, 24(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 144(s1)
+; LP64D-NEXT:    fld fa5, 136(s0)
 ; LP64D-NEXT:    fsd fa5, 16(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fa5, 152(s1)
+; LP64D-NEXT:    fld fa5, 144(s0)
 ; LP64D-NEXT:    fsd fa5, 8(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    fld fs8, 160(s1)
-; LP64D-NEXT:    fld fs9, 168(s1)
-; LP64D-NEXT:    fld fs10, 176(s1)
-; LP64D-NEXT:    fld fs11, 184(s1)
-; LP64D-NEXT:    fld fs0, 192(s1)
-; LP64D-NEXT:    fld fs1, 200(s1)
-; LP64D-NEXT:    fld fs2, 208(s1)
-; LP64D-NEXT:    fld fs3, 216(s1)
-; LP64D-NEXT:    fld fs4, 224(s1)
-; LP64D-NEXT:    fld fs5, 232(s1)
-; LP64D-NEXT:    fld fs6, 240(s1)
-; LP64D-NEXT:    fld fs7, 248(s1)
+; LP64D-NEXT:    fld fa5, 152(s0)
+; LP64D-NEXT:    fsd fa5, 0(sp) # 8-byte Folded Spill
+; LP64D-NEXT:    fld fs8, 160(s0)
+; LP64D-NEXT:    fld fs9, 168(s0)
+; LP64D-NEXT:    fld fs10, 176(s0)
+; LP64D-NEXT:    fld fs11, 184(s0)
+; LP64D-NEXT:    fld fs0, 192(s0)
+; LP64D-NEXT:    fld fs1, 200(s0)
+; LP64D-NEXT:    fld fs2, 208(s0)
+; LP64D-NEXT:    fld fs3, 216(s0)
+; LP64D-NEXT:    fld fs4, 224(s0)
+; LP64D-NEXT:    fld fs5, 232(s0)
+; LP64D-NEXT:    fld fs6, 240(s0)
+; LP64D-NEXT:    fld fs7, 248(s0)
 ; LP64D-NEXT:    call callee
-; LP64D-NEXT:    fsd fs7, 248(s1)
-; LP64D-NEXT:    fsd fs6, 240(s1)
-; LP64D-NEXT:    fsd fs5, 232(s1)
-; LP64D-NEXT:    fsd fs4, 224(s1)
-; LP64D-NEXT:    fsd fs3, 216(s1)
-; LP64D-NEXT:    fsd fs2, 208(s1)
-; LP64D-NEXT:    fsd fs1, 200(s1)
-; LP64D-NEXT:    fsd fs0, 192(s1)
-; LP64D-NEXT:    fsd fs11, 184(s1)
-; LP64D-NEXT:    fsd fs10, 176(s1)
-; LP64D-NEXT:    fsd fs9, 168(s1)
-; LP64D-NEXT:    fsd fs8, 160(s1)
+; LP64D-NEXT:    fsd fs7, 248(s0)
+; LP64D-NEXT:    fsd fs6, 240(s0)
+; LP64D-NEXT:    fsd fs5, 232(s0)
+; LP64D-NEXT:    fsd fs4, 224(s0)
+; LP64D-NEXT:    fsd fs3, 216(s0)
+; LP64D-NEXT:    fsd fs2, 208(s0)
+; LP64D-NEXT:    fsd fs1, 200(s0)
+; LP64D-NEXT:    fsd fs0, 192(s0)
+; LP64D-NEXT:    fsd fs11, 184(s0)
+; LP64D-NEXT:    fsd fs10, 176(s0)
+; LP64D-NEXT:    fsd fs9, 168(s0)
+; LP64D-NEXT:    fsd fs8, 160(s0)
+; LP64D-NEXT:    fld fa5, 0(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fsd fa5, 152(s0)
 ; LP64D-NEXT:    fld fa5, 8(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 152(s1)
+; LP64D-NEXT:    fsd fa5, 144(s0)
 ; LP64D-NEXT:    fld fa5, 16(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 144(s1)
+; LP64D-NEXT:    fsd fa5, 136(s0)
 ; LP64D-NEXT:    fld fa5, 24(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 136(s1)
+; LP64D-NEXT:    fsd fa5, 128(s0)
 ; LP64D-NEXT:    fld fa5, 32(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 128(s1)
+; LP64D-NEXT:    fsd fa5, 120(s0)
 ; LP64D-NEXT:    fld fa5, 40(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 120(s1)
+; LP64D-NEXT:    fsd fa5, 112(s0)
 ; LP64D-NEXT:    fld fa5, 48(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 112(s1)
+; LP64D-NEXT:    fsd fa5, 104(s0)
 ; LP64D-NEXT:    fld fa5, 56(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 104(s1)
+; LP64D-NEXT:    fsd fa5, 96(s0)
 ; LP64D-NEXT:    fld fa5, 64(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 96(s1)
+; LP64D-NEXT:    fsd fa5, 88(s0)
 ; LP64D-NEXT:    fld fa5, 72(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 88(s1)
+; LP64D-NEXT:    fsd fa5, 80(s0)
 ; LP64D-NEXT:    fld fa5, 80(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 80(s1)
+; LP64D-NEXT:    fsd fa5, 72(s0)
 ; LP64D-NEXT:    fld fa5, 88(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 72(s1)
+; LP64D-NEXT:    fsd fa5, 64(s0)
 ; LP64D-NEXT:    fld fa5, 96(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 64(s1)
+; LP64D-NEXT:    fsd fa5, 56(s0)
 ; LP64D-NEXT:    fld fa5, 104(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 56(s1)
+; LP64D-NEXT:    fsd fa5, 48(s0)
 ; LP64D-NEXT:    fld fa5, 112(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 48(s1)
+; LP64D-NEXT:    fsd fa5, 40(s0)
 ; LP64D-NEXT:    fld fa5, 120(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 40(s1)
+; LP64D-NEXT:    fsd fa5, 32(s0)
 ; LP64D-NEXT:    fld fa5, 128(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 32(s1)
+; LP64D-NEXT:    fsd fa5, 24(s0)
 ; LP64D-NEXT:    fld fa5, 136(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 24(s1)
+; LP64D-NEXT:    fsd fa5, 16(s0)
 ; LP64D-NEXT:    fld fa5, 144(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, 16(s1)
+; LP64D-NEXT:    fsd fa5, 8(s0)
 ; LP64D-NEXT:    fld fa5, 152(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, %lo(var+8)(s0)
-; LP64D-NEXT:    fld fa5, 160(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fsd fa5, %lo(var)(s0)
-; LP64D-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs0, 256(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs1, 248(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs2, 240(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs3, 232(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs4, 224(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs5, 216(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs6, 208(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs7, 200(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs8, 192(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs9, 184(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs10, 176(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs11, 168(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    addi sp, sp, 288
+; LP64D-NEXT:    fsd fa5, 0(s0)
+; LP64D-NEXT:    ld ra, 264(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    ld s0, 256(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs0, 248(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs1, 240(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs2, 232(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs3, 224(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs4, 216(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs5, 208(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs6, 200(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs7, 192(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs8, 184(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs9, 176(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs10, 168(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs11, 160(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    addi sp, sp, 272
 ; LP64D-NEXT:    ret
   %val = load [32 x double], ptr @var
   call void @callee()
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
index 5e8ed4509b535..7a6f9136a242b 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
@@ -54,84 +54,82 @@ define void @callee() nounwind {
 ; RV32I-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lui a6, %hi(var)
-; RV32I-NEXT:    lw a0, %lo(var)(a6)
-; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var+4)(a6)
-; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var+8)(a6)
-; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var+12)(a6)
-; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    addi a5, a6, %lo(var)
-; RV32I-NEXT:    lw a0, 16(a5)
-; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 20(a5)
-; RV32I-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw t0, 24(a5)
-; RV32I-NEXT:    lw t1, 28(a5)
-; RV32I-NEXT:    lw t2, 32(a5)
-; RV32I-NEXT:    lw t3, 36(a5)
-; RV32I-NEXT:    lw t4, 40(a5)
-; RV32I-NEXT:    lw t5, 44(a5)
-; RV32I-NEXT:    lw t6, 48(a5)
-; RV32I-NEXT:    lw s0, 52(a5)
-; RV32I-NEXT:    lw s1, 56(a5)
-; RV32I-NEXT:    lw s2, 60(a5)
-; RV32I-NEXT:    lw s3, 64(a5)
-; RV32I-NEXT:    lw s4, 68(a5)
-; RV32I-NEXT:    lw s5, 72(a5)
-; RV32I-NEXT:    lw s6, 76(a5)
-; RV32I-NEXT:    lw s7, 80(a5)
-; RV32I-NEXT:    lw s8, 84(a5)
-; RV32I-NEXT:    lw s9, 88(a5)
-; RV32I-NEXT:    lw s10, 92(a5)
-; RV32I-NEXT:    lw s11, 96(a5)
-; RV32I-NEXT:    lw ra, 100(a5)
-; RV32I-NEXT:    lw a7, 104(a5)
-; RV32I-NEXT:    lw a4, 108(a5)
-; RV32I-NEXT:    lw a0, 124(a5)
-; RV32I-NEXT:    lw a1, 120(a5)
-; RV32I-NEXT:    lw a2, 116(a5)
-; RV32I-NEXT:    lw a3, 112(a5)
-; RV32I-NEXT:    sw a0, 124(a5)
-; RV32I-NEXT:    sw a1, 120(a5)
-; RV32I-NEXT:    sw a2, 116(a5)
-; RV32I-NEXT:    sw a3, 112(a5)
-; RV32I-NEXT:    sw a4, 108(a5)
-; RV32I-NEXT:    sw a7, 104(a5)
-; RV32I-NEXT:    sw ra, 100(a5)
-; RV32I-NEXT:    sw s11, 96(a5)
-; RV32I-NEXT:    sw s10, 92(a5)
-; RV32I-NEXT:    sw s9, 88(a5)
-; RV32I-NEXT:    sw s8, 84(a5)
-; RV32I-NEXT:    sw s7, 80(a5)
-; RV32I-NEXT:    sw s6, 76(a5)
-; RV32I-NEXT:    sw s5, 72(a5)
-; RV32I-NEXT:    sw s4, 68(a5)
-; RV32I-NEXT:    sw s3, 64(a5)
-; RV32I-NEXT:    sw s2, 60(a5)
-; RV32I-NEXT:    sw s1, 56(a5)
-; RV32I-NEXT:    sw s0, 52(a5)
-; RV32I-NEXT:    sw t6, 48(a5)
-; RV32I-NEXT:    sw t5, 44(a5)
-; RV32I-NEXT:    sw t4, 40(a5)
-; RV32I-NEXT:    sw t3, 36(a5)
-; RV32I-NEXT:    sw t2, 32(a5)
-; RV32I-NEXT:    sw t1, 28(a5)
-; RV32I-NEXT:    sw t0, 24(a5)
-; RV32I-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 20(a5)
-; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 16(a5)
-; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var+12)(a6)
-; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var+8)(a6)
-; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var+4)(a6)
-; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var)(a6)
+; RV32I-NEXT:    lui a0, %hi(var)
+; RV32I-NEXT:    addi a0, a0, %lo(var)
+; RV32I-NEXT:    lw a1, 0(a0)
+; RV32I-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 4(a0)
+; RV32I-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 8(a0)
+; RV32I-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 12(a0)
+; RV32I-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 16(a0)
+; RV32I-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a6, 20(a0)
+; RV32I-NEXT:    lw a7, 24(a0)
+; RV32I-NEXT:    lw t0, 28(a0)
+; RV32I-NEXT:    lw t1, 32(a0)
+; RV32I-NEXT:    lw t2, 36(a0)
+; RV32I-NEXT:    lw t3, 40(a0)
+; RV32I-NEXT:    lw t4, 44(a0)
+; RV32I-NEXT:    lw t5, 48(a0)
+; RV32I-NEXT:    lw t6, 52(a0)
+; RV32I-NEXT:    lw s0, 56(a0)
+; RV32I-NEXT:    lw s1, 60(a0)
+; RV32I-NEXT:    lw s2, 64(a0)
+; RV32I-NEXT:    lw s3, 68(a0)
+; RV32I-NEXT:    lw s4, 72(a0)
+; RV32I-NEXT:    lw s5, 76(a0)
+; RV32I-NEXT:    lw s6, 80(a0)
+; RV32I-NEXT:    lw s7, 84(a0)
+; RV32I-NEXT:    lw s8, 88(a0)
+; RV32I-NEXT:    lw s9, 92(a0)
+; RV32I-NEXT:    lw s10, 96(a0)
+; RV32I-NEXT:    lw s11, 100(a0)
+; RV32I-NEXT:    lw ra, 104(a0)
+; RV32I-NEXT:    lw a5, 108(a0)
+; RV32I-NEXT:    lw a1, 124(a0)
+; RV32I-NEXT:    lw a2, 120(a0)
+; RV32I-NEXT:    lw a3, 116(a0)
+; RV32I-NEXT:    lw a4, 112(a0)
+; RV32I-NEXT:    sw a1, 124(a0)
+; RV32I-NEXT:    sw a2, 120(a0)
+; RV32I-NEXT:    sw a3, 116(a0)
+; RV32I-NEXT:    sw a4, 112(a0)
+; RV32I-NEXT:    sw a5, 108(a0)
+; RV32I-NEXT:    sw ra, 104(a0)
+; RV32I-NEXT:    sw s11, 100(a0)
+; RV32I-NEXT:    sw s10, 96(a0)
+; RV32I-NEXT:    sw s9, 92(a0)
+; RV32I-NEXT:    sw s8, 88(a0)
+; RV32I-NEXT:    sw s7, 84(a0)
+; RV32I-NEXT:    sw s6, 80(a0)
+; RV32I-NEXT:    sw s5, 76(a0)
+; RV32I-NEXT:    sw s4, 72(a0)
+; RV32I-NEXT:    sw s3, 68(a0)
+; RV32I-NEXT:    sw s2, 64(a0)
+; RV32I-NEXT:    sw s1, 60(a0)
+; RV32I-NEXT:    sw s0, 56(a0)
+; RV32I-NEXT:    sw t6, 52(a0)
+; RV32I-NEXT:    sw t5, 48(a0)
+; RV32I-NEXT:    sw t4, 44(a0)
+; RV32I-NEXT:    sw t3, 40(a0)
+; RV32I-NEXT:    sw t2, 36(a0)
+; RV32I-NEXT:    sw t1, 32(a0)
+; RV32I-NEXT:    sw t0, 28(a0)
+; RV32I-NEXT:    sw a7, 24(a0)
+; RV32I-NEXT:    sw a6, 20(a0)
+; RV32I-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 16(a0)
+; RV32I-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 12(a0)
+; RV32I-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 8(a0)
+; RV32I-NEXT:    lw a1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 4(a0)
+; RV32I-NEXT:    lw a1, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 0(a0)
 ; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -150,92 +148,90 @@ define void @callee() nounwind {
 ;
 ; RV32I-ILP32E-LABEL: callee:
 ; RV32I-ILP32E:       # %bb.0:
-; RV32I-ILP32E-NEXT:    addi sp, sp, -36
-; RV32I-ILP32E-NEXT:    sw ra, 32(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    sw s0, 28(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    sw s1, 24(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lui a6, %hi(var)
-; RV32I-ILP32E-NEXT:    lw a0, %lo(var)(a6)
-; RV32I-ILP32E-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, %lo(var+4)(a6)
-; RV32I-ILP32E-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, %lo(var+8)(a6)
-; RV32I-ILP32E-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, %lo(var+12)(a6)
-; RV32I-ILP32E-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    addi a5, a6, %lo(var)
-; RV32I-ILP32E-NEXT:    lw a0, 16(a5)
-; RV32I-ILP32E-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 20(a5)
-; RV32I-ILP32E-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw t0, 24(a5)
-; RV32I-ILP32E-NEXT:    lw t1, 28(a5)
-; RV32I-ILP32E-NEXT:    lw t2, 32(a5)
-; RV32I-ILP32E-NEXT:    lw t3, 36(a5)
-; RV32I-ILP32E-NEXT:    lw t4, 40(a5)
-; RV32I-ILP32E-NEXT:    lw t5, 44(a5)
-; RV32I-ILP32E-NEXT:    lw t6, 48(a5)
-; RV32I-ILP32E-NEXT:    lw s2, 52(a5)
-; RV32I-ILP32E-NEXT:    lw s3, 56(a5)
-; RV32I-ILP32E-NEXT:    lw s4, 60(a5)
-; RV32I-ILP32E-NEXT:    lw s5, 64(a5)
-; RV32I-ILP32E-NEXT:    lw s6, 68(a5)
-; RV32I-ILP32E-NEXT:    lw s7, 72(a5)
-; RV32I-ILP32E-NEXT:    lw s8, 76(a5)
-; RV32I-ILP32E-NEXT:    lw s9, 80(a5)
-; RV32I-ILP32E-NEXT:    lw s10, 84(a5)
-; RV32I-ILP32E-NEXT:    lw s11, 88(a5)
-; RV32I-ILP32E-NEXT:    lw s0, 92(a5)
-; RV32I-ILP32E-NEXT:    lw s1, 96(a5)
-; RV32I-ILP32E-NEXT:    lw ra, 100(a5)
-; RV32I-ILP32E-NEXT:    lw a7, 104(a5)
-; RV32I-ILP32E-NEXT:    lw a4, 108(a5)
-; RV32I-ILP32E-NEXT:    lw a0, 124(a5)
-; RV32I-ILP32E-NEXT:    lw a1, 120(a5)
-; RV32I-ILP32E-NEXT:    lw a2, 116(a5)
-; RV32I-ILP32E-NEXT:    lw a3, 112(a5)
-; RV32I-ILP32E-NEXT:    sw a0, 124(a5)
-; RV32I-ILP32E-NEXT:    sw a1, 120(a5)
-; RV32I-ILP32E-NEXT:    sw a2, 116(a5)
-; RV32I-ILP32E-NEXT:    sw a3, 112(a5)
-; RV32I-ILP32E-NEXT:    sw a4, 108(a5)
-; RV32I-ILP32E-NEXT:    sw a7, 104(a5)
-; RV32I-ILP32E-NEXT:    sw ra, 100(a5)
-; RV32I-ILP32E-NEXT:    sw s1, 96(a5)
-; RV32I-ILP32E-NEXT:    sw s0, 92(a5)
-; RV32I-ILP32E-NEXT:    sw s11, 88(a5)
-; RV32I-ILP32E-NEXT:    sw s10, 84(a5)
-; RV32I-ILP32E-NEXT:    sw s9, 80(a5)
-; RV32I-ILP32E-NEXT:    sw s8, 76(a5)
-; RV32I-ILP32E-NEXT:    sw s7, 72(a5)
-; RV32I-ILP32E-NEXT:    sw s6, 68(a5)
-; RV32I-ILP32E-NEXT:    sw s5, 64(a5)
-; RV32I-ILP32E-NEXT:    sw s4, 60(a5)
-; RV32I-ILP32E-NEXT:    sw s3, 56(a5)
-; RV32I-ILP32E-NEXT:    sw s2, 52(a5)
-; RV32I-ILP32E-NEXT:    sw t6, 48(a5)
-; RV32I-ILP32E-NEXT:    sw t5, 44(a5)
-; RV32I-ILP32E-NEXT:    sw t4, 40(a5)
-; RV32I-ILP32E-NEXT:    sw t3, 36(a5)
-; RV32I-ILP32E-NEXT:    sw t2, 32(a5)
-; RV32I-ILP32E-NEXT:    sw t1, 28(a5)
-; RV32I-ILP32E-NEXT:    sw t0, 24(a5)
-; RV32I-ILP32E-NEXT:    lw a0, 0(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 20(a5)
-; RV32I-ILP32E-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 16(a5)
-; RV32I-ILP32E-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, %lo(var+12)(a6)
-; RV32I-ILP32E-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, %lo(var+8)(a6)
-; RV32I-ILP32E-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, %lo(var+4)(a6)
-; RV32I-ILP32E-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, %lo(var)(a6)
-; RV32I-ILP32E-NEXT:    lw ra, 32(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    addi sp, sp, 36
+; RV32I-ILP32E-NEXT:    addi sp, sp, -32
+; RV32I-ILP32E-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    lui a0, %hi(var)
+; RV32I-ILP32E-NEXT:    addi a0, a0, %lo(var)
+; RV32I-ILP32E-NEXT:    lw a1, 0(a0)
+; RV32I-ILP32E-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    lw a1, 4(a0)
+; RV32I-ILP32E-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    lw a1, 8(a0)
+; RV32I-ILP32E-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    lw a1, 12(a0)
+; RV32I-ILP32E-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    lw a1, 16(a0)
+; RV32I-ILP32E-NEXT:    sw a1, 0(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    lw a6, 20(a0)
+; RV32I-ILP32E-NEXT:    lw a7, 24(a0)
+; RV32I-ILP32E-NEXT:    lw t0, 28(a0)
+; RV32I-ILP32E-NEXT:    lw t1, 32(a0)
+; RV32I-ILP32E-NEXT:    lw t2, 36(a0)
+; RV32I-ILP32E-NEXT:    lw t3, 40(a0)
+; RV32I-ILP32E-NEXT:    lw t4, 44(a0)
+; RV32I-ILP32E-NEXT:    lw t5, 48(a0)
+; RV32I-ILP32E-NEXT:    lw t6, 52(a0)
+; RV32I-ILP32E-NEXT:    lw s2, 56(a0)
+; RV32I-ILP32E-NEXT:    lw s3, 60(a0)
+; RV32I-ILP32E-NEXT:    lw s4, 64(a0)
+; RV32I-ILP32E-NEXT:    lw s5, 68(a0)
+; RV32I-ILP32E-NEXT:    lw s6, 72(a0)
+; RV32I-ILP32E-NEXT:    lw s7, 76(a0)
+; RV32I-ILP32E-NEXT:    lw s8, 80(a0)
+; RV32I-ILP32E-NEXT:    lw s9, 84(a0)
+; RV32I-ILP32E-NEXT:    lw s10, 88(a0)
+; RV32I-ILP32E-NEXT:    lw s11, 92(a0)
+; RV32I-ILP32E-NEXT:    lw s0, 96(a0)
+; RV32I-ILP32E-NEXT:    lw s1, 100(a0)
+; RV32I-ILP32E-NEXT:    lw ra, 104(a0)
+; RV32I-ILP32E-NEXT:    lw a5, 108(a0)
+; RV32I-ILP32E-NEXT:    lw a1, 124(a0)
+; RV32I-ILP32E-NEXT:    lw a2, 120(a0)
+; RV32I-ILP32E-NEXT:    lw a3, 116(a0)
+; RV32I-ILP32E-NEXT:    lw a4, 112(a0)
+; RV32I-ILP32E-NEXT:    sw a1, 124(a0)
+; RV32I-ILP32E-NEXT:    sw a2, 120(a0)
+; RV32I-ILP32E-NEXT:    sw a3, 116(a0)
+; RV32I-ILP32E-NEXT:    sw a4, 112(a0)
+; RV32I-ILP32E-NEXT:    sw a5, 108(a0)
+; RV32I-ILP32E-NEXT:    sw ra, 104(a0)
+; RV32I-ILP32E-NEXT:    sw s1, 100(a0)
+; RV32I-ILP32E-NEXT:    sw s0, 96(a0)
+; RV32I-ILP32E-NEXT:    sw s11, 92(a0)
+; RV32I-ILP32E-NEXT:    sw s10, 88(a0)
+; RV32I-ILP32E-NEXT:    sw s9, 84(a0)
+; RV32I-ILP32E-NEXT:    sw s8, 80(a0)
+; RV32I-ILP32E-NEXT:    sw s7, 76(a0)
+; RV32I-ILP32E-NEXT:    sw s6, 72(a0)
+; RV32I-ILP32E-NEXT:    sw s5, 68(a0)
+; RV32I-ILP32E-NEXT:    sw s4, 64(a0)
+; RV32I-ILP32E-NEXT:    sw s3, 60(a0)
+; RV32I-ILP32E-NEXT:    sw s2, 56(a0)
+; RV32I-ILP32E-NEXT:    sw t6, 52(a0)
+; RV32I-ILP32E-NEXT:    sw t5, 48(a0)
+; RV32I-ILP32E-NEXT:    sw t4, 44(a0)
+; RV32I-ILP32E-NEXT:    sw t3, 40(a0)
+; RV32I-ILP32E-NEXT:    sw t2, 36(a0)
+; RV32I-ILP32E-NEXT:    sw t1, 32(a0)
+; RV32I-ILP32E-NEXT:    sw t0, 28(a0)
+; RV32I-ILP32E-NEXT:    sw a7, 24(a0)
+; RV32I-ILP32E-NEXT:    sw a6, 20(a0)
+; RV32I-ILP32E-NEXT:    lw a1, 0(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT:    sw a1, 16(a0)
+; RV32I-ILP32E-NEXT:    lw a1, 4(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT:    sw a1, 12(a0)
+; RV32I-ILP32E-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT:    sw a1, 8(a0)
+; RV32I-ILP32E-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT:    sw a1, 4(a0)
+; RV32I-ILP32E-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT:    sw a1, 0(a0)
+; RV32I-ILP32E-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-ILP32E-NEXT:    addi sp, sp, 32
 ; RV32I-ILP32E-NEXT:    ret
 ;
 ; RV32I-WITH-FP-LABEL: callee:
@@ -255,86 +251,84 @@ define void @callee() nounwind {
 ; RV32I-WITH-FP-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    addi s0, sp, 80
-; RV32I-WITH-FP-NEXT:    lui a6, %hi(var)
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var)(a6)
-; RV32I-WITH-FP-NEXT:    sw a0, -56(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+4)(a6)
-; RV32I-WITH-FP-NEXT:    sw a0, -60(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+8)(a6)
-; RV32I-WITH-FP-NEXT:    sw a0, -64(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+12)(a6)
-; RV32I-WITH-FP-NEXT:    sw a0, -68(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    addi a5, a6, %lo(var)
-; RV32I-WITH-FP-NEXT:    lw a0, 16(a5)
-; RV32I-WITH-FP-NEXT:    sw a0, -72(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 20(a5)
-; RV32I-WITH-FP-NEXT:    sw a0, -76(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 24(a5)
-; RV32I-WITH-FP-NEXT:    sw a0, -80(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw t1, 28(a5)
-; RV32I-WITH-FP-NEXT:    lw t2, 32(a5)
-; RV32I-WITH-FP-NEXT:    lw t3, 36(a5)
-; RV32I-WITH-FP-NEXT:    lw t4, 40(a5)
-; RV32I-WITH-FP-NEXT:    lw t5, 44(a5)
-; RV32I-WITH-FP-NEXT:    lw t6, 48(a5)
-; RV32I-WITH-FP-NEXT:    lw s1, 52(a5)
-; RV32I-WITH-FP-NEXT:    lw s2, 56(a5)
-; RV32I-WITH-FP-NEXT:    lw s3, 60(a5)
-; RV32I-WITH-FP-NEXT:    lw s4, 64(a5)
-; RV32I-WITH-FP-NEXT:    lw s5, 68(a5)
-; RV32I-WITH-FP-NEXT:    lw s6, 72(a5)
-; RV32I-WITH-FP-NEXT:    lw s7, 76(a5)
-; RV32I-WITH-FP-NEXT:    lw s8, 80(a5)
-; RV32I-WITH-FP-NEXT:    lw s9, 84(a5)
-; RV32I-WITH-FP-NEXT:    lw s10, 88(a5)
-; RV32I-WITH-FP-NEXT:    lw s11, 92(a5)
-; RV32I-WITH-FP-NEXT:    lw ra, 96(a5)
-; RV32I-WITH-FP-NEXT:    lw t0, 100(a5)
-; RV32I-WITH-FP-NEXT:    lw a7, 104(a5)
-; RV32I-WITH-FP-NEXT:    lw a4, 108(a5)
-; RV32I-WITH-FP-NEXT:    lw a0, 124(a5)
-; RV32I-WITH-FP-NEXT:    lw a1, 120(a5)
-; RV32I-WITH-FP-NEXT:    lw a2, 116(a5)
-; RV32I-WITH-FP-NEXT:    lw a3, 112(a5)
-; RV32I-WITH-FP-NEXT:    sw a0, 124(a5)
-; RV32I-WITH-FP-NEXT:    sw a1, 120(a5)
-; RV32I-WITH-FP-NEXT:    sw a2, 116(a5)
-; RV32I-WITH-FP-NEXT:    sw a3, 112(a5)
-; RV32I-WITH-FP-NEXT:    sw a4, 108(a5)
-; RV32I-WITH-FP-NEXT:    sw a7, 104(a5)
-; RV32I-WITH-FP-NEXT:    sw t0, 100(a5)
-; RV32I-WITH-FP-NEXT:    sw ra, 96(a5)
-; RV32I-WITH-FP-NEXT:    sw s11, 92(a5)
-; RV32I-WITH-FP-NEXT:    sw s10, 88(a5)
-; RV32I-WITH-FP-NEXT:    sw s9, 84(a5)
-; RV32I-WITH-FP-NEXT:    sw s8, 80(a5)
-; RV32I-WITH-FP-NEXT:    sw s7, 76(a5)
-; RV32I-WITH-FP-NEXT:    sw s6, 72(a5)
-; RV32I-WITH-FP-NEXT:    sw s5, 68(a5)
-; RV32I-WITH-FP-NEXT:    sw s4, 64(a5)
-; RV32I-WITH-FP-NEXT:    sw s3, 60(a5)
-; RV32I-WITH-FP-NEXT:    sw s2, 56(a5)
-; RV32I-WITH-FP-NEXT:    sw s1, 52(a5)
-; RV32I-WITH-FP-NEXT:    sw t6, 48(a5)
-; RV32I-WITH-FP-NEXT:    sw t5, 44(a5)
-; RV32I-WITH-FP-NEXT:    sw t4, 40(a5)
-; RV32I-WITH-FP-NEXT:    sw t3, 36(a5)
-; RV32I-WITH-FP-NEXT:    sw t2, 32(a5)
-; RV32I-WITH-FP-NEXT:    sw t1, 28(a5)
-; RV32I-WITH-FP-NEXT:    lw a0, -80(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 24(a5)
-; RV32I-WITH-FP-NEXT:    lw a0, -76(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 20(a5)
-; RV32I-WITH-FP-NEXT:    lw a0, -72(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 16(a5)
-; RV32I-WITH-FP-NEXT:    lw a0, -68(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+12)(a6)
-; RV32I-WITH-FP-NEXT:    lw a0, -64(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+8)(a6)
-; RV32I-WITH-FP-NEXT:    lw a0, -60(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+4)(a6)
-; RV32I-WITH-FP-NEXT:    lw a0, -56(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var)(a6)
+; RV32I-WITH-FP-NEXT:    lui a0, %hi(var)
+; RV32I-WITH-FP-NEXT:    addi a0, a0, %lo(var)
+; RV32I-WITH-FP-NEXT:    lw a1, 0(a0)
+; RV32I-WITH-FP-NEXT:    sw a1, -56(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a1, 4(a0)
+; RV32I-WITH-FP-NEXT:    sw a1, -60(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a1, 8(a0)
+; RV32I-WITH-FP-NEXT:    sw a1, -64(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a1, 12(a0)
+; RV32I-WITH-FP-NEXT:    sw a1, -68(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a1, 16(a0)
+; RV32I-WITH-FP-NEXT:    sw a1, -72(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a1, 20(a0)
+; RV32I-WITH-FP-NEXT:    sw a1, -76(s0) # 4-byte Folded Spill
+; RV32I-WITH-FP-NEXT:    lw a7, 24(a0)
+; RV32I-WITH-FP-NEXT:    lw t0, 28(a0)
+; RV32I-WITH-FP-NEXT:    lw t1, 32(a0)
+; RV32I-WITH-FP-NEXT:    lw t2, 36(a0)
+; RV32I-WITH-FP-NEXT:    lw t3, 40(a0)
+; RV32I-WITH-FP-NEXT:    lw t4, 44(a0)
+; RV32I-WITH-FP-NEXT:    lw t5, 48(a0)
+; RV32I-WITH-FP-NEXT:    lw t6, 52(a0)
+; RV32I-WITH-FP-NEXT:    lw s1, 56(a0)
+; RV32I-WITH-FP-NEXT:    lw s2, 60(a0)
+; RV32I-WITH-FP-NEXT:    lw s3, 64(a0)
+; RV32I-WITH-FP-NEXT:    lw s4, 68(a0)
+; RV32I-WITH-FP-NEXT:    lw s5, 72(a0)
+; RV32I-WITH-FP-NEXT:    lw s6, 76(a0)
+; RV32I-WITH-FP-NEXT:    lw s7, 80(a0)
+; RV32I-WITH-FP-NEXT:    lw s8, 84(a0)
+; RV32I-WITH-FP-NEXT:    lw s9, 88(a0)
+; RV32I-WITH-FP-NEXT:    lw s10, 92(a0)
+; RV32I-WITH-FP-NEXT:    lw s11, 96(a0)
+; RV32I-WITH-FP-NEXT:    lw ra, 100(a0)
+; RV32I-WITH-FP-NEXT:    lw a6, 104(a0)
+; RV32I-WITH-FP-NEXT:    lw a5, 108(a0)
+; RV32I-WITH-FP-NEXT:    lw a1, 124(a0)
+; RV32I-WITH-FP-NEXT:    lw a2, 120(a0)
+; RV32I-WITH-FP-NEXT:    lw a3, 116(a0)
+; RV32I-WITH-FP-NEXT:    lw a4, 112(a0)
+; RV32I-WITH-FP-NEXT:    sw a1, 124(a0)
+; RV32I-WITH-FP-NEXT:    sw a2, 120(a0)
+; RV32I-WITH-FP-NEXT:    sw a3, 116(a0)
+; RV32I-WITH-FP-NEXT:    sw a4, 112(a0)
+; RV32I-WITH-FP-NEXT:    sw a5, 108(a0)
+; RV32I-WITH-FP-NEXT:    sw a6, 104(a0)
+; RV32I-WITH-FP-NEXT:    sw ra, 100(a0)
+; RV32I-WITH-FP-NEXT:    sw s11, 96(a0)
+; RV32I-WITH-FP-NEXT:    sw s10, 92(a0)
+; RV32I-WITH-FP-NEXT:    sw s9, 88(a0)
+; RV32I-WITH-FP-NEXT:    sw s8, 84(a0)
+; RV32I-WITH-FP-NEXT:    sw s7, 80(a0)
+; RV32I-WITH-FP-NEXT:    sw s6, 76(a0)
+; RV32I-WITH-FP-NEXT:    sw s5, 72(a0)
+; RV32I-WITH-FP-NEXT:    sw s4, 68(a0)
+; RV32I-WITH-FP-NEXT:    sw s3, 64(a0)
+; RV32I-WITH-FP-NEXT:    sw s2, 60(a0)
+; RV32I-WITH-FP-NEXT:    sw s1, 56(a0)
+; RV32I-WITH-FP-NEXT:    sw t6, 52(a0)
+; RV32I-WITH-FP-NEXT:    sw t5, 48(a0)
+; RV32I-WITH-FP-NEXT:    sw t4, 44(a0)
+; RV32I-WITH-FP-NEXT:    sw t3, 40(a0)
+; RV32I-WITH-FP-NEXT:    sw t2, 36(a0)
+; RV32I-WITH-FP-NEXT:    sw t1, 32(a0)
+; RV32I-WITH-FP-NEXT:    sw t0, 28(a0)
+; RV32I-WITH-FP-NEXT:    sw a7, 24(a0)
+; RV32I-WITH-FP-NEXT:    lw a1, -76(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a1, 20(a0)
+; RV32I-WITH-FP-NEXT:    lw a1, -72(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a1, 16(a0)
+; RV32I-WITH-FP-NEXT:    lw a1, -68(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a1, 12(a0)
+; RV32I-WITH-FP-NEXT:    lw a1, -64(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a1, 8(a0)
+; RV32I-WITH-FP-NEXT:    lw a1, -60(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a1, 4(a0)
+; RV32I-WITH-FP-NEXT:    lw a1, -56(s0) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    sw a1, 0(a0)
 ; RV32I-WITH-FP-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -354,84 +348,82 @@ define void @callee() nounwind {
 ; RV32IZCMP-LABEL: callee:
 ; RV32IZCMP:       # %bb.0:
 ; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -96
-; RV32IZCMP-NEXT:    lui a6, %hi(var)
-; RV32IZCMP-NEXT:    lw a0, %lo(var)(a6)
-; RV32IZCMP-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var+4)(a6)
-; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var+8)(a6)
-; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var+12)(a6)
-; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    addi a5, a6, %lo(var)
-; RV32IZCMP-NEXT:    lw a0, 16(a5)
-; RV32IZCMP-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 20(a5)
-; RV32IZCMP-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw t4, 24(a5)
-; RV32IZCMP-NEXT:    lw t5, 28(a5)
-; RV32IZCMP-NEXT:    lw t6, 32(a5)
-; RV32IZCMP-NEXT:    lw s2, 36(a5)
-; RV32IZCMP-NEXT:    lw s3, 40(a5)
-; RV32IZCMP-NEXT:    lw s4, 44(a5)
-; RV32IZCMP-NEXT:    lw s5, 48(a5)
-; RV32IZCMP-NEXT:    lw s6, 52(a5)
-; RV32IZCMP-NEXT:    lw s7, 56(a5)
-; RV32IZCMP-NEXT:    lw s8, 60(a5)
-; RV32IZCMP-NEXT:    lw s9, 64(a5)
-; RV32IZCMP-NEXT:    lw s10, 68(a5)
-; RV32IZCMP-NEXT:    lw s11, 72(a5)
-; RV32IZCMP-NEXT:    lw ra, 76(a5)
-; RV32IZCMP-NEXT:    lw s1, 80(a5)
-; RV32IZCMP-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-NEXT:    lw t1, 92(a5)
-; RV32IZCMP-NEXT:    lw t0, 96(a5)
-; RV32IZCMP-NEXT:    lw s0, 100(a5)
-; RV32IZCMP-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-NEXT:    sw a7, 104(a5)
-; RV32IZCMP-NEXT:    sw s0, 100(a5)
-; RV32IZCMP-NEXT:    sw t0, 96(a5)
-; RV32IZCMP-NEXT:    sw t1, 92(a5)
-; RV32IZCMP-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-NEXT:    sw t3, 84(a5)
-; RV32IZCMP-NEXT:    sw s1, 80(a5)
-; RV32IZCMP-NEXT:    sw ra, 76(a5)
-; RV32IZCMP-NEXT:    sw s11, 72(a5)
-; RV32IZCMP-NEXT:    sw s10, 68(a5)
-; RV32IZCMP-NEXT:    sw s9, 64(a5)
-; RV32IZCMP-NEXT:    sw s8, 60(a5)
-; RV32IZCMP-NEXT:    sw s7, 56(a5)
-; RV32IZCMP-NEXT:    sw s6, 52(a5)
-; RV32IZCMP-NEXT:    sw s5, 48(a5)
-; RV32IZCMP-NEXT:    sw s4, 44(a5)
-; RV32IZCMP-NEXT:    sw s3, 40(a5)
-; RV32IZCMP-NEXT:    sw s2, 36(a5)
-; RV32IZCMP-NEXT:    sw t6, 32(a5)
-; RV32IZCMP-NEXT:    sw t5, 28(a5)
-; RV32IZCMP-NEXT:    sw t4, 24(a5)
-; RV32IZCMP-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 20(a5)
-; RV32IZCMP-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 16(a5)
-; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var+12)(a6)
-; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var+8)(a6)
-; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var+4)(a6)
-; RV32IZCMP-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var)(a6)
+; RV32IZCMP-NEXT:    lui a0, %hi(var)
+; RV32IZCMP-NEXT:    addi a0, a0, %lo(var)
+; RV32IZCMP-NEXT:    lw a1, 0(a0)
+; RV32IZCMP-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 4(a0)
+; RV32IZCMP-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 8(a0)
+; RV32IZCMP-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 12(a0)
+; RV32IZCMP-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 16(a0)
+; RV32IZCMP-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw t3, 20(a0)
+; RV32IZCMP-NEXT:    lw t4, 24(a0)
+; RV32IZCMP-NEXT:    lw t5, 28(a0)
+; RV32IZCMP-NEXT:    lw t6, 32(a0)
+; RV32IZCMP-NEXT:    lw s2, 36(a0)
+; RV32IZCMP-NEXT:    lw s3, 40(a0)
+; RV32IZCMP-NEXT:    lw s4, 44(a0)
+; RV32IZCMP-NEXT:    lw s5, 48(a0)
+; RV32IZCMP-NEXT:    lw s6, 52(a0)
+; RV32IZCMP-NEXT:    lw s7, 56(a0)
+; RV32IZCMP-NEXT:    lw s8, 60(a0)
+; RV32IZCMP-NEXT:    lw s9, 64(a0)
+; RV32IZCMP-NEXT:    lw s10, 68(a0)
+; RV32IZCMP-NEXT:    lw s11, 72(a0)
+; RV32IZCMP-NEXT:    lw ra, 76(a0)
+; RV32IZCMP-NEXT:    lw t2, 80(a0)
+; RV32IZCMP-NEXT:    lw s0, 84(a0)
+; RV32IZCMP-NEXT:    lw s1, 88(a0)
+; RV32IZCMP-NEXT:    lw t1, 92(a0)
+; RV32IZCMP-NEXT:    lw t0, 96(a0)
+; RV32IZCMP-NEXT:    lw a7, 100(a0)
+; RV32IZCMP-NEXT:    lw a6, 104(a0)
+; RV32IZCMP-NEXT:    lw a5, 108(a0)
+; RV32IZCMP-NEXT:    lw a1, 124(a0)
+; RV32IZCMP-NEXT:    lw a2, 120(a0)
+; RV32IZCMP-NEXT:    lw a3, 116(a0)
+; RV32IZCMP-NEXT:    lw a4, 112(a0)
+; RV32IZCMP-NEXT:    sw a1, 124(a0)
+; RV32IZCMP-NEXT:    sw a2, 120(a0)
+; RV32IZCMP-NEXT:    sw a3, 116(a0)
+; RV32IZCMP-NEXT:    sw a4, 112(a0)
+; RV32IZCMP-NEXT:    sw a5, 108(a0)
+; RV32IZCMP-NEXT:    sw a6, 104(a0)
+; RV32IZCMP-NEXT:    sw a7, 100(a0)
+; RV32IZCMP-NEXT:    sw t0, 96(a0)
+; RV32IZCMP-NEXT:    sw t1, 92(a0)
+; RV32IZCMP-NEXT:    sw s1, 88(a0)
+; RV32IZCMP-NEXT:    sw s0, 84(a0)
+; RV32IZCMP-NEXT:    sw t2, 80(a0)
+; RV32IZCMP-NEXT:    sw ra, 76(a0)
+; RV32IZCMP-NEXT:    sw s11, 72(a0)
+; RV32IZCMP-NEXT:    sw s10, 68(a0)
+; RV32IZCMP-NEXT:    sw s9, 64(a0)
+; RV32IZCMP-NEXT:    sw s8, 60(a0)
+; RV32IZCMP-NEXT:    sw s7, 56(a0)
+; RV32IZCMP-NEXT:    sw s6, 52(a0)
+; RV32IZCMP-NEXT:    sw s5, 48(a0)
+; RV32IZCMP-NEXT:    sw s4, 44(a0)
+; RV32IZCMP-NEXT:    sw s3, 40(a0)
+; RV32IZCMP-NEXT:    sw s2, 36(a0)
+; RV32IZCMP-NEXT:    sw t6, 32(a0)
+; RV32IZCMP-NEXT:    sw t5, 28(a0)
+; RV32IZCMP-NEXT:    sw t4, 24(a0)
+; RV32IZCMP-NEXT:    sw t3, 20(a0)
+; RV32IZCMP-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 16(a0)
+; RV32IZCMP-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 12(a0)
+; RV32IZCMP-NEXT:    lw a1, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 8(a0)
+; RV32IZCMP-NEXT:    lw a1, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 4(a0)
+; RV32IZCMP-NEXT:    lw a1, 28(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 0(a0)
 ; RV32IZCMP-NEXT:    cm.popret {ra, s0-s11}, 96
 ;
 ; RV32IZCMP-WITH-FP-LABEL: callee:
@@ -451,86 +443,84 @@ define void @callee() nounwind {
 ; RV32IZCMP-WITH-FP-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    addi s0, sp, 80
-; RV32IZCMP-WITH-FP-NEXT:    lui a6, %hi(var)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var)(a6)
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, -56(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+4)(a6)
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, -60(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+8)(a6)
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, -64(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+12)(a6)
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, -68(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    addi a5, a6, %lo(var)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, 16(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, -72(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, 20(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, -76(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, 24(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, -80(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw t5, 28(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t6, 32(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s2, 36(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s3, 40(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s4, 44(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s5, 48(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s6, 52(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s7, 56(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s8, 60(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s9, 64(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s10, 68(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s11, 72(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw ra, 76(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t4, 80(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw s1, 92(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t1, 96(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t0, 100(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a7, 104(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t0, 100(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t1, 96(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s1, 92(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t3, 84(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t4, 80(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw ra, 76(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s11, 72(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s10, 68(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s9, 64(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s8, 60(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s7, 56(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s6, 52(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s5, 48(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s4, 44(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s3, 40(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw s2, 36(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t6, 32(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t5, 28(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, -80(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, 24(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, -76(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, 20(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, -72(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, 16(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, -68(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+12)(a6)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, -64(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+8)(a6)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, -60(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+4)(a6)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, -56(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var)(a6)
+; RV32IZCMP-WITH-FP-NEXT:    lui a0, %hi(var)
+; RV32IZCMP-WITH-FP-NEXT:    addi a0, a0, %lo(var)
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, 0(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, -56(s0) # 4-byte Folded Spill
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, 4(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, -60(s0) # 4-byte Folded Spill
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, 8(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, -64(s0) # 4-byte Folded Spill
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, 12(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, -68(s0) # 4-byte Folded Spill
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, 16(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, -72(s0) # 4-byte Folded Spill
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, 20(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, -76(s0) # 4-byte Folded Spill
+; RV32IZCMP-WITH-FP-NEXT:    lw t4, 24(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw t5, 28(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw t6, 32(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s2, 36(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s3, 40(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s4, 44(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s5, 48(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s6, 52(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s7, 56(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s8, 60(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s9, 64(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s10, 68(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s11, 72(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw ra, 76(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw s1, 80(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw t3, 84(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw t2, 88(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw t1, 92(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw t0, 96(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a7, 100(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a6, 104(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a5, 108(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, 124(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a2, 120(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a3, 116(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a4, 112(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, 124(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a2, 120(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a3, 116(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a4, 112(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a5, 108(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a6, 104(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw a7, 100(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw t0, 96(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw t1, 92(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw t2, 88(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw t3, 84(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s1, 80(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw ra, 76(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s11, 72(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s10, 68(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s9, 64(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s8, 60(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s7, 56(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s6, 52(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s5, 48(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s4, 44(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s3, 40(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw s2, 36(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw t6, 32(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw t5, 28(a0)
+; RV32IZCMP-WITH-FP-NEXT:    sw t4, 24(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, -76(s0) # 4-byte Folded Reload
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, 20(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, -72(s0) # 4-byte Folded Reload
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, 16(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, -68(s0) # 4-byte Folded Reload
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, 12(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, -64(s0) # 4-byte Folded Reload
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, 8(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, -60(s0) # 4-byte Folded Reload
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, 4(a0)
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, -56(s0) # 4-byte Folded Reload
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, 0(a0)
 ; RV32IZCMP-WITH-FP-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32IZCMP-WITH-FP-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32IZCMP-WITH-FP-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -549,202 +539,198 @@ define void @callee() nounwind {
 ;
 ; RV64I-LABEL: callee:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -160
-; RV64I-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 144(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s5, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s6, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s7, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s8, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s9, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s10, 64(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lui a6, %hi(var)
-; RV64I-NEXT:    lw a0, %lo(var)(a6)
-; RV64I-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var+4)(a6)
-; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var+8)(a6)
-; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var+12)(a6)
-; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    addi a5, a6, %lo(var)
-; RV64I-NEXT:    lw a0, 16(a5)
-; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 20(a5)
-; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw t0, 24(a5)
-; RV64I-NEXT:    lw t1, 28(a5)
-; RV64I-NEXT:    lw t2, 32(a5)
-; RV64I-NEXT:    lw t3, 36(a5)
-; RV64I-NEXT:    lw t4, 40(a5)
-; RV64I-NEXT:    lw t5, 44(a5)
-; RV64I-NEXT:    lw t6, 48(a5)
-; RV64I-NEXT:    lw s0, 52(a5)
-; RV64I-NEXT:    lw s1, 56(a5)
-; RV64I-NEXT:    lw s2, 60(a5)
-; RV64I-NEXT:    lw s3, 64(a5)
-; RV64I-NEXT:    lw s4, 68(a5)
-; RV64I-NEXT:    lw s5, 72(a5)
-; RV64I-NEXT:    lw s6, 76(a5)
-; RV64I-NEXT:    lw s7, 80(a5)
-; RV64I-NEXT:    lw s8, 84(a5)
-; RV64I-NEXT:    lw s9, 88(a5)
-; RV64I-NEXT:    lw s10, 92(a5)
-; RV64I-NEXT:    lw s11, 96(a5)
-; RV64I-NEXT:    lw ra, 100(a5)
-; RV64I-NEXT:    lw a7, 104(a5)
-; RV64I-NEXT:    lw a4, 108(a5)
-; RV64I-NEXT:    lw a0, 124(a5)
-; RV64I-NEXT:    lw a1, 120(a5)
-; RV64I-NEXT:    lw a2, 116(a5)
-; RV64I-NEXT:    lw a3, 112(a5)
-; RV64I-NEXT:    sw a0, 124(a5)
-; RV64I-NEXT:    sw a1, 120(a5)
-; RV64I-NEXT:    sw a2, 116(a5)
-; RV64I-NEXT:    sw a3, 112(a5)
-; RV64I-NEXT:    sw a4, 108(a5)
-; RV64I-NEXT:    sw a7, 104(a5)
-; RV64I-NEXT:    sw ra, 100(a5)
-; RV64I-NEXT:    sw s11, 96(a5)
-; RV64I-NEXT:    sw s10, 92(a5)
-; RV64I-NEXT:    sw s9, 88(a5)
-; RV64I-NEXT:    sw s8, 84(a5)
-; RV64I-NEXT:    sw s7, 80(a5)
-; RV64I-NEXT:    sw s6, 76(a5)
-; RV64I-NEXT:    sw s5, 72(a5)
-; RV64I-NEXT:    sw s4, 68(a5)
-; RV64I-NEXT:    sw s3, 64(a5)
-; RV64I-NEXT:    sw s2, 60(a5)
-; RV64I-NEXT:    sw s1, 56(a5)
-; RV64I-NEXT:    sw s0, 52(a5)
-; RV64I-NEXT:    sw t6, 48(a5)
-; RV64I-NEXT:    sw t5, 44(a5)
-; RV64I-NEXT:    sw t4, 40(a5)
-; RV64I-NEXT:    sw t3, 36(a5)
-; RV64I-NEXT:    sw t2, 32(a5)
-; RV64I-NEXT:    sw t1, 28(a5)
-; RV64I-NEXT:    sw t0, 24(a5)
-; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 20(a5)
-; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 16(a5)
-; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var+12)(a6)
-; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var+8)(a6)
-; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var+4)(a6)
-; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var)(a6)
-; RV64I-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s6, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s7, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s8, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s9, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s10, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s11, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 160
+; RV64I-NEXT:    addi sp, sp, -144
+; RV64I-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a0, %hi(var)
+; RV64I-NEXT:    addi a0, a0, %lo(var)
+; RV64I-NEXT:    lw a1, 0(a0)
+; RV64I-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 4(a0)
+; RV64I-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 8(a0)
+; RV64I-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 12(a0)
+; RV64I-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 16(a0)
+; RV64I-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a6, 20(a0)
+; RV64I-NEXT:    lw a7, 24(a0)
+; RV64I-NEXT:    lw t0, 28(a0)
+; RV64I-NEXT:    lw t1, 32(a0)
+; RV64I-NEXT:    lw t2, 36(a0)
+; RV64I-NEXT:    lw t3, 40(a0)
+; RV64I-NEXT:    lw t4, 44(a0)
+; RV64I-NEXT:    lw t5, 48(a0)
+; RV64I-NEXT:    lw t6, 52(a0)
+; RV64I-NEXT:    lw s0, 56(a0)
+; RV64I-NEXT:    lw s1, 60(a0)
+; RV64I-NEXT:    lw s2, 64(a0)
+; RV64I-NEXT:    lw s3, 68(a0)
+; RV64I-NEXT:    lw s4, 72(a0)
+; RV64I-NEXT:    lw s5, 76(a0)
+; RV64I-NEXT:    lw s6, 80(a0)
+; RV64I-NEXT:    lw s7, 84(a0)
+; RV64I-NEXT:    lw s8, 88(a0)
+; RV64I-NEXT:    lw s9, 92(a0)
+; RV64I-NEXT:    lw s10, 96(a0)
+; RV64I-NEXT:    lw s11, 100(a0)
+; RV64I-NEXT:    lw ra, 104(a0)
+; RV64I-NEXT:    lw a5, 108(a0)
+; RV64I-NEXT:    lw a1, 124(a0)
+; RV64I-NEXT:    lw a2, 120(a0)
+; RV64I-NEXT:    lw a3, 116(a0)
+; RV64I-NEXT:    lw a4, 112(a0)
+; RV64I-NEXT:    sw a1, 124(a0)
+; RV64I-NEXT:    sw a2, 120(a0)
+; RV64I-NEXT:    sw a3, 116(a0)
+; RV64I-NEXT:    sw a4, 112(a0)
+; RV64I-NEXT:    sw a5, 108(a0)
+; RV64I-NEXT:    sw ra, 104(a0)
+; RV64I-NEXT:    sw s11, 100(a0)
+; RV64I-NEXT:    sw s10, 96(a0)
+; RV64I-NEXT:    sw s9, 92(a0)
+; RV64I-NEXT:    sw s8, 88(a0)
+; RV64I-NEXT:    sw s7, 84(a0)
+; RV64I-NEXT:    sw s6, 80(a0)
+; RV64I-NEXT:    sw s5, 76(a0)
+; RV64I-NEXT:    sw s4, 72(a0)
+; RV64I-NEXT:    sw s3, 68(a0)
+; RV64I-NEXT:    sw s2, 64(a0)
+; RV64I-NEXT:    sw s1, 60(a0)
+; RV64I-NEXT:    sw s0, 56(a0)
+; RV64I-NEXT:    sw t6, 52(a0)
+; RV64I-NEXT:    sw t5, 48(a0)
+; RV64I-NEXT:    sw t4, 44(a0)
+; RV64I-NEXT:    sw t3, 40(a0)
+; RV64I-NEXT:    sw t2, 36(a0)
+; RV64I-NEXT:    sw t1, 32(a0)
+; RV64I-NEXT:    sw t0, 28(a0)
+; RV64I-NEXT:    sw a7, 24(a0)
+; RV64I-NEXT:    sw a6, 20(a0)
+; RV64I-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 16(a0)
+; RV64I-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 12(a0)
+; RV64I-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 8(a0)
+; RV64I-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 4(a0)
+; RV64I-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 0(a0)
+; RV64I-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 144
 ; RV64I-NEXT:    ret
 ;
 ; RV64I-LP64E-LABEL: callee:
 ; RV64I-LP64E:       # %bb.0:
-; RV64I-LP64E-NEXT:    addi sp, sp, -72
-; RV64I-LP64E-NEXT:    sd ra, 64(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    sd s0, 56(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    sd s1, 48(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lui a6, %hi(var)
-; RV64I-LP64E-NEXT:    lw a0, %lo(var)(a6)
-; RV64I-LP64E-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, %lo(var+4)(a6)
-; RV64I-LP64E-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, %lo(var+8)(a6)
-; RV64I-LP64E-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, %lo(var+12)(a6)
-; RV64I-LP64E-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    addi a5, a6, %lo(var)
-; RV64I-LP64E-NEXT:    lw a0, 16(a5)
-; RV64I-LP64E-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 20(a5)
-; RV64I-LP64E-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw t0, 24(a5)
-; RV64I-LP64E-NEXT:    lw t1, 28(a5)
-; RV64I-LP64E-NEXT:    lw t2, 32(a5)
-; RV64I-LP64E-NEXT:    lw t3, 36(a5)
-; RV64I-LP64E-NEXT:    lw t4, 40(a5)
-; RV64I-LP64E-NEXT:    lw t5, 44(a5)
-; RV64I-LP64E-NEXT:    lw t6, 48(a5)
-; RV64I-LP64E-NEXT:    lw s2, 52(a5)
-; RV64I-LP64E-NEXT:    lw s3, 56(a5)
-; RV64I-LP64E-NEXT:    lw s4, 60(a5)
-; RV64I-LP64E-NEXT:    lw s5, 64(a5)
-; RV64I-LP64E-NEXT:    lw s6, 68(a5)
-; RV64I-LP64E-NEXT:    lw s7, 72(a5)
-; RV64I-LP64E-NEXT:    lw s8, 76(a5)
-; RV64I-LP64E-NEXT:    lw s9, 80(a5)
-; RV64I-LP64E-NEXT:    lw s10, 84(a5)
-; RV64I-LP64E-NEXT:    lw s11, 88(a5)
-; RV64I-LP64E-NEXT:    lw s0, 92(a5)
-; RV64I-LP64E-NEXT:    lw s1, 96(a5)
-; RV64I-LP64E-NEXT:    lw ra, 100(a5)
-; RV64I-LP64E-NEXT:    lw a7, 104(a5)
-; RV64I-LP64E-NEXT:    lw a4, 108(a5)
-; RV64I-LP64E-NEXT:    lw a0, 124(a5)
-; RV64I-LP64E-NEXT:    lw a1, 120(a5)
-; RV64I-LP64E-NEXT:    lw a2, 116(a5)
-; RV64I-LP64E-NEXT:    lw a3, 112(a5)
-; RV64I-LP64E-NEXT:    sw a0, 124(a5)
-; RV64I-LP64E-NEXT:    sw a1, 120(a5)
-; RV64I-LP64E-NEXT:    sw a2, 116(a5)
-; RV64I-LP64E-NEXT:    sw a3, 112(a5)
-; RV64I-LP64E-NEXT:    sw a4, 108(a5)
-; RV64I-LP64E-NEXT:    sw a7, 104(a5)
-; RV64I-LP64E-NEXT:    sw ra, 100(a5)
-; RV64I-LP64E-NEXT:    sw s1, 96(a5)
-; RV64I-LP64E-NEXT:    sw s0, 92(a5)
-; RV64I-LP64E-NEXT:    sw s11, 88(a5)
-; RV64I-LP64E-NEXT:    sw s10, 84(a5)
-; RV64I-LP64E-NEXT:    sw s9, 80(a5)
-; RV64I-LP64E-NEXT:    sw s8, 76(a5)
-; RV64I-LP64E-NEXT:    sw s7, 72(a5)
-; RV64I-LP64E-NEXT:    sw s6, 68(a5)
-; RV64I-LP64E-NEXT:    sw s5, 64(a5)
-; RV64I-LP64E-NEXT:    sw s4, 60(a5)
-; RV64I-LP64E-NEXT:    sw s3, 56(a5)
-; RV64I-LP64E-NEXT:    sw s2, 52(a5)
-; RV64I-LP64E-NEXT:    sw t6, 48(a5)
-; RV64I-LP64E-NEXT:    sw t5, 44(a5)
-; RV64I-LP64E-NEXT:    sw t4, 40(a5)
-; RV64I-LP64E-NEXT:    sw t3, 36(a5)
-; RV64I-LP64E-NEXT:    sw t2, 32(a5)
-; RV64I-LP64E-NEXT:    sw t1, 28(a5)
-; RV64I-LP64E-NEXT:    sw t0, 24(a5)
-; RV64I-LP64E-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 20(a5)
-; RV64I-LP64E-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 16(a5)
-; RV64I-LP64E-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, %lo(var+12)(a6)
-; RV64I-LP64E-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, %lo(var+8)(a6)
-; RV64I-LP64E-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, %lo(var+4)(a6)
-; RV64I-LP64E-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, %lo(var)(a6)
-; RV64I-LP64E-NEXT:    ld ra, 64(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    ld s0, 56(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    ld s1, 48(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    addi sp, sp, 72
+; RV64I-LP64E-NEXT:    addi sp, sp, -64
+; RV64I-LP64E-NEXT:    sd ra, 56(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    sd s0, 48(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    lui a0, %hi(var)
+; RV64I-LP64E-NEXT:    addi a0, a0, %lo(var)
+; RV64I-LP64E-NEXT:    lw a1, 0(a0)
+; RV64I-LP64E-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    lw a1, 4(a0)
+; RV64I-LP64E-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    lw a1, 8(a0)
+; RV64I-LP64E-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    lw a1, 12(a0)
+; RV64I-LP64E-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    lw a1, 16(a0)
+; RV64I-LP64E-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    lw a6, 20(a0)
+; RV64I-LP64E-NEXT:    lw a7, 24(a0)
+; RV64I-LP64E-NEXT:    lw t0, 28(a0)
+; RV64I-LP64E-NEXT:    lw t1, 32(a0)
+; RV64I-LP64E-NEXT:    lw t2, 36(a0)
+; RV64I-LP64E-NEXT:    lw t3, 40(a0)
+; RV64I-LP64E-NEXT:    lw t4, 44(a0)
+; RV64I-LP64E-NEXT:    lw t5, 48(a0)
+; RV64I-LP64E-NEXT:    lw t6, 52(a0)
+; RV64I-LP64E-NEXT:    lw s2, 56(a0)
+; RV64I-LP64E-NEXT:    lw s3, 60(a0)
+; RV64I-LP64E-NEXT:    lw s4, 64(a0)
+; RV64I-LP64E-NEXT:    lw s5, 68(a0)
+; RV64I-LP64E-NEXT:    lw s6, 72(a0)
+; RV64I-LP64E-NEXT:    lw s7, 76(a0)
+; RV64I-LP64E-NEXT:    lw s8, 80(a0)
+; RV64I-LP64E-NEXT:    lw s9, 84(a0)
+; RV64I-LP64E-NEXT:    lw s10, 88(a0)
+; RV64I-LP64E-NEXT:    lw s11, 92(a0)
+; RV64I-LP64E-NEXT:    lw s0, 96(a0)
+; RV64I-LP64E-NEXT:    lw s1, 100(a0)
+; RV64I-LP64E-NEXT:    lw ra, 104(a0)
+; RV64I-LP64E-NEXT:    lw a5, 108(a0)
+; RV64I-LP64E-NEXT:    lw a1, 124(a0)
+; RV64I-LP64E-NEXT:    lw a2, 120(a0)
+; RV64I-LP64E-NEXT:    lw a3, 116(a0)
+; RV64I-LP64E-NEXT:    lw a4, 112(a0)
+; RV64I-LP64E-NEXT:    sw a1, 124(a0)
+; RV64I-LP64E-NEXT:    sw a2, 120(a0)
+; RV64I-LP64E-NEXT:    sw a3, 116(a0)
+; RV64I-LP64E-NEXT:    sw a4, 112(a0)
+; RV64I-LP64E-NEXT:    sw a5, 108(a0)
+; RV64I-LP64E-NEXT:    sw ra, 104(a0)
+; RV64I-LP64E-NEXT:    sw s1, 100(a0)
+; RV64I-LP64E-NEXT:    sw s0, 96(a0)
+; RV64I-LP64E-NEXT:    sw s11, 92(a0)
+; RV64I-LP64E-NEXT:    sw s10, 88(a0)
+; RV64I-LP64E-NEXT:    sw s9, 84(a0)
+; RV64I-LP64E-NEXT:    sw s8, 80(a0)
+; RV64I-LP64E-NEXT:    sw s7, 76(a0)
+; RV64I-LP64E-NEXT:    sw s6, 72(a0)
+; RV64I-LP64E-NEXT:    sw s5, 68(a0)
+; RV64I-LP64E-NEXT:    sw s4, 64(a0)
+; RV64I-LP64E-NEXT:    sw s3, 60(a0)
+; RV64I-LP64E-NEXT:    sw s2, 56(a0)
+; RV64I-LP64E-NEXT:    sw t6, 52(a0)
+; RV64I-LP64E-NEXT:    sw t5, 48(a0)
+; RV64I-LP64E-NEXT:    sw t4, 44(a0)
+; RV64I-LP64E-NEXT:    sw t3, 40(a0)
+; RV64I-LP64E-NEXT:    sw t2, 36(a0)
+; RV64I-LP64E-NEXT:    sw t1, 32(a0)
+; RV64I-LP64E-NEXT:    sw t0, 28(a0)
+; RV64I-LP64E-NEXT:    sw a7, 24(a0)
+; RV64I-LP64E-NEXT:    sw a6, 20(a0)
+; RV64I-LP64E-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT:    sw a1, 16(a0)
+; RV64I-LP64E-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT:    sw a1, 12(a0)
+; RV64I-LP64E-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT:    sw a1, 8(a0)
+; RV64I-LP64E-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT:    sw a1, 4(a0)
+; RV64I-LP64E-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT:    sw a1, 0(a0)
+; RV64I-LP64E-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; RV64I-LP64E-NEXT:    addi sp, sp, 64
 ; RV64I-LP64E-NEXT:    ret
 ;
 ; RV64I-WITH-FP-LABEL: callee:
@@ -764,86 +750,84 @@ define void @callee() nounwind {
 ; RV64I-WITH-FP-NEXT:    sd s10, 64(sp) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    addi s0, sp, 160
-; RV64I-WITH-FP-NEXT:    lui a6, %hi(var)
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var)(a6)
-; RV64I-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+4)(a6)
-; RV64I-WITH-FP-NEXT:    sd a0, -120(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+8)(a6)
-; RV64I-WITH-FP-NEXT:    sd a0, -128(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+12)(a6)
-; RV64I-WITH-FP-NEXT:    sd a0, -136(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    addi a5, a6, %lo(var)
-; RV64I-WITH-FP-NEXT:    lw a0, 16(a5)
-; RV64I-WITH-FP-NEXT:    sd a0, -144(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 20(a5)
-; RV64I-WITH-FP-NEXT:    sd a0, -152(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 24(a5)
-; RV64I-WITH-FP-NEXT:    sd a0, -160(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw t1, 28(a5)
-; RV64I-WITH-FP-NEXT:    lw t2, 32(a5)
-; RV64I-WITH-FP-NEXT:    lw t3, 36(a5)
-; RV64I-WITH-FP-NEXT:    lw t4, 40(a5)
-; RV64I-WITH-FP-NEXT:    lw t5, 44(a5)
-; RV64I-WITH-FP-NEXT:    lw t6, 48(a5)
-; RV64I-WITH-FP-NEXT:    lw s1, 52(a5)
-; RV64I-WITH-FP-NEXT:    lw s2, 56(a5)
-; RV64I-WITH-FP-NEXT:    lw s3, 60(a5)
-; RV64I-WITH-FP-NEXT:    lw s4, 64(a5)
-; RV64I-WITH-FP-NEXT:    lw s5, 68(a5)
-; RV64I-WITH-FP-NEXT:    lw s6, 72(a5)
-; RV64I-WITH-FP-NEXT:    lw s7, 76(a5)
-; RV64I-WITH-FP-NEXT:    lw s8, 80(a5)
-; RV64I-WITH-FP-NEXT:    lw s9, 84(a5)
-; RV64I-WITH-FP-NEXT:    lw s10, 88(a5)
-; RV64I-WITH-FP-NEXT:    lw s11, 92(a5)
-; RV64I-WITH-FP-NEXT:    lw ra, 96(a5)
-; RV64I-WITH-FP-NEXT:    lw t0, 100(a5)
-; RV64I-WITH-FP-NEXT:    lw a7, 104(a5)
-; RV64I-WITH-FP-NEXT:    lw a4, 108(a5)
-; RV64I-WITH-FP-NEXT:    lw a0, 124(a5)
-; RV64I-WITH-FP-NEXT:    lw a1, 120(a5)
-; RV64I-WITH-FP-NEXT:    lw a2, 116(a5)
-; RV64I-WITH-FP-NEXT:    lw a3, 112(a5)
-; RV64I-WITH-FP-NEXT:    sw a0, 124(a5)
-; RV64I-WITH-FP-NEXT:    sw a1, 120(a5)
-; RV64I-WITH-FP-NEXT:    sw a2, 116(a5)
-; RV64I-WITH-FP-NEXT:    sw a3, 112(a5)
-; RV64I-WITH-FP-NEXT:    sw a4, 108(a5)
-; RV64I-WITH-FP-NEXT:    sw a7, 104(a5)
-; RV64I-WITH-FP-NEXT:    sw t0, 100(a5)
-; RV64I-WITH-FP-NEXT:    sw ra, 96(a5)
-; RV64I-WITH-FP-NEXT:    sw s11, 92(a5)
-; RV64I-WITH-FP-NEXT:    sw s10, 88(a5)
-; RV64I-WITH-FP-NEXT:    sw s9, 84(a5)
-; RV64I-WITH-FP-NEXT:    sw s8, 80(a5)
-; RV64I-WITH-FP-NEXT:    sw s7, 76(a5)
-; RV64I-WITH-FP-NEXT:    sw s6, 72(a5)
-; RV64I-WITH-FP-NEXT:    sw s5, 68(a5)
-; RV64I-WITH-FP-NEXT:    sw s4, 64(a5)
-; RV64I-WITH-FP-NEXT:    sw s3, 60(a5)
-; RV64I-WITH-FP-NEXT:    sw s2, 56(a5)
-; RV64I-WITH-FP-NEXT:    sw s1, 52(a5)
-; RV64I-WITH-FP-NEXT:    sw t6, 48(a5)
-; RV64I-WITH-FP-NEXT:    sw t5, 44(a5)
-; RV64I-WITH-FP-NEXT:    sw t4, 40(a5)
-; RV64I-WITH-FP-NEXT:    sw t3, 36(a5)
-; RV64I-WITH-FP-NEXT:    sw t2, 32(a5)
-; RV64I-WITH-FP-NEXT:    sw t1, 28(a5)
-; RV64I-WITH-FP-NEXT:    ld a0, -160(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 24(a5)
-; RV64I-WITH-FP-NEXT:    ld a0, -152(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 20(a5)
-; RV64I-WITH-FP-NEXT:    ld a0, -144(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 16(a5)
-; RV64I-WITH-FP-NEXT:    ld a0, -136(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+12)(a6)
-; RV64I-WITH-FP-NEXT:    ld a0, -128(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+8)(a6)
-; RV64I-WITH-FP-NEXT:    ld a0, -120(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+4)(a6)
-; RV64I-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var)(a6)
+; RV64I-WITH-FP-NEXT:    lui a0, %hi(var)
+; RV64I-WITH-FP-NEXT:    addi a0, a0, %lo(var)
+; RV64I-WITH-FP-NEXT:    lw a1, 0(a0)
+; RV64I-WITH-FP-NEXT:    sd a1, -112(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a1, 4(a0)
+; RV64I-WITH-FP-NEXT:    sd a1, -120(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a1, 8(a0)
+; RV64I-WITH-FP-NEXT:    sd a1, -128(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a1, 12(a0)
+; RV64I-WITH-FP-NEXT:    sd a1, -136(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a1, 16(a0)
+; RV64I-WITH-FP-NEXT:    sd a1, -144(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a1, 20(a0)
+; RV64I-WITH-FP-NEXT:    sd a1, -152(s0) # 8-byte Folded Spill
+; RV64I-WITH-FP-NEXT:    lw a7, 24(a0)
+; RV64I-WITH-FP-NEXT:    lw t0, 28(a0)
+; RV64I-WITH-FP-NEXT:    lw t1, 32(a0)
+; RV64I-WITH-FP-NEXT:    lw t2, 36(a0)
+; RV64I-WITH-FP-NEXT:    lw t3, 40(a0)
+; RV64I-WITH-FP-NEXT:    lw t4, 44(a0)
+; RV64I-WITH-FP-NEXT:    lw t5, 48(a0)
+; RV64I-WITH-FP-NEXT:    lw t6, 52(a0)
+; RV64I-WITH-FP-NEXT:    lw s1, 56(a0)
+; RV64I-WITH-FP-NEXT:    lw s2, 60(a0)
+; RV64I-WITH-FP-NEXT:    lw s3, 64(a0)
+; RV64I-WITH-FP-NEXT:    lw s4, 68(a0)
+; RV64I-WITH-FP-NEXT:    lw s5, 72(a0)
+; RV64I-WITH-FP-NEXT:    lw s6, 76(a0)
+; RV64I-WITH-FP-NEXT:    lw s7, 80(a0)
+; RV64I-WITH-FP-NEXT:    lw s8, 84(a0)
+; RV64I-WITH-FP-NEXT:    lw s9, 88(a0)
+; RV64I-WITH-FP-NEXT:    lw s10, 92(a0)
+; RV64I-WITH-FP-NEXT:    lw s11, 96(a0)
+; RV64I-WITH-FP-NEXT:    lw ra, 100(a0)
+; RV64I-WITH-FP-NEXT:    lw a6, 104(a0)
+; RV64I-WITH-FP-NEXT:    lw a5, 108(a0)
+; RV64I-WITH-FP-NEXT:    lw a1, 124(a0)
+; RV64I-WITH-FP-NEXT:    lw a2, 120(a0)
+; RV64I-WITH-FP-NEXT:    lw a3, 116(a0)
+; RV64I-WITH-FP-NEXT:    lw a4, 112(a0)
+; RV64I-WITH-FP-NEXT:    sw a1, 124(a0)
+; RV64I-WITH-FP-NEXT:    sw a2, 120(a0)
+; RV64I-WITH-FP-NEXT:    sw a3, 116(a0)
+; RV64I-WITH-FP-NEXT:    sw a4, 112(a0)
+; RV64I-WITH-FP-NEXT:    sw a5, 108(a0)
+; RV64I-WITH-FP-NEXT:    sw a6, 104(a0)
+; RV64I-WITH-FP-NEXT:    sw ra, 100(a0)
+; RV64I-WITH-FP-NEXT:    sw s11, 96(a0)
+; RV64I-WITH-FP-NEXT:    sw s10, 92(a0)
+; RV64I-WITH-FP-NEXT:    sw s9, 88(a0)
+; RV64I-WITH-FP-NEXT:    sw s8, 84(a0)
+; RV64I-WITH-FP-NEXT:    sw s7, 80(a0)
+; RV64I-WITH-FP-NEXT:    sw s6, 76(a0)
+; RV64I-WITH-FP-NEXT:    sw s5, 72(a0)
+; RV64I-WITH-FP-NEXT:    sw s4, 68(a0)
+; RV64I-WITH-FP-NEXT:    sw s3, 64(a0)
+; RV64I-WITH-FP-NEXT:    sw s2, 60(a0)
+; RV64I-WITH-FP-NEXT:    sw s1, 56(a0)
+; RV64I-WITH-FP-NEXT:    sw t6, 52(a0)
+; RV64I-WITH-FP-NEXT:    sw t5, 48(a0)
+; RV64I-WITH-FP-NEXT:    sw t4, 44(a0)
+; RV64I-WITH-FP-NEXT:    sw t3, 40(a0)
+; RV64I-WITH-FP-NEXT:    sw t2, 36(a0)
+; RV64I-WITH-FP-NEXT:    sw t1, 32(a0)
+; RV64I-WITH-FP-NEXT:    sw t0, 28(a0)
+; RV64I-WITH-FP-NEXT:    sw a7, 24(a0)
+; RV64I-WITH-FP-NEXT:    ld a1, -152(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a1, 20(a0)
+; RV64I-WITH-FP-NEXT:    ld a1, -144(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a1, 16(a0)
+; RV64I-WITH-FP-NEXT:    ld a1, -136(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a1, 12(a0)
+; RV64I-WITH-FP-NEXT:    ld a1, -128(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a1, 8(a0)
+; RV64I-WITH-FP-NEXT:    ld a1, -120(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a1, 4(a0)
+; RV64I-WITH-FP-NEXT:    ld a1, -112(s0) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    sw a1, 0(a0)
 ; RV64I-WITH-FP-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
@@ -863,84 +847,82 @@ define void @callee() nounwind {
 ; RV64IZCMP-LABEL: callee:
 ; RV64IZCMP:       # %bb.0:
 ; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
-; RV64IZCMP-NEXT:    lui a6, %hi(var)
-; RV64IZCMP-NEXT:    lw a0, %lo(var)(a6)
-; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var+4)(a6)
-; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var+8)(a6)
-; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var+12)(a6)
-; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    addi a5, a6, %lo(var)
-; RV64IZCMP-NEXT:    lw a0, 16(a5)
-; RV64IZCMP-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 20(a5)
-; RV64IZCMP-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw t4, 24(a5)
-; RV64IZCMP-NEXT:    lw t5, 28(a5)
-; RV64IZCMP-NEXT:    lw t6, 32(a5)
-; RV64IZCMP-NEXT:    lw s2, 36(a5)
-; RV64IZCMP-NEXT:    lw s3, 40(a5)
-; RV64IZCMP-NEXT:    lw s4, 44(a5)
-; RV64IZCMP-NEXT:    lw s5, 48(a5)
-; RV64IZCMP-NEXT:    lw s6, 52(a5)
-; RV64IZCMP-NEXT:    lw s7, 56(a5)
-; RV64IZCMP-NEXT:    lw s8, 60(a5)
-; RV64IZCMP-NEXT:    lw s9, 64(a5)
-; RV64IZCMP-NEXT:    lw s10, 68(a5)
-; RV64IZCMP-NEXT:    lw s11, 72(a5)
-; RV64IZCMP-NEXT:    lw ra, 76(a5)
-; RV64IZCMP-NEXT:    lw s1, 80(a5)
-; RV64IZCMP-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-NEXT:    lw t1, 92(a5)
-; RV64IZCMP-NEXT:    lw t0, 96(a5)
-; RV64IZCMP-NEXT:    lw s0, 100(a5)
-; RV64IZCMP-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-NEXT:    sw a7, 104(a5)
-; RV64IZCMP-NEXT:    sw s0, 100(a5)
-; RV64IZCMP-NEXT:    sw t0, 96(a5)
-; RV64IZCMP-NEXT:    sw t1, 92(a5)
-; RV64IZCMP-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-NEXT:    sw t3, 84(a5)
-; RV64IZCMP-NEXT:    sw s1, 80(a5)
-; RV64IZCMP-NEXT:    sw ra, 76(a5)
-; RV64IZCMP-NEXT:    sw s11, 72(a5)
-; RV64IZCMP-NEXT:    sw s10, 68(a5)
-; RV64IZCMP-NEXT:    sw s9, 64(a5)
-; RV64IZCMP-NEXT:    sw s8, 60(a5)
-; RV64IZCMP-NEXT:    sw s7, 56(a5)
-; RV64IZCMP-NEXT:    sw s6, 52(a5)
-; RV64IZCMP-NEXT:    sw s5, 48(a5)
-; RV64IZCMP-NEXT:    sw s4, 44(a5)
-; RV64IZCMP-NEXT:    sw s3, 40(a5)
-; RV64IZCMP-NEXT:    sw s2, 36(a5)
-; RV64IZCMP-NEXT:    sw t6, 32(a5)
-; RV64IZCMP-NEXT:    sw t5, 28(a5)
-; RV64IZCMP-NEXT:    sw t4, 24(a5)
-; RV64IZCMP-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 20(a5)
-; RV64IZCMP-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 16(a5)
-; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var+12)(a6)
-; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var+8)(a6)
-; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var+4)(a6)
-; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var)(a6)
+; RV64IZCMP-NEXT:    lui a0, %hi(var)
+; RV64IZCMP-NEXT:    addi a0, a0, %lo(var)
+; RV64IZCMP-NEXT:    lw a1, 0(a0)
+; RV64IZCMP-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 4(a0)
+; RV64IZCMP-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 8(a0)
+; RV64IZCMP-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 12(a0)
+; RV64IZCMP-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 16(a0)
+; RV64IZCMP-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw t3, 20(a0)
+; RV64IZCMP-NEXT:    lw t4, 24(a0)
+; RV64IZCMP-NEXT:    lw t5, 28(a0)
+; RV64IZCMP-NEXT:    lw t6, 32(a0)
+; RV64IZCMP-NEXT:    lw s2, 36(a0)
+; RV64IZCMP-NEXT:    lw s3, 40(a0)
+; RV64IZCMP-NEXT:    lw s4, 44(a0)
+; RV64IZCMP-NEXT:    lw s5, 48(a0)
+; RV64IZCMP-NEXT:    lw s6, 52(a0)
+; RV64IZCMP-NEXT:    lw s7, 56(a0)
+; RV64IZCMP-NEXT:    lw s8, 60(a0)
+; RV64IZCMP-NEXT:    lw s9, 64(a0)
+; RV64IZCMP-NEXT:    lw s10, 68(a0)
+; RV64IZCMP-NEXT:    lw s11, 72(a0)
+; RV64IZCMP-NEXT:    lw ra, 76(a0)
+; RV64IZCMP-NEXT:    lw t2, 80(a0)
+; RV64IZCMP-NEXT:    lw s0, 84(a0)
+; RV64IZCMP-NEXT:    lw s1, 88(a0)
+; RV64IZCMP-NEXT:    lw t1, 92(a0)
+; RV64IZCMP-NEXT:    lw t0, 96(a0)
+; RV64IZCMP-NEXT:    lw a7, 100(a0)
+; RV64IZCMP-NEXT:    lw a6, 104(a0)
+; RV64IZCMP-NEXT:    lw a5, 108(a0)
+; RV64IZCMP-NEXT:    lw a1, 124(a0)
+; RV64IZCMP-NEXT:    lw a2, 120(a0)
+; RV64IZCMP-NEXT:    lw a3, 116(a0)
+; RV64IZCMP-NEXT:    lw a4, 112(a0)
+; RV64IZCMP-NEXT:    sw a1, 124(a0)
+; RV64IZCMP-NEXT:    sw a2, 120(a0)
+; RV64IZCMP-NEXT:    sw a3, 116(a0)
+; RV64IZCMP-NEXT:    sw a4, 112(a0)
+; RV64IZCMP-NEXT:    sw a5, 108(a0)
+; RV64IZCMP-NEXT:    sw a6, 104(a0)
+; RV64IZCMP-NEXT:    sw a7, 100(a0)
+; RV64IZCMP-NEXT:    sw t0, 96(a0)
+; RV64IZCMP-NEXT:    sw t1, 92(a0)
+; RV64IZCMP-NEXT:    sw s1, 88(a0)
+; RV64IZCMP-NEXT:    sw s0, 84(a0)
+; RV64IZCMP-NEXT:    sw t2, 80(a0)
+; RV64IZCMP-NEXT:    sw ra, 76(a0)
+; RV64IZCMP-NEXT:    sw s11, 72(a0)
+; RV64IZCMP-NEXT:    sw s10, 68(a0)
+; RV64IZCMP-NEXT:    sw s9, 64(a0)
+; RV64IZCMP-NEXT:    sw s8, 60(a0)
+; RV64IZCMP-NEXT:    sw s7, 56(a0)
+; RV64IZCMP-NEXT:    sw s6, 52(a0)
+; RV64IZCMP-NEXT:    sw s5, 48(a0)
+; RV64IZCMP-NEXT:    sw s4, 44(a0)
+; RV64IZCMP-NEXT:    sw s3, 40(a0)
+; RV64IZCMP-NEXT:    sw s2, 36(a0)
+; RV64IZCMP-NEXT:    sw t6, 32(a0)
+; RV64IZCMP-NEXT:    sw t5, 28(a0)
+; RV64IZCMP-NEXT:    sw t4, 24(a0)
+; RV64IZCMP-NEXT:    sw t3, 20(a0)
+; RV64IZCMP-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 16(a0)
+; RV64IZCMP-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 12(a0)
+; RV64IZCMP-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 8(a0)
+; RV64IZCMP-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 4(a0)
+; RV64IZCMP-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 0(a0)
 ; RV64IZCMP-NEXT:    cm.popret {ra, s0-s11}, 160
 ;
 ; RV64IZCMP-WITH-FP-LABEL: callee:
@@ -960,86 +942,84 @@ define void @callee() nounwind {
 ; RV64IZCMP-WITH-FP-NEXT:    sd s10, 64(sp) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    addi s0, sp, 160
-; RV64IZCMP-WITH-FP-NEXT:    lui a6, %hi(var)
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var)(a6)
-; RV64IZCMP-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+4)(a6)
-; RV64IZCMP-WITH-FP-NEXT:    sd a0, -120(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+8)(a6)
-; RV64IZCMP-WITH-FP-NEXT:    sd a0, -128(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+12)(a6)
-; RV64IZCMP-WITH-FP-NEXT:    sd a0, -136(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    addi a5, a6, %lo(var)
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, 16(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sd a0, -144(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, 20(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sd a0, -152(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, 24(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sd a0, -160(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw t5, 28(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t6, 32(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s2, 36(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s3, 40(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s4, 44(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s5, 48(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s6, 52(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s7, 56(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s8, 60(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s9, 64(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s10, 68(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s11, 72(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw ra, 76(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t4, 80(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw s1, 92(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t1, 96(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t0, 100(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a7, 104(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t0, 100(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t1, 96(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s1, 92(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t3, 84(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t4, 80(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw ra, 76(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s11, 72(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s10, 68(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s9, 64(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s8, 60(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s7, 56(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s6, 52(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s5, 48(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s4, 44(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s3, 40(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw s2, 36(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t6, 32(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t5, 28(a5)
-; RV64IZCMP-WITH-FP-NEXT:    ld a0, -160(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, 24(a5)
-; RV64IZCMP-WITH-FP-NEXT:    ld a0, -152(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, 20(a5)
-; RV64IZCMP-WITH-FP-NEXT:    ld a0, -144(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, 16(a5)
-; RV64IZCMP-WITH-FP-NEXT:    ld a0, -136(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+12)(a6)
-; RV64IZCMP-WITH-FP-NEXT:    ld a0, -128(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+8)(a6)
-; RV64IZCMP-WITH-FP-NEXT:    ld a0, -120(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+4)(a6)
-; RV64IZCMP-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var)(a6)
+; RV64IZCMP-WITH-FP-NEXT:    lui a0, %hi(var)
+; RV64IZCMP-WITH-FP-NEXT:    addi a0, a0, %lo(var)
+; RV64IZCMP-WITH-FP-NEXT:    lw a1, 0(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sd a1, -112(s0) # 8-byte Folded Spill
+; RV64IZCMP-WITH-FP-NEXT:    lw a1, 4(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sd a1, -120(s0) # 8-byte Folded Spill
+; RV64IZCMP-WITH-FP-NEXT:    lw a1, 8(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sd a1, -128(s0) # 8-byte Folded Spill
+; RV64IZCMP-WITH-FP-NEXT:    lw a1, 12(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sd a1, -136(s0) # 8-byte Folded Spill
+; RV64IZCMP-WITH-FP-NEXT:    lw a1, 16(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sd a1, -144(s0) # 8-byte Folded Spill
+; RV64IZCMP-WITH-FP-NEXT:    lw a1, 20(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sd a1, -152(s0) # 8-byte Folded Spill
+; RV64IZCMP-WITH-FP-NEXT:    lw t4, 24(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw t5, 28(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw t6, 32(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s2, 36(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s3, 40(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s4, 44(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s5, 48(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s6, 52(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s7, 56(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s8, 60(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s9, 64(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s10, 68(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s11, 72(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw ra, 76(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw s1, 80(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw t3, 84(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw t2, 88(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw t1, 92(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw t0, 96(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw a7, 100(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw a6, 104(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw a5, 108(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw a1, 124(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw a2, 120(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw a3, 116(a0)
+; RV64IZCMP-WITH-FP-NEXT:    lw a4, 112(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw a1, 124(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw a2, 120(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw a3, 116(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw a4, 112(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw a5, 108(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw a6, 104(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw a7, 100(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw t0, 96(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw t1, 92(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw t2, 88(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw t3, 84(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s1, 80(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw ra, 76(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s11, 72(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s10, 68(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s9, 64(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s8, 60(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s7, 56(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s6, 52(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s5, 48(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s4, 44(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s3, 40(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw s2, 36(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw t6, 32(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw t5, 28(a0)
+; RV64IZCMP-WITH-FP-NEXT:    sw t4, 24(a0)
+; RV64IZCMP-WITH-FP-NEXT:    ld a1, -152(s0) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT:    sw a1, 20(a0)
+; RV64IZCMP-WITH-FP-NEXT:    ld a1, -144(s0) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT:    sw a1, 16(a0)
+; RV64IZCMP-WITH-FP-NEXT:    ld a1, -136(s0) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT:    sw a1, 12(a0)
+; RV64IZCMP-WITH-FP-NEXT:    ld a1, -128(s0) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT:    sw a1, 8(a0)
+; RV64IZCMP-WITH-FP-NEXT:    ld a1, -120(s0) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT:    sw a1, 4(a0)
+; RV64IZCMP-WITH-FP-NEXT:    ld a1, -112(s0) # 8-byte Folded Reload
+; RV64IZCMP-WITH-FP-NEXT:    sw a1, 0(a0)
 ; RV64IZCMP-WITH-FP-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
 ; RV64IZCMP-WITH-FP-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
 ; RV64IZCMP-WITH-FP-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
@@ -1080,117 +1060,115 @@ define void @caller() nounwind {
 ; RV32I-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lui s0, %hi(var)
-; RV32I-NEXT:    lw a0, %lo(var)(s0)
+; RV32I-NEXT:    lui a0, %hi(var)
+; RV32I-NEXT:    addi s0, a0, %lo(var)
+; RV32I-NEXT:    lw a0, 0(s0)
 ; RV32I-NEXT:    sw a0, 88(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var+4)(s0)
+; RV32I-NEXT:    lw a0, 4(s0)
 ; RV32I-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var+8)(s0)
+; RV32I-NEXT:    lw a0, 8(s0)
 ; RV32I-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var+12)(s0)
+; RV32I-NEXT:    lw a0, 12(s0)
 ; RV32I-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    addi s5, s0, %lo(var)
-; RV32I-NEXT:    lw a0, 16(s5)
+; RV32I-NEXT:    lw a0, 16(s0)
 ; RV32I-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 20(s5)
+; RV32I-NEXT:    lw a0, 20(s0)
 ; RV32I-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 24(s5)
+; RV32I-NEXT:    lw a0, 24(s0)
 ; RV32I-NEXT:    sw a0, 64(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 28(s5)
+; RV32I-NEXT:    lw a0, 28(s0)
 ; RV32I-NEXT:    sw a0, 60(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 32(s5)
+; RV32I-NEXT:    lw a0, 32(s0)
 ; RV32I-NEXT:    sw a0, 56(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 36(s5)
+; RV32I-NEXT:    lw a0, 36(s0)
 ; RV32I-NEXT:    sw a0, 52(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 40(s5)
+; RV32I-NEXT:    lw a0, 40(s0)
 ; RV32I-NEXT:    sw a0, 48(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 44(s5)
+; RV32I-NEXT:    lw a0, 44(s0)
 ; RV32I-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 48(s5)
+; RV32I-NEXT:    lw a0, 48(s0)
 ; RV32I-NEXT:    sw a0, 40(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 52(s5)
+; RV32I-NEXT:    lw a0, 52(s0)
 ; RV32I-NEXT:    sw a0, 36(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 56(s5)
+; RV32I-NEXT:    lw a0, 56(s0)
 ; RV32I-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 60(s5)
+; RV32I-NEXT:    lw a0, 60(s0)
 ; RV32I-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 64(s5)
+; RV32I-NEXT:    lw a0, 64(s0)
 ; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 68(s5)
+; RV32I-NEXT:    lw a0, 68(s0)
 ; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 72(s5)
+; RV32I-NEXT:    lw a0, 72(s0)
 ; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 76(s5)
+; RV32I-NEXT:    lw a0, 76(s0)
 ; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 80(s5)
+; RV32I-NEXT:    lw a0, 80(s0)
 ; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 84(s5)
-; RV32I-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw s3, 88(s5)
-; RV32I-NEXT:    lw s4, 92(s5)
-; RV32I-NEXT:    lw s6, 96(s5)
-; RV32I-NEXT:    lw s7, 100(s5)
-; RV32I-NEXT:    lw s8, 104(s5)
-; RV32I-NEXT:    lw s9, 108(s5)
-; RV32I-NEXT:    lw s10, 112(s5)
-; RV32I-NEXT:    lw s11, 116(s5)
-; RV32I-NEXT:    lw s1, 120(s5)
-; RV32I-NEXT:    lw s2, 124(s5)
+; RV32I-NEXT:    lw s11, 84(s0)
+; RV32I-NEXT:    lw s1, 88(s0)
+; RV32I-NEXT:    lw s2, 92(s0)
+; RV32I-NEXT:    lw s3, 96(s0)
+; RV32I-NEXT:    lw s4, 100(s0)
+; RV32I-NEXT:    lw s5, 104(s0)
+; RV32I-NEXT:    lw s6, 108(s0)
+; RV32I-NEXT:    lw s7, 112(s0)
+; RV32I-NEXT:    lw s8, 116(s0)
+; RV32I-NEXT:    lw s9, 120(s0)
+; RV32I-NEXT:    lw s10, 124(s0)
 ; RV32I-NEXT:    call callee
-; RV32I-NEXT:    sw s2, 124(s5)
-; RV32I-NEXT:    sw s1, 120(s5)
-; RV32I-NEXT:    sw s11, 116(s5)
-; RV32I-NEXT:    sw s10, 112(s5)
-; RV32I-NEXT:    sw s9, 108(s5)
-; RV32I-NEXT:    sw s8, 104(s5)
-; RV32I-NEXT:    sw s7, 100(s5)
-; RV32I-NEXT:    sw s6, 96(s5)
-; RV32I-NEXT:    sw s4, 92(s5)
-; RV32I-NEXT:    sw s3, 88(s5)
-; RV32I-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 84(s5)
+; RV32I-NEXT:    sw s10, 124(s0)
+; RV32I-NEXT:    sw s9, 120(s0)
+; RV32I-NEXT:    sw s8, 116(s0)
+; RV32I-NEXT:    sw s7, 112(s0)
+; RV32I-NEXT:    sw s6, 108(s0)
+; RV32I-NEXT:    sw s5, 104(s0)
+; RV32I-NEXT:    sw s4, 100(s0)
+; RV32I-NEXT:    sw s3, 96(s0)
+; RV32I-NEXT:    sw s2, 92(s0)
+; RV32I-NEXT:    sw s1, 88(s0)
+; RV32I-NEXT:    sw s11, 84(s0)
 ; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 80(s5)
+; RV32I-NEXT:    sw a0, 80(s0)
 ; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 76(s5)
+; RV32I-NEXT:    sw a0, 76(s0)
 ; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 72(s5)
+; RV32I-NEXT:    sw a0, 72(s0)
 ; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 68(s5)
+; RV32I-NEXT:    sw a0, 68(s0)
 ; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 64(s5)
+; RV32I-NEXT:    sw a0, 64(s0)
 ; RV32I-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 60(s5)
+; RV32I-NEXT:    sw a0, 60(s0)
 ; RV32I-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 56(s5)
+; RV32I-NEXT:    sw a0, 56(s0)
 ; RV32I-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 52(s5)
+; RV32I-NEXT:    sw a0, 52(s0)
 ; RV32I-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 48(s5)
+; RV32I-NEXT:    sw a0, 48(s0)
 ; RV32I-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 44(s5)
+; RV32I-NEXT:    sw a0, 44(s0)
 ; RV32I-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 40(s5)
+; RV32I-NEXT:    sw a0, 40(s0)
 ; RV32I-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 36(s5)
+; RV32I-NEXT:    sw a0, 36(s0)
 ; RV32I-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 32(s5)
+; RV32I-NEXT:    sw a0, 32(s0)
 ; RV32I-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 28(s5)
+; RV32I-NEXT:    sw a0, 28(s0)
 ; RV32I-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 24(s5)
+; RV32I-NEXT:    sw a0, 24(s0)
 ; RV32I-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 20(s5)
+; RV32I-NEXT:    sw a0, 20(s0)
 ; RV32I-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 16(s5)
+; RV32I-NEXT:    sw a0, 16(s0)
 ; RV32I-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var+12)(s0)
+; RV32I-NEXT:    sw a0, 12(s0)
 ; RV32I-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var+8)(s0)
+; RV32I-NEXT:    sw a0, 8(s0)
 ; RV32I-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var+4)(s0)
+; RV32I-NEXT:    sw a0, 4(s0)
 ; RV32I-NEXT:    lw a0, 88(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var)(s0)
+; RV32I-NEXT:    sw a0, 0(s0)
 ; RV32I-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
@@ -1214,135 +1192,134 @@ define void @caller() nounwind {
 ; RV32I-ILP32E-NEXT:    sw s0, 128(sp) # 4-byte Folded Spill
 ; RV32I-ILP32E-NEXT:    sw s1, 124(sp) # 4-byte Folded Spill
 ; RV32I-ILP32E-NEXT:    lui a0, %hi(var)
-; RV32I-ILP32E-NEXT:    lw a1, %lo(var)(a0)
-; RV32I-ILP32E-NEXT:    sw a1, 120(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a1, %lo(var+4)(a0)
-; RV32I-ILP32E-NEXT:    sw a1, 116(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a1, %lo(var+8)(a0)
-; RV32I-ILP32E-NEXT:    sw a1, 112(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a1, %lo(var+12)(a0)
-; RV32I-ILP32E-NEXT:    sw a1, 108(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    addi s1, a0, %lo(var)
-; RV32I-ILP32E-NEXT:    lw a0, 16(s1)
+; RV32I-ILP32E-NEXT:    addi s0, a0, %lo(var)
+; RV32I-ILP32E-NEXT:    lw a0, 0(s0)
+; RV32I-ILP32E-NEXT:    sw a0, 120(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    lw a0, 4(s0)
+; RV32I-ILP32E-NEXT:    sw a0, 116(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    lw a0, 8(s0)
+; RV32I-ILP32E-NEXT:    sw a0, 112(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    lw a0, 12(s0)
+; RV32I-ILP32E-NEXT:    sw a0, 108(sp) # 4-byte Folded Spill
+; RV32I-ILP32E-NEXT:    lw a0, 16(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 104(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 20(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 20(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 100(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 24(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 24(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 96(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 28(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 28(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 92(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 32(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 32(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 88(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 36(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 36(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 40(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 40(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 44(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 44(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 48(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 48(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 52(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 52(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 56(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 56(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 64(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 60(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 60(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 60(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 64(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 64(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 56(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 68(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 68(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 52(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 72(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 72(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 48(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 76(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 76(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 80(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 80(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 40(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 84(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 84(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 36(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 88(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 88(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 92(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 92(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 96(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 96(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 100(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 100(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 104(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 104(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 108(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 108(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 112(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 112(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 116(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 116(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw a0, 120(s1)
+; RV32I-ILP32E-NEXT:    lw a0, 120(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 0(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lw s0, 124(s1)
+; RV32I-ILP32E-NEXT:    lw s1, 124(s0)
 ; RV32I-ILP32E-NEXT:    call callee
-; RV32I-ILP32E-NEXT:    sw s0, 124(s1)
+; RV32I-ILP32E-NEXT:    sw s1, 124(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 0(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 120(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 120(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 116(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 116(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 112(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 112(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 108(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 108(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 104(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 104(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 100(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 100(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 96(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 96(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 92(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 92(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 88(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 88(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 84(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 84(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 80(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 80(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 76(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 76(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 72(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 72(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 68(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 68(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 64(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 64(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 60(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 60(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 56(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 56(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 52(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 52(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 48(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 48(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 44(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 44(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 40(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 40(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 36(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 36(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 88(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 32(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 32(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 92(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 28(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 28(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 96(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 24(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 24(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 100(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 20(s1)
+; RV32I-ILP32E-NEXT:    sw a0, 20(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 104(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, 16(s1)
-; RV32I-ILP32E-NEXT:    lui a1, %hi(var)
+; RV32I-ILP32E-NEXT:    sw a0, 16(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 108(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, %lo(var+12)(a1)
+; RV32I-ILP32E-NEXT:    sw a0, 12(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 112(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, %lo(var+8)(a1)
+; RV32I-ILP32E-NEXT:    sw a0, 8(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 116(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, %lo(var+4)(a1)
+; RV32I-ILP32E-NEXT:    sw a0, 4(s0)
 ; RV32I-ILP32E-NEXT:    lw a0, 120(sp) # 4-byte Folded Reload
-; RV32I-ILP32E-NEXT:    sw a0, %lo(var)(a1)
+; RV32I-ILP32E-NEXT:    sw a0, 0(s0)
 ; RV32I-ILP32E-NEXT:    lw ra, 132(sp) # 4-byte Folded Reload
 ; RV32I-ILP32E-NEXT:    lw s0, 128(sp) # 4-byte Folded Reload
 ; RV32I-ILP32E-NEXT:    lw s1, 124(sp) # 4-byte Folded Reload
@@ -1366,119 +1343,117 @@ define void @caller() nounwind {
 ; RV32I-WITH-FP-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    addi s0, sp, 144
-; RV32I-WITH-FP-NEXT:    lui s1, %hi(var)
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var)(s1)
+; RV32I-WITH-FP-NEXT:    lui a0, %hi(var)
+; RV32I-WITH-FP-NEXT:    addi s1, a0, %lo(var)
+; RV32I-WITH-FP-NEXT:    lw a0, 0(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -56(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+4)(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 4(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -60(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+8)(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 8(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -64(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+12)(s1)
+; RV32I-WITH-FP-NEXT:    lw a0, 12(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -68(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    addi s6, s1, %lo(var)
-; RV32I-WITH-FP-NEXT:    lw a0, 16(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 16(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -72(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 20(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 20(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -76(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 24(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 24(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -80(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 28(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 28(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -84(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 32(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 32(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -88(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 36(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 36(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -92(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 40(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 40(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -96(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 44(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 44(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -100(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 48(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 48(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -104(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 52(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 52(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -108(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 56(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 56(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -112(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 60(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 60(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -116(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 64(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 64(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -120(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 68(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 68(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -124(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 72(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 72(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -128(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 76(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 76(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -132(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 80(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 80(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -136(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 84(s6)
+; RV32I-WITH-FP-NEXT:    lw a0, 84(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -140(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, 88(s6)
-; RV32I-WITH-FP-NEXT:    sw a0, -144(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw s8, 92(s6)
-; RV32I-WITH-FP-NEXT:    lw s9, 96(s6)
-; RV32I-WITH-FP-NEXT:    lw s10, 100(s6)
-; RV32I-WITH-FP-NEXT:    lw s11, 104(s6)
-; RV32I-WITH-FP-NEXT:    lw s2, 108(s6)
-; RV32I-WITH-FP-NEXT:    lw s3, 112(s6)
-; RV32I-WITH-FP-NEXT:    lw s4, 116(s6)
-; RV32I-WITH-FP-NEXT:    lw s5, 120(s6)
-; RV32I-WITH-FP-NEXT:    lw s7, 124(s6)
+; RV32I-WITH-FP-NEXT:    lw s4, 88(s1)
+; RV32I-WITH-FP-NEXT:    lw s5, 92(s1)
+; RV32I-WITH-FP-NEXT:    lw s6, 96(s1)
+; RV32I-WITH-FP-NEXT:    lw s7, 100(s1)
+; RV32I-WITH-FP-NEXT:    lw s8, 104(s1)
+; RV32I-WITH-FP-NEXT:    lw s9, 108(s1)
+; RV32I-WITH-FP-NEXT:    lw s10, 112(s1)
+; RV32I-WITH-FP-NEXT:    lw s11, 116(s1)
+; RV32I-WITH-FP-NEXT:    lw s2, 120(s1)
+; RV32I-WITH-FP-NEXT:    lw s3, 124(s1)
 ; RV32I-WITH-FP-NEXT:    call callee
-; RV32I-WITH-FP-NEXT:    sw s7, 124(s6)
-; RV32I-WITH-FP-NEXT:    sw s5, 120(s6)
-; RV32I-WITH-FP-NEXT:    sw s4, 116(s6)
-; RV32I-WITH-FP-NEXT:    sw s3, 112(s6)
-; RV32I-WITH-FP-NEXT:    sw s2, 108(s6)
-; RV32I-WITH-FP-NEXT:    sw s11, 104(s6)
-; RV32I-WITH-FP-NEXT:    sw s10, 100(s6)
-; RV32I-WITH-FP-NEXT:    sw s9, 96(s6)
-; RV32I-WITH-FP-NEXT:    sw s8, 92(s6)
-; RV32I-WITH-FP-NEXT:    lw a0, -144(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 88(s6)
+; RV32I-WITH-FP-NEXT:    sw s3, 124(s1)
+; RV32I-WITH-FP-NEXT:    sw s2, 120(s1)
+; RV32I-WITH-FP-NEXT:    sw s11, 116(s1)
+; RV32I-WITH-FP-NEXT:    sw s10, 112(s1)
+; RV32I-WITH-FP-NEXT:    sw s9, 108(s1)
+; RV32I-WITH-FP-NEXT:    sw s8, 104(s1)
+; RV32I-WITH-FP-NEXT:    sw s7, 100(s1)
+; RV32I-WITH-FP-NEXT:    sw s6, 96(s1)
+; RV32I-WITH-FP-NEXT:    sw s5, 92(s1)
+; RV32I-WITH-FP-NEXT:    sw s4, 88(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -140(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 84(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 84(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -136(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 80(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 80(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -132(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 76(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 76(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -128(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 72(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 72(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -124(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 68(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 68(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -120(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 64(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 64(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -116(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 60(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 60(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -112(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 56(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 56(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -108(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 52(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 52(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -104(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 48(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 48(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -100(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 44(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 44(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -96(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 40(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 40(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -92(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 36(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 36(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -88(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 32(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 32(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -84(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 28(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 28(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -80(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 24(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 24(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -76(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 20(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 20(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -72(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, 16(s6)
+; RV32I-WITH-FP-NEXT:    sw a0, 16(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -68(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+12)(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 12(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -64(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+8)(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 8(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -60(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+4)(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 4(s1)
 ; RV32I-WITH-FP-NEXT:    lw a0, -56(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var)(s1)
+; RV32I-WITH-FP-NEXT:    sw a0, 0(s1)
 ; RV32I-WITH-FP-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
@@ -1499,117 +1474,115 @@ define void @caller() nounwind {
 ; RV32IZCMP:       # %bb.0:
 ; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -112
 ; RV32IZCMP-NEXT:    addi sp, sp, -48
-; RV32IZCMP-NEXT:    lui s0, %hi(var)
-; RV32IZCMP-NEXT:    lw a0, %lo(var)(s0)
+; RV32IZCMP-NEXT:    lui a0, %hi(var)
+; RV32IZCMP-NEXT:    addi s0, a0, %lo(var)
+; RV32IZCMP-NEXT:    lw a0, 0(s0)
 ; RV32IZCMP-NEXT:    sw a0, 92(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var+4)(s0)
+; RV32IZCMP-NEXT:    lw a0, 4(s0)
 ; RV32IZCMP-NEXT:    sw a0, 88(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var+8)(s0)
+; RV32IZCMP-NEXT:    lw a0, 8(s0)
 ; RV32IZCMP-NEXT:    sw a0, 84(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var+12)(s0)
+; RV32IZCMP-NEXT:    lw a0, 12(s0)
 ; RV32IZCMP-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    addi s1, s0, %lo(var)
-; RV32IZCMP-NEXT:    lw a0, 16(s1)
+; RV32IZCMP-NEXT:    lw a0, 16(s0)
 ; RV32IZCMP-NEXT:    sw a0, 76(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 20(s1)
+; RV32IZCMP-NEXT:    lw a0, 20(s0)
 ; RV32IZCMP-NEXT:    sw a0, 72(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 24(s1)
+; RV32IZCMP-NEXT:    lw a0, 24(s0)
 ; RV32IZCMP-NEXT:    sw a0, 68(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 28(s1)
+; RV32IZCMP-NEXT:    lw a0, 28(s0)
 ; RV32IZCMP-NEXT:    sw a0, 64(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 32(s1)
+; RV32IZCMP-NEXT:    lw a0, 32(s0)
 ; RV32IZCMP-NEXT:    sw a0, 60(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 36(s1)
+; RV32IZCMP-NEXT:    lw a0, 36(s0)
 ; RV32IZCMP-NEXT:    sw a0, 56(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 40(s1)
+; RV32IZCMP-NEXT:    lw a0, 40(s0)
 ; RV32IZCMP-NEXT:    sw a0, 52(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 44(s1)
+; RV32IZCMP-NEXT:    lw a0, 44(s0)
 ; RV32IZCMP-NEXT:    sw a0, 48(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 48(s1)
+; RV32IZCMP-NEXT:    lw a0, 48(s0)
 ; RV32IZCMP-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 52(s1)
+; RV32IZCMP-NEXT:    lw a0, 52(s0)
 ; RV32IZCMP-NEXT:    sw a0, 40(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 56(s1)
+; RV32IZCMP-NEXT:    lw a0, 56(s0)
 ; RV32IZCMP-NEXT:    sw a0, 36(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 60(s1)
+; RV32IZCMP-NEXT:    lw a0, 60(s0)
 ; RV32IZCMP-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 64(s1)
+; RV32IZCMP-NEXT:    lw a0, 64(s0)
 ; RV32IZCMP-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 68(s1)
+; RV32IZCMP-NEXT:    lw a0, 68(s0)
 ; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 72(s1)
+; RV32IZCMP-NEXT:    lw a0, 72(s0)
 ; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 76(s1)
+; RV32IZCMP-NEXT:    lw a0, 76(s0)
 ; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 80(s1)
+; RV32IZCMP-NEXT:    lw a0, 80(s0)
 ; RV32IZCMP-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 84(s1)
-; RV32IZCMP-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw s4, 88(s1)
-; RV32IZCMP-NEXT:    lw s5, 92(s1)
-; RV32IZCMP-NEXT:    lw s6, 96(s1)
-; RV32IZCMP-NEXT:    lw s7, 100(s1)
-; RV32IZCMP-NEXT:    lw s8, 104(s1)
-; RV32IZCMP-NEXT:    lw s9, 108(s1)
-; RV32IZCMP-NEXT:    lw s10, 112(s1)
-; RV32IZCMP-NEXT:    lw s11, 116(s1)
-; RV32IZCMP-NEXT:    lw s2, 120(s1)
-; RV32IZCMP-NEXT:    lw s3, 124(s1)
+; RV32IZCMP-NEXT:    lw s1, 84(s0)
+; RV32IZCMP-NEXT:    lw s2, 88(s0)
+; RV32IZCMP-NEXT:    lw s3, 92(s0)
+; RV32IZCMP-NEXT:    lw s4, 96(s0)
+; RV32IZCMP-NEXT:    lw s5, 100(s0)
+; RV32IZCMP-NEXT:    lw s6, 104(s0)
+; RV32IZCMP-NEXT:    lw s7, 108(s0)
+; RV32IZCMP-NEXT:    lw s8, 112(s0)
+; RV32IZCMP-NEXT:    lw s9, 116(s0)
+; RV32IZCMP-NEXT:    lw s10, 120(s0)
+; RV32IZCMP-NEXT:    lw s11, 124(s0)
 ; RV32IZCMP-NEXT:    call callee
-; RV32IZCMP-NEXT:    sw s3, 124(s1)
-; RV32IZCMP-NEXT:    sw s2, 120(s1)
-; RV32IZCMP-NEXT:    sw s11, 116(s1)
-; RV32IZCMP-NEXT:    sw s10, 112(s1)
-; RV32IZCMP-NEXT:    sw s9, 108(s1)
-; RV32IZCMP-NEXT:    sw s8, 104(s1)
-; RV32IZCMP-NEXT:    sw s7, 100(s1)
-; RV32IZCMP-NEXT:    sw s6, 96(s1)
-; RV32IZCMP-NEXT:    sw s5, 92(s1)
-; RV32IZCMP-NEXT:    sw s4, 88(s1)
-; RV32IZCMP-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 84(s1)
+; RV32IZCMP-NEXT:    sw s11, 124(s0)
+; RV32IZCMP-NEXT:    sw s10, 120(s0)
+; RV32IZCMP-NEXT:    sw s9, 116(s0)
+; RV32IZCMP-NEXT:    sw s8, 112(s0)
+; RV32IZCMP-NEXT:    sw s7, 108(s0)
+; RV32IZCMP-NEXT:    sw s6, 104(s0)
+; RV32IZCMP-NEXT:    sw s5, 100(s0)
+; RV32IZCMP-NEXT:    sw s4, 96(s0)
+; RV32IZCMP-NEXT:    sw s3, 92(s0)
+; RV32IZCMP-NEXT:    sw s2, 88(s0)
+; RV32IZCMP-NEXT:    sw s1, 84(s0)
 ; RV32IZCMP-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 80(s1)
+; RV32IZCMP-NEXT:    sw a0, 80(s0)
 ; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 76(s1)
+; RV32IZCMP-NEXT:    sw a0, 76(s0)
 ; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 72(s1)
+; RV32IZCMP-NEXT:    sw a0, 72(s0)
 ; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 68(s1)
+; RV32IZCMP-NEXT:    sw a0, 68(s0)
 ; RV32IZCMP-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 64(s1)
+; RV32IZCMP-NEXT:    sw a0, 64(s0)
 ; RV32IZCMP-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 60(s1)
+; RV32IZCMP-NEXT:    sw a0, 60(s0)
 ; RV32IZCMP-NEXT:    lw a0, 36(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 56(s1)
+; RV32IZCMP-NEXT:    sw a0, 56(s0)
 ; RV32IZCMP-NEXT:    lw a0, 40(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 52(s1)
+; RV32IZCMP-NEXT:    sw a0, 52(s0)
 ; RV32IZCMP-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 48(s1)
+; RV32IZCMP-NEXT:    sw a0, 48(s0)
 ; RV32IZCMP-NEXT:    lw a0, 48(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 44(s1)
+; RV32IZCMP-NEXT:    sw a0, 44(s0)
 ; RV32IZCMP-NEXT:    lw a0, 52(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 40(s1)
+; RV32IZCMP-NEXT:    sw a0, 40(s0)
 ; RV32IZCMP-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 36(s1)
+; RV32IZCMP-NEXT:    sw a0, 36(s0)
 ; RV32IZCMP-NEXT:    lw a0, 60(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 32(s1)
+; RV32IZCMP-NEXT:    sw a0, 32(s0)
 ; RV32IZCMP-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 28(s1)
+; RV32IZCMP-NEXT:    sw a0, 28(s0)
 ; RV32IZCMP-NEXT:    lw a0, 68(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 24(s1)
+; RV32IZCMP-NEXT:    sw a0, 24(s0)
 ; RV32IZCMP-NEXT:    lw a0, 72(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 20(s1)
+; RV32IZCMP-NEXT:    sw a0, 20(s0)
 ; RV32IZCMP-NEXT:    lw a0, 76(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 16(s1)
+; RV32IZCMP-NEXT:    sw a0, 16(s0)
 ; RV32IZCMP-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var+12)(s0)
+; RV32IZCMP-NEXT:    sw a0, 12(s0)
 ; RV32IZCMP-NEXT:    lw a0, 84(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var+8)(s0)
+; RV32IZCMP-NEXT:    sw a0, 8(s0)
 ; RV32IZCMP-NEXT:    lw a0, 88(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var+4)(s0)
+; RV32IZCMP-NEXT:    sw a0, 4(s0)
 ; RV32IZCMP-NEXT:    lw a0, 92(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var)(s0)
+; RV32IZCMP-NEXT:    sw a0, 0(s0)
 ; RV32IZCMP-NEXT:    addi sp, sp, 48
 ; RV32IZCMP-NEXT:    cm.popret {ra, s0-s11}, 112
 ;
@@ -1630,16 +1603,16 @@ define void @caller() nounwind {
 ; RV32IZCMP-WITH-FP-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    addi s0, sp, 144
-; RV32IZCMP-WITH-FP-NEXT:    lui s6, %hi(var)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var)(s6)
+; RV32IZCMP-WITH-FP-NEXT:    lui a0, %hi(var)
+; RV32IZCMP-WITH-FP-NEXT:    addi s1, a0, %lo(var)
+; RV32IZCMP-WITH-FP-NEXT:    lw a0, 0(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -56(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+4)(s6)
+; RV32IZCMP-WITH-FP-NEXT:    lw a0, 4(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -60(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+8)(s6)
+; RV32IZCMP-WITH-FP-NEXT:    lw a0, 8(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -64(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+12)(s6)
+; RV32IZCMP-WITH-FP-NEXT:    lw a0, 12(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -68(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    addi s1, s6, %lo(var)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, 16(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -72(s0) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, 20(s1)
@@ -1676,29 +1649,27 @@ define void @caller() nounwind {
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -136(s0) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, 84(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -140(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, 88(s1)
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, -144(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw s8, 92(s1)
-; RV32IZCMP-WITH-FP-NEXT:    lw s9, 96(s1)
-; RV32IZCMP-WITH-FP-NEXT:    lw s10, 100(s1)
-; RV32IZCMP-WITH-FP-NEXT:    lw s11, 104(s1)
-; RV32IZCMP-WITH-FP-NEXT:    lw s2, 108(s1)
-; RV32IZCMP-WITH-FP-NEXT:    lw s3, 112(s1)
-; RV32IZCMP-WITH-FP-NEXT:    lw s4, 116(s1)
-; RV32IZCMP-WITH-FP-NEXT:    lw s5, 120(s1)
-; RV32IZCMP-WITH-FP-NEXT:    lw s7, 124(s1)
+; RV32IZCMP-WITH-FP-NEXT:    lw s4, 88(s1)
+; RV32IZCMP-WITH-FP-NEXT:    lw s5, 92(s1)
+; RV32IZCMP-WITH-FP-NEXT:    lw s6, 96(s1)
+; RV32IZCMP-WITH-FP-NEXT:    lw s7, 100(s1)
+; RV32IZCMP-WITH-FP-NEXT:    lw s8, 104(s1)
+; RV32IZCMP-WITH-FP-NEXT:    lw s9, 108(s1)
+; RV32IZCMP-WITH-FP-NEXT:    lw s10, 112(s1)
+; RV32IZCMP-WITH-FP-NEXT:    lw s11, 116(s1)
+; RV32IZCMP-WITH-FP-NEXT:    lw s2, 120(s1)
+; RV32IZCMP-WITH-FP-NEXT:    lw s3, 124(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    call callee
-; RV32IZCMP-WITH-FP-NEXT:    sw s7, 124(s1)
-; RV32IZCMP-WITH-FP-NEXT:    sw s5, 120(s1)
-; RV32IZCMP-WITH-FP-NEXT:    sw s4, 116(s1)
-; RV32IZCMP-WITH-FP-NEXT:    sw s3, 112(s1)
-; RV32IZCMP-WITH-FP-NEXT:    sw s2, 108(s1)
-; RV32IZCMP-WITH-FP-NEXT:    sw s11, 104(s1)
-; RV32IZCMP-WITH-FP-NEXT:    sw s10, 100(s1)
-; RV32IZCMP-WITH-FP-NEXT:    sw s9, 96(s1)
-; RV32IZCMP-WITH-FP-NEXT:    sw s8, 92(s1)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, -144(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, 88(s1)
+; RV32IZCMP-WITH-FP-NEXT:    sw s3, 124(s1)
+; RV32IZCMP-WITH-FP-NEXT:    sw s2, 120(s1)
+; RV32IZCMP-WITH-FP-NEXT:    sw s11, 116(s1)
+; RV32IZCMP-WITH-FP-NEXT:    sw s10, 112(s1)
+; RV32IZCMP-WITH-FP-NEXT:    sw s9, 108(s1)
+; RV32IZCMP-WITH-FP-NEXT:    sw s8, 104(s1)
+; RV32IZCMP-WITH-FP-NEXT:    sw s7, 100(s1)
+; RV32IZCMP-WITH-FP-NEXT:    sw s6, 96(s1)
+; RV32IZCMP-WITH-FP-NEXT:    sw s5, 92(s1)
+; RV32IZCMP-WITH-FP-NEXT:    sw s4, 88(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -140(s0) # 4-byte Folded Reload
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, 84(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -136(s0) # 4-byte Folded Reload
@@ -1736,13 +1707,13 @@ define void @caller() nounwind {
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -72(s0) # 4-byte Folded Reload
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, 16(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -68(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+12)(s6)
+; RV32IZCMP-WITH-FP-NEXT:    sw a0, 12(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -64(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+8)(s6)
+; RV32IZCMP-WITH-FP-NEXT:    sw a0, 8(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -60(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+4)(s6)
+; RV32IZCMP-WITH-FP-NEXT:    sw a0, 4(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -56(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var)(s6)
+; RV32IZCMP-WITH-FP-NEXT:    sw a0, 0(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32IZCMP-WITH-FP-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
 ; RV32IZCMP-WITH-FP-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
@@ -1761,145 +1732,143 @@ define void @caller() nounwind {
 ;
 ; RV64I-LABEL: caller:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -288
-; RV64I-NEXT:    sd ra, 280(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 272(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 264(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 256(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 248(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 240(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s5, 232(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s6, 224(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s7, 216(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s8, 208(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s9, 200(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lui s0, %hi(var)
-; RV64I-NEXT:    lw a0, %lo(var)(s0)
-; RV64I-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var+4)(s0)
-; RV64I-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var+8)(s0)
+; RV64I-NEXT:    addi sp, sp, -272
+; RV64I-NEXT:    sd ra, 264(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 256(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 248(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 240(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 232(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 224(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 216(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 208(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s7, 200(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s8, 192(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s9, 184(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s10, 176(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s11, 168(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a0, %hi(var)
+; RV64I-NEXT:    addi s0, a0, %lo(var)
+; RV64I-NEXT:    lw a0, 0(s0)
 ; RV64I-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var+12)(s0)
+; RV64I-NEXT:    lw a0, 4(s0)
 ; RV64I-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    addi s5, s0, %lo(var)
-; RV64I-NEXT:    lw a0, 16(s5)
+; RV64I-NEXT:    lw a0, 8(s0)
 ; RV64I-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 20(s5)
+; RV64I-NEXT:    lw a0, 12(s0)
 ; RV64I-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 24(s5)
+; RV64I-NEXT:    lw a0, 16(s0)
 ; RV64I-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 28(s5)
+; RV64I-NEXT:    lw a0, 20(s0)
 ; RV64I-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 32(s5)
+; RV64I-NEXT:    lw a0, 24(s0)
 ; RV64I-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 36(s5)
+; RV64I-NEXT:    lw a0, 28(s0)
 ; RV64I-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 40(s5)
+; RV64I-NEXT:    lw a0, 32(s0)
 ; RV64I-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 44(s5)
+; RV64I-NEXT:    lw a0, 36(s0)
 ; RV64I-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 48(s5)
+; RV64I-NEXT:    lw a0, 40(s0)
 ; RV64I-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 52(s5)
+; RV64I-NEXT:    lw a0, 44(s0)
 ; RV64I-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 56(s5)
+; RV64I-NEXT:    lw a0, 48(s0)
 ; RV64I-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 60(s5)
+; RV64I-NEXT:    lw a0, 52(s0)
 ; RV64I-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 64(s5)
+; RV64I-NEXT:    lw a0, 56(s0)
 ; RV64I-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 68(s5)
+; RV64I-NEXT:    lw a0, 60(s0)
 ; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 72(s5)
+; RV64I-NEXT:    lw a0, 64(s0)
 ; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 76(s5)
+; RV64I-NEXT:    lw a0, 68(s0)
 ; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 80(s5)
+; RV64I-NEXT:    lw a0, 72(s0)
 ; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 84(s5)
+; RV64I-NEXT:    lw a0, 76(s0)
 ; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw s3, 88(s5)
-; RV64I-NEXT:    lw s4, 92(s5)
-; RV64I-NEXT:    lw s6, 96(s5)
-; RV64I-NEXT:    lw s7, 100(s5)
-; RV64I-NEXT:    lw s8, 104(s5)
-; RV64I-NEXT:    lw s9, 108(s5)
-; RV64I-NEXT:    lw s10, 112(s5)
-; RV64I-NEXT:    lw s11, 116(s5)
-; RV64I-NEXT:    lw s1, 120(s5)
-; RV64I-NEXT:    lw s2, 124(s5)
+; RV64I-NEXT:    lw a0, 80(s0)
+; RV64I-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw s11, 84(s0)
+; RV64I-NEXT:    lw s1, 88(s0)
+; RV64I-NEXT:    lw s2, 92(s0)
+; RV64I-NEXT:    lw s3, 96(s0)
+; RV64I-NEXT:    lw s4, 100(s0)
+; RV64I-NEXT:    lw s5, 104(s0)
+; RV64I-NEXT:    lw s6, 108(s0)
+; RV64I-NEXT:    lw s7, 112(s0)
+; RV64I-NEXT:    lw s8, 116(s0)
+; RV64I-NEXT:    lw s9, 120(s0)
+; RV64I-NEXT:    lw s10, 124(s0)
 ; RV64I-NEXT:    call callee
-; RV64I-NEXT:    sw s2, 124(s5)
-; RV64I-NEXT:    sw s1, 120(s5)
-; RV64I-NEXT:    sw s11, 116(s5)
-; RV64I-NEXT:    sw s10, 112(s5)
-; RV64I-NEXT:    sw s9, 108(s5)
-; RV64I-NEXT:    sw s8, 104(s5)
-; RV64I-NEXT:    sw s7, 100(s5)
-; RV64I-NEXT:    sw s6, 96(s5)
-; RV64I-NEXT:    sw s4, 92(s5)
-; RV64I-NEXT:    sw s3, 88(s5)
+; RV64I-NEXT:    sw s10, 124(s0)
+; RV64I-NEXT:    sw s9, 120(s0)
+; RV64I-NEXT:    sw s8, 116(s0)
+; RV64I-NEXT:    sw s7, 112(s0)
+; RV64I-NEXT:    sw s6, 108(s0)
+; RV64I-NEXT:    sw s5, 104(s0)
+; RV64I-NEXT:    sw s4, 100(s0)
+; RV64I-NEXT:    sw s3, 96(s0)
+; RV64I-NEXT:    sw s2, 92(s0)
+; RV64I-NEXT:    sw s1, 88(s0)
+; RV64I-NEXT:    sw s11, 84(s0)
+; RV64I-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a0, 80(s0)
 ; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 84(s5)
+; RV64I-NEXT:    sw a0, 76(s0)
 ; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 80(s5)
+; RV64I-NEXT:    sw a0, 72(s0)
 ; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 76(s5)
+; RV64I-NEXT:    sw a0, 68(s0)
 ; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 72(s5)
+; RV64I-NEXT:    sw a0, 64(s0)
 ; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 68(s5)
+; RV64I-NEXT:    sw a0, 60(s0)
 ; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 64(s5)
+; RV64I-NEXT:    sw a0, 56(s0)
 ; RV64I-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 60(s5)
+; RV64I-NEXT:    sw a0, 52(s0)
 ; RV64I-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 56(s5)
+; RV64I-NEXT:    sw a0, 48(s0)
 ; RV64I-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 52(s5)
+; RV64I-NEXT:    sw a0, 44(s0)
 ; RV64I-NEXT:    ld a0, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 48(s5)
+; RV64I-NEXT:    sw a0, 40(s0)
 ; RV64I-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 44(s5)
+; RV64I-NEXT:    sw a0, 36(s0)
 ; RV64I-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 40(s5)
+; RV64I-NEXT:    sw a0, 32(s0)
 ; RV64I-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 36(s5)
+; RV64I-NEXT:    sw a0, 28(s0)
 ; RV64I-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 32(s5)
+; RV64I-NEXT:    sw a0, 24(s0)
 ; RV64I-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 28(s5)
+; RV64I-NEXT:    sw a0, 20(s0)
 ; RV64I-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 24(s5)
+; RV64I-NEXT:    sw a0, 16(s0)
 ; RV64I-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 20(s5)
+; RV64I-NEXT:    sw a0, 12(s0)
 ; RV64I-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 16(s5)
+; RV64I-NEXT:    sw a0, 8(s0)
 ; RV64I-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var+12)(s0)
+; RV64I-NEXT:    sw a0, 4(s0)
 ; RV64I-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var+8)(s0)
-; RV64I-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var+4)(s0)
-; RV64I-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var)(s0)
-; RV64I-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 256(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 248(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 232(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s6, 224(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s7, 216(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s8, 208(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s9, 200(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s10, 192(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 288
+; RV64I-NEXT:    sw a0, 0(s0)
+; RV64I-NEXT:    ld ra, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 232(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 184(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s10, 176(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s11, 168(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 272
 ; RV64I-NEXT:    ret
 ;
 ; RV64I-LP64E-LABEL: caller:
@@ -1909,135 +1878,134 @@ define void @caller() nounwind {
 ; RV64I-LP64E-NEXT:    sd s0, 256(sp) # 8-byte Folded Spill
 ; RV64I-LP64E-NEXT:    sd s1, 248(sp) # 8-byte Folded Spill
 ; RV64I-LP64E-NEXT:    lui a0, %hi(var)
-; RV64I-LP64E-NEXT:    lw a1, %lo(var)(a0)
-; RV64I-LP64E-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a1, %lo(var+4)(a0)
-; RV64I-LP64E-NEXT:    sd a1, 232(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a1, %lo(var+8)(a0)
-; RV64I-LP64E-NEXT:    sd a1, 224(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a1, %lo(var+12)(a0)
-; RV64I-LP64E-NEXT:    sd a1, 216(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    addi s1, a0, %lo(var)
-; RV64I-LP64E-NEXT:    lw a0, 16(s1)
+; RV64I-LP64E-NEXT:    addi s0, a0, %lo(var)
+; RV64I-LP64E-NEXT:    lw a0, 0(s0)
+; RV64I-LP64E-NEXT:    sd a0, 240(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    lw a0, 4(s0)
+; RV64I-LP64E-NEXT:    sd a0, 232(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    lw a0, 8(s0)
+; RV64I-LP64E-NEXT:    sd a0, 224(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    lw a0, 12(s0)
+; RV64I-LP64E-NEXT:    sd a0, 216(sp) # 8-byte Folded Spill
+; RV64I-LP64E-NEXT:    lw a0, 16(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 208(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 20(s1)
+; RV64I-LP64E-NEXT:    lw a0, 20(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 200(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 24(s1)
+; RV64I-LP64E-NEXT:    lw a0, 24(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 192(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 28(s1)
+; RV64I-LP64E-NEXT:    lw a0, 28(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 184(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 32(s1)
+; RV64I-LP64E-NEXT:    lw a0, 32(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 176(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 36(s1)
+; RV64I-LP64E-NEXT:    lw a0, 36(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 40(s1)
+; RV64I-LP64E-NEXT:    lw a0, 40(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 44(s1)
+; RV64I-LP64E-NEXT:    lw a0, 44(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 48(s1)
+; RV64I-LP64E-NEXT:    lw a0, 48(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 52(s1)
+; RV64I-LP64E-NEXT:    lw a0, 52(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 56(s1)
+; RV64I-LP64E-NEXT:    lw a0, 56(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 60(s1)
+; RV64I-LP64E-NEXT:    lw a0, 60(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 64(s1)
+; RV64I-LP64E-NEXT:    lw a0, 64(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 68(s1)
+; RV64I-LP64E-NEXT:    lw a0, 68(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 72(s1)
+; RV64I-LP64E-NEXT:    lw a0, 72(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 76(s1)
+; RV64I-LP64E-NEXT:    lw a0, 76(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 80(s1)
+; RV64I-LP64E-NEXT:    lw a0, 80(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 84(s1)
+; RV64I-LP64E-NEXT:    lw a0, 84(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 88(s1)
+; RV64I-LP64E-NEXT:    lw a0, 88(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 92(s1)
+; RV64I-LP64E-NEXT:    lw a0, 92(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 96(s1)
+; RV64I-LP64E-NEXT:    lw a0, 96(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 100(s1)
+; RV64I-LP64E-NEXT:    lw a0, 100(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 104(s1)
+; RV64I-LP64E-NEXT:    lw a0, 104(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 108(s1)
+; RV64I-LP64E-NEXT:    lw a0, 108(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 112(s1)
+; RV64I-LP64E-NEXT:    lw a0, 112(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 116(s1)
+; RV64I-LP64E-NEXT:    lw a0, 116(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw a0, 120(s1)
+; RV64I-LP64E-NEXT:    lw a0, 120(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lw s0, 124(s1)
+; RV64I-LP64E-NEXT:    lw s1, 124(s0)
 ; RV64I-LP64E-NEXT:    call callee
-; RV64I-LP64E-NEXT:    sw s0, 124(s1)
+; RV64I-LP64E-NEXT:    sw s1, 124(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 120(s1)
+; RV64I-LP64E-NEXT:    sw a0, 120(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 116(s1)
+; RV64I-LP64E-NEXT:    sw a0, 116(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 112(s1)
+; RV64I-LP64E-NEXT:    sw a0, 112(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 108(s1)
+; RV64I-LP64E-NEXT:    sw a0, 108(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 104(s1)
+; RV64I-LP64E-NEXT:    sw a0, 104(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 100(s1)
+; RV64I-LP64E-NEXT:    sw a0, 100(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 96(s1)
+; RV64I-LP64E-NEXT:    sw a0, 96(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 92(s1)
+; RV64I-LP64E-NEXT:    sw a0, 92(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 88(s1)
+; RV64I-LP64E-NEXT:    sw a0, 88(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 84(s1)
+; RV64I-LP64E-NEXT:    sw a0, 84(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 80(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 80(s1)
+; RV64I-LP64E-NEXT:    sw a0, 80(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 76(s1)
+; RV64I-LP64E-NEXT:    sw a0, 76(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 72(s1)
+; RV64I-LP64E-NEXT:    sw a0, 72(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 68(s1)
+; RV64I-LP64E-NEXT:    sw a0, 68(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 64(s1)
+; RV64I-LP64E-NEXT:    sw a0, 64(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 60(s1)
+; RV64I-LP64E-NEXT:    sw a0, 60(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 56(s1)
+; RV64I-LP64E-NEXT:    sw a0, 56(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 52(s1)
+; RV64I-LP64E-NEXT:    sw a0, 52(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 48(s1)
+; RV64I-LP64E-NEXT:    sw a0, 48(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 44(s1)
+; RV64I-LP64E-NEXT:    sw a0, 44(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 40(s1)
+; RV64I-LP64E-NEXT:    sw a0, 40(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 36(s1)
+; RV64I-LP64E-NEXT:    sw a0, 36(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 32(s1)
+; RV64I-LP64E-NEXT:    sw a0, 32(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 28(s1)
+; RV64I-LP64E-NEXT:    sw a0, 28(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 24(s1)
+; RV64I-LP64E-NEXT:    sw a0, 24(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 20(s1)
+; RV64I-LP64E-NEXT:    sw a0, 20(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, 16(s1)
-; RV64I-LP64E-NEXT:    lui a1, %hi(var)
+; RV64I-LP64E-NEXT:    sw a0, 16(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, %lo(var+12)(a1)
+; RV64I-LP64E-NEXT:    sw a0, 12(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, %lo(var+8)(a1)
+; RV64I-LP64E-NEXT:    sw a0, 8(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, %lo(var+4)(a1)
+; RV64I-LP64E-NEXT:    sw a0, 4(s0)
 ; RV64I-LP64E-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
-; RV64I-LP64E-NEXT:    sw a0, %lo(var)(a1)
+; RV64I-LP64E-NEXT:    sw a0, 0(s0)
 ; RV64I-LP64E-NEXT:    ld ra, 264(sp) # 8-byte Folded Reload
 ; RV64I-LP64E-NEXT:    ld s0, 256(sp) # 8-byte Folded Reload
 ; RV64I-LP64E-NEXT:    ld s1, 248(sp) # 8-byte Folded Reload
@@ -2061,119 +2029,117 @@ define void @caller() nounwind {
 ; RV64I-WITH-FP-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    addi s0, sp, 288
-; RV64I-WITH-FP-NEXT:    lui s1, %hi(var)
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var)(s1)
+; RV64I-WITH-FP-NEXT:    lui a0, %hi(var)
+; RV64I-WITH-FP-NEXT:    addi s1, a0, %lo(var)
+; RV64I-WITH-FP-NEXT:    lw a0, 0(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+4)(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 4(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -120(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+8)(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 8(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -128(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+12)(s1)
+; RV64I-WITH-FP-NEXT:    lw a0, 12(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -136(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    addi s6, s1, %lo(var)
-; RV64I-WITH-FP-NEXT:    lw a0, 16(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 16(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -144(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 20(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 20(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -152(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 24(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 24(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -160(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 28(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 28(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -168(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 32(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 32(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -176(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 36(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 36(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -184(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 40(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 40(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -192(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 44(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 44(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -200(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 48(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 48(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -208(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 52(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 52(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -216(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 56(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 56(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -224(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 60(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 60(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -232(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 64(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 64(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -240(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 68(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 68(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -248(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 72(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 72(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -256(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 76(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 76(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -264(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 80(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 80(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -272(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 84(s6)
+; RV64I-WITH-FP-NEXT:    lw a0, 84(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -280(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, 88(s6)
-; RV64I-WITH-FP-NEXT:    sd a0, -288(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw s8, 92(s6)
-; RV64I-WITH-FP-NEXT:    lw s9, 96(s6)
-; RV64I-WITH-FP-NEXT:    lw s10, 100(s6)
-; RV64I-WITH-FP-NEXT:    lw s11, 104(s6)
-; RV64I-WITH-FP-NEXT:    lw s2, 108(s6)
-; RV64I-WITH-FP-NEXT:    lw s3, 112(s6)
-; RV64I-WITH-FP-NEXT:    lw s4, 116(s6)
-; RV64I-WITH-FP-NEXT:    lw s5, 120(s6)
-; RV64I-WITH-FP-NEXT:    lw s7, 124(s6)
+; RV64I-WITH-FP-NEXT:    lw s4, 88(s1)
+; RV64I-WITH-FP-NEXT:    lw s5, 92(s1)
+; RV64I-WITH-FP-NEXT:    lw s6, 96(s1)
+; RV64I-WITH-FP-NEXT:    lw s7, 100(s1)
+; RV64I-WITH-FP-NEXT:    lw s8, 104(s1)
+; RV64I-WITH-FP-NEXT:    lw s9, 108(s1)
+; RV64I-WITH-FP-NEXT:    lw s10, 112(s1)
+; RV64I-WITH-FP-NEXT:    lw s11, 116(s1)
+; RV64I-WITH-FP-NEXT:    lw s2, 120(s1)
+; RV64I-WITH-FP-NEXT:    lw s3, 124(s1)
 ; RV64I-WITH-FP-NEXT:    call callee
-; RV64I-WITH-FP-NEXT:    sw s7, 124(s6)
-; RV64I-WITH-FP-NEXT:    sw s5, 120(s6)
-; RV64I-WITH-FP-NEXT:    sw s4, 116(s6)
-; RV64I-WITH-FP-NEXT:    sw s3, 112(s6)
-; RV64I-WITH-FP-NEXT:    sw s2, 108(s6)
-; RV64I-WITH-FP-NEXT:    sw s11, 104(s6)
-; RV64I-WITH-FP-NEXT:    sw s10, 100(s6)
-; RV64I-WITH-FP-NEXT:    sw s9, 96(s6)
-; RV64I-WITH-FP-NEXT:    sw s8, 92(s6)
-; RV64I-WITH-FP-NEXT:    ld a0, -288(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 88(s6)
+; RV64I-WITH-FP-NEXT:    sw s3, 124(s1)
+; RV64I-WITH-FP-NEXT:    sw s2, 120(s1)
+; RV64I-WITH-FP-NEXT:    sw s11, 116(s1)
+; RV64I-WITH-FP-NEXT:    sw s10, 112(s1)
+; RV64I-WITH-FP-NEXT:    sw s9, 108(s1)
+; RV64I-WITH-FP-NEXT:    sw s8, 104(s1)
+; RV64I-WITH-FP-NEXT:    sw s7, 100(s1)
+; RV64I-WITH-FP-NEXT:    sw s6, 96(s1)
+; RV64I-WITH-FP-NEXT:    sw s5, 92(s1)
+; RV64I-WITH-FP-NEXT:    sw s4, 88(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -280(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 84(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 84(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -272(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 80(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 80(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -264(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 76(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 76(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -256(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 72(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 72(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -248(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 68(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 68(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -240(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 64(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 64(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -232(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 60(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 60(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -224(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 56(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 56(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -216(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 52(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 52(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -208(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 48(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 48(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -200(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 44(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 44(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -192(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 40(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 40(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -184(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 36(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 36(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -176(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 32(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 32(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -168(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 28(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 28(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -160(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 24(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 24(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -152(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 20(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 20(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -144(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, 16(s6)
+; RV64I-WITH-FP-NEXT:    sw a0, 16(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -136(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+12)(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 12(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -128(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+8)(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 8(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -120(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+4)(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 4(s1)
 ; RV64I-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var)(s1)
+; RV64I-WITH-FP-NEXT:    sw a0, 0(s1)
 ; RV64I-WITH-FP-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
@@ -2194,117 +2160,115 @@ define void @caller() nounwind {
 ; RV64IZCMP:       # %bb.0:
 ; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
 ; RV64IZCMP-NEXT:    addi sp, sp, -128
-; RV64IZCMP-NEXT:    lui s0, %hi(var)
-; RV64IZCMP-NEXT:    lw a0, %lo(var)(s0)
+; RV64IZCMP-NEXT:    lui a0, %hi(var)
+; RV64IZCMP-NEXT:    addi s0, a0, %lo(var)
+; RV64IZCMP-NEXT:    lw a0, 0(s0)
 ; RV64IZCMP-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var+4)(s0)
+; RV64IZCMP-NEXT:    lw a0, 4(s0)
 ; RV64IZCMP-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var+8)(s0)
+; RV64IZCMP-NEXT:    lw a0, 8(s0)
 ; RV64IZCMP-NEXT:    sd a0, 152(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var+12)(s0)
+; RV64IZCMP-NEXT:    lw a0, 12(s0)
 ; RV64IZCMP-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    addi s1, s0, %lo(var)
-; RV64IZCMP-NEXT:    lw a0, 16(s1)
+; RV64IZCMP-NEXT:    lw a0, 16(s0)
 ; RV64IZCMP-NEXT:    sd a0, 136(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 20(s1)
+; RV64IZCMP-NEXT:    lw a0, 20(s0)
 ; RV64IZCMP-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 24(s1)
+; RV64IZCMP-NEXT:    lw a0, 24(s0)
 ; RV64IZCMP-NEXT:    sd a0, 120(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 28(s1)
+; RV64IZCMP-NEXT:    lw a0, 28(s0)
 ; RV64IZCMP-NEXT:    sd a0, 112(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 32(s1)
+; RV64IZCMP-NEXT:    lw a0, 32(s0)
 ; RV64IZCMP-NEXT:    sd a0, 104(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 36(s1)
+; RV64IZCMP-NEXT:    lw a0, 36(s0)
 ; RV64IZCMP-NEXT:    sd a0, 96(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 40(s1)
+; RV64IZCMP-NEXT:    lw a0, 40(s0)
 ; RV64IZCMP-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 44(s1)
+; RV64IZCMP-NEXT:    lw a0, 44(s0)
 ; RV64IZCMP-NEXT:    sd a0, 80(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 48(s1)
+; RV64IZCMP-NEXT:    lw a0, 48(s0)
 ; RV64IZCMP-NEXT:    sd a0, 72(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 52(s1)
+; RV64IZCMP-NEXT:    lw a0, 52(s0)
 ; RV64IZCMP-NEXT:    sd a0, 64(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 56(s1)
+; RV64IZCMP-NEXT:    lw a0, 56(s0)
 ; RV64IZCMP-NEXT:    sd a0, 56(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 60(s1)
+; RV64IZCMP-NEXT:    lw a0, 60(s0)
 ; RV64IZCMP-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 64(s1)
+; RV64IZCMP-NEXT:    lw a0, 64(s0)
 ; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 68(s1)
+; RV64IZCMP-NEXT:    lw a0, 68(s0)
 ; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 72(s1)
+; RV64IZCMP-NEXT:    lw a0, 72(s0)
 ; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 76(s1)
+; RV64IZCMP-NEXT:    lw a0, 76(s0)
 ; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 80(s1)
+; RV64IZCMP-NEXT:    lw a0, 80(s0)
 ; RV64IZCMP-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 84(s1)
-; RV64IZCMP-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw s4, 88(s1)
-; RV64IZCMP-NEXT:    lw s5, 92(s1)
-; RV64IZCMP-NEXT:    lw s6, 96(s1)
-; RV64IZCMP-NEXT:    lw s7, 100(s1)
-; RV64IZCMP-NEXT:    lw s8, 104(s1)
-; RV64IZCMP-NEXT:    lw s9, 108(s1)
-; RV64IZCMP-NEXT:    lw s10, 112(s1)
-; RV64IZCMP-NEXT:    lw s11, 116(s1)
-; RV64IZCMP-NEXT:    lw s2, 120(s1)
-; RV64IZCMP-NEXT:    lw s3, 124(s1)
+; RV64IZCMP-NEXT:    lw s1, 84(s0)
+; RV64IZCMP-NEXT:    lw s2, 88(s0)
+; RV64IZCMP-NEXT:    lw s3, 92(s0)
+; RV64IZCMP-NEXT:    lw s4, 96(s0)
+; RV64IZCMP-NEXT:    lw s5, 100(s0)
+; RV64IZCMP-NEXT:    lw s6, 104(s0)
+; RV64IZCMP-NEXT:    lw s7, 108(s0)
+; RV64IZCMP-NEXT:    lw s8, 112(s0)
+; RV64IZCMP-NEXT:    lw s9, 116(s0)
+; RV64IZCMP-NEXT:    lw s10, 120(s0)
+; RV64IZCMP-NEXT:    lw s11, 124(s0)
 ; RV64IZCMP-NEXT:    call callee
-; RV64IZCMP-NEXT:    sw s3, 124(s1)
-; RV64IZCMP-NEXT:    sw s2, 120(s1)
-; RV64IZCMP-NEXT:    sw s11, 116(s1)
-; RV64IZCMP-NEXT:    sw s10, 112(s1)
-; RV64IZCMP-NEXT:    sw s9, 108(s1)
-; RV64IZCMP-NEXT:    sw s8, 104(s1)
-; RV64IZCMP-NEXT:    sw s7, 100(s1)
-; RV64IZCMP-NEXT:    sw s6, 96(s1)
-; RV64IZCMP-NEXT:    sw s5, 92(s1)
-; RV64IZCMP-NEXT:    sw s4, 88(s1)
-; RV64IZCMP-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 84(s1)
+; RV64IZCMP-NEXT:    sw s11, 124(s0)
+; RV64IZCMP-NEXT:    sw s10, 120(s0)
+; RV64IZCMP-NEXT:    sw s9, 116(s0)
+; RV64IZCMP-NEXT:    sw s8, 112(s0)
+; RV64IZCMP-NEXT:    sw s7, 108(s0)
+; RV64IZCMP-NEXT:    sw s6, 104(s0)
+; RV64IZCMP-NEXT:    sw s5, 100(s0)
+; RV64IZCMP-NEXT:    sw s4, 96(s0)
+; RV64IZCMP-NEXT:    sw s3, 92(s0)
+; RV64IZCMP-NEXT:    sw s2, 88(s0)
+; RV64IZCMP-NEXT:    sw s1, 84(s0)
 ; RV64IZCMP-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 80(s1)
+; RV64IZCMP-NEXT:    sw a0, 80(s0)
 ; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 76(s1)
+; RV64IZCMP-NEXT:    sw a0, 76(s0)
 ; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 72(s1)
+; RV64IZCMP-NEXT:    sw a0, 72(s0)
 ; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 68(s1)
+; RV64IZCMP-NEXT:    sw a0, 68(s0)
 ; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 64(s1)
+; RV64IZCMP-NEXT:    sw a0, 64(s0)
 ; RV64IZCMP-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 60(s1)
+; RV64IZCMP-NEXT:    sw a0, 60(s0)
 ; RV64IZCMP-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 56(s1)
+; RV64IZCMP-NEXT:    sw a0, 56(s0)
 ; RV64IZCMP-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 52(s1)
+; RV64IZCMP-NEXT:    sw a0, 52(s0)
 ; RV64IZCMP-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 48(s1)
+; RV64IZCMP-NEXT:    sw a0, 48(s0)
 ; RV64IZCMP-NEXT:    ld a0, 80(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 44(s1)
+; RV64IZCMP-NEXT:    sw a0, 44(s0)
 ; RV64IZCMP-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 40(s1)
+; RV64IZCMP-NEXT:    sw a0, 40(s0)
 ; RV64IZCMP-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 36(s1)
+; RV64IZCMP-NEXT:    sw a0, 36(s0)
 ; RV64IZCMP-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 32(s1)
+; RV64IZCMP-NEXT:    sw a0, 32(s0)
 ; RV64IZCMP-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 28(s1)
+; RV64IZCMP-NEXT:    sw a0, 28(s0)
 ; RV64IZCMP-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 24(s1)
+; RV64IZCMP-NEXT:    sw a0, 24(s0)
 ; RV64IZCMP-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 20(s1)
+; RV64IZCMP-NEXT:    sw a0, 20(s0)
 ; RV64IZCMP-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 16(s1)
+; RV64IZCMP-NEXT:    sw a0, 16(s0)
 ; RV64IZCMP-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var+12)(s0)
+; RV64IZCMP-NEXT:    sw a0, 12(s0)
 ; RV64IZCMP-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var+8)(s0)
+; RV64IZCMP-NEXT:    sw a0, 8(s0)
 ; RV64IZCMP-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var+4)(s0)
+; RV64IZCMP-NEXT:    sw a0, 4(s0)
 ; RV64IZCMP-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var)(s0)
+; RV64IZCMP-NEXT:    sw a0, 0(s0)
 ; RV64IZCMP-NEXT:    addi sp, sp, 128
 ; RV64IZCMP-NEXT:    cm.popret {ra, s0-s11}, 160
 ;
@@ -2325,16 +2289,16 @@ define void @caller() nounwind {
 ; RV64IZCMP-WITH-FP-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    addi s0, sp, 288
-; RV64IZCMP-WITH-FP-NEXT:    lui s6, %hi(var)
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var)(s6)
+; RV64IZCMP-WITH-FP-NEXT:    lui a0, %hi(var)
+; RV64IZCMP-WITH-FP-NEXT:    addi s1, a0, %lo(var)
+; RV64IZCMP-WITH-FP-NEXT:    lw a0, 0(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+4)(s6)
+; RV64IZCMP-WITH-FP-NEXT:    lw a0, 4(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -120(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+8)(s6)
+; RV64IZCMP-WITH-FP-NEXT:    lw a0, 8(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -128(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+12)(s6)
+; RV64IZCMP-WITH-FP-NEXT:    lw a0, 12(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -136(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    addi s1, s6, %lo(var)
 ; RV64IZCMP-WITH-FP-NEXT:    lw a0, 16(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -144(s0) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    lw a0, 20(s1)
@@ -2371,29 +2335,27 @@ define void @caller() nounwind {
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -272(s0) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    lw a0, 84(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -280(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, 88(s1)
-; RV64IZCMP-WITH-FP-NEXT:    sd a0, -288(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw s8, 92(s1)
-; RV64IZCMP-WITH-FP-NEXT:    lw s9, 96(s1)
-; RV64IZCMP-WITH-FP-NEXT:    lw s10, 100(s1)
-; RV64IZCMP-WITH-FP-NEXT:    lw s11, 104(s1)
-; RV64IZCMP-WITH-FP-NEXT:    lw s2, 108(s1)
-; RV64IZCMP-WITH-FP-NEXT:    lw s3, 112(s1)
-; RV64IZCMP-WITH-FP-NEXT:    lw s4, 116(s1)
-; RV64IZCMP-WITH-FP-NEXT:    lw s5, 120(s1)
-; RV64IZCMP-WITH-FP-NEXT:    lw s7, 124(s1)
+; RV64IZCMP-WITH-FP-NEXT:    lw s4, 88(s1)
+; RV64IZCMP-WITH-FP-NEXT:    lw s5, 92(s1)
+; RV64IZCMP-WITH-FP-NEXT:    lw s6, 96(s1)
+; RV64IZCMP-WITH-FP-NEXT:    lw s7, 100(s1)
+; RV64IZCMP-WITH-FP-NEXT:    lw s8, 104(s1)
+; RV64IZCMP-WITH-FP-NEXT:    lw s9, 108(s1)
+; RV64IZCMP-WITH-FP-NEXT:    lw s10, 112(s1)
+; RV64IZCMP-WITH-FP-NEXT:    lw s11, 116(s1)
+; RV64IZCMP-WITH-FP-NEXT:    lw s2, 120(s1)
+; RV64IZCMP-WITH-FP-NEXT:    lw s3, 124(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    call callee
-; RV64IZCMP-WITH-FP-NEXT:    sw s7, 124(s1)
-; RV64IZCMP-WITH-FP-NEXT:    sw s5, 120(s1)
-; RV64IZCMP-WITH-FP-NEXT:    sw s4, 116(s1)
-; RV64IZCMP-WITH-FP-NEXT:    sw s3, 112(s1)
-; RV64IZCMP-WITH-FP-NEXT:    sw s2, 108(s1)
-; RV64IZCMP-WITH-FP-NEXT:    sw s11, 104(s1)
-; RV64IZCMP-WITH-FP-NEXT:    sw s10, 100(s1)
-; RV64IZCMP-WITH-FP-NEXT:    sw s9, 96(s1)
-; RV64IZCMP-WITH-FP-NEXT:    sw s8, 92(s1)
-; RV64IZCMP-WITH-FP-NEXT:    ld a0, -288(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, 88(s1)
+; RV64IZCMP-WITH-FP-NEXT:    sw s3, 124(s1)
+; RV64IZCMP-WITH-FP-NEXT:    sw s2, 120(s1)
+; RV64IZCMP-WITH-FP-NEXT:    sw s11, 116(s1)
+; RV64IZCMP-WITH-FP-NEXT:    sw s10, 112(s1)
+; RV64IZCMP-WITH-FP-NEXT:    sw s9, 108(s1)
+; RV64IZCMP-WITH-FP-NEXT:    sw s8, 104(s1)
+; RV64IZCMP-WITH-FP-NEXT:    sw s7, 100(s1)
+; RV64IZCMP-WITH-FP-NEXT:    sw s6, 96(s1)
+; RV64IZCMP-WITH-FP-NEXT:    sw s5, 92(s1)
+; RV64IZCMP-WITH-FP-NEXT:    sw s4, 88(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -280(s0) # 8-byte Folded Reload
 ; RV64IZCMP-WITH-FP-NEXT:    sw a0, 84(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -272(s0) # 8-byte Folded Reload
@@ -2431,13 +2393,13 @@ define void @caller() nounwind {
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -144(s0) # 8-byte Folded Reload
 ; RV64IZCMP-WITH-FP-NEXT:    sw a0, 16(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -136(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+12)(s6)
+; RV64IZCMP-WITH-FP-NEXT:    sw a0, 12(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -128(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+8)(s6)
+; RV64IZCMP-WITH-FP-NEXT:    sw a0, 8(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -120(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+4)(s6)
+; RV64IZCMP-WITH-FP-NEXT:    sw a0, 4(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var)(s6)
+; RV64IZCMP-WITH-FP-NEXT:    sw a0, 0(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
 ; RV64IZCMP-WITH-FP-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
 ; RV64IZCMP-WITH-FP-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/double-mem.ll b/llvm/test/CodeGen/RISCV/double-mem.ll
index 38cb52b6f4b30..017e06924156a 100644
--- a/llvm/test/CodeGen/RISCV/double-mem.ll
+++ b/llvm/test/CodeGen/RISCV/double-mem.ll
@@ -84,37 +84,37 @@ define dso_local double @fld_fsd_global(double %a, double %b) nounwind {
 ; CHECKIFD:       # %bb.0:
 ; CHECKIFD-NEXT:    fadd.d fa0, fa0, fa1
 ; CHECKIFD-NEXT:    lui a0, %hi(G)
-; CHECKIFD-NEXT:    fld fa5, %lo(G)(a0)
-; CHECKIFD-NEXT:    addi a1, a0, %lo(G)
-; CHECKIFD-NEXT:    fsd fa0, %lo(G)(a0)
-; CHECKIFD-NEXT:    fld fa5, 72(a1)
-; CHECKIFD-NEXT:    fsd fa0, 72(a1)
+; CHECKIFD-NEXT:    addi a0, a0, %lo(G)
+; CHECKIFD-NEXT:    fld fa5, 0(a0)
+; CHECKIFD-NEXT:    fsd fa0, 0(a0)
+; CHECKIFD-NEXT:    fld fa5, 72(a0)
+; CHECKIFD-NEXT:    fsd fa0, 72(a0)
 ; CHECKIFD-NEXT:    ret
 ;
 ; RV32IZFINXZDINX-LABEL: fld_fsd_global:
 ; RV32IZFINXZDINX:       # %bb.0:
 ; RV32IZFINXZDINX-NEXT:    fadd.d a0, a0, a2
 ; RV32IZFINXZDINX-NEXT:    lui a2, %hi(G)
-; RV32IZFINXZDINX-NEXT:    lw a4, %lo(G)(a2)
-; RV32IZFINXZDINX-NEXT:    lw a5, %lo(G+4)(a2)
-; RV32IZFINXZDINX-NEXT:    addi a3, a2, %lo(G)
-; RV32IZFINXZDINX-NEXT:    sw a0, %lo(G)(a2)
-; RV32IZFINXZDINX-NEXT:    sw a1, %lo(G+4)(a2)
-; RV32IZFINXZDINX-NEXT:    lw a4, 72(a3)
-; RV32IZFINXZDINX-NEXT:    lw a5, 76(a3)
-; RV32IZFINXZDINX-NEXT:    sw a0, 72(a3)
-; RV32IZFINXZDINX-NEXT:    sw a1, 76(a3)
+; RV32IZFINXZDINX-NEXT:    addi a2, a2, %lo(G)
+; RV32IZFINXZDINX-NEXT:    lw a4, 0(a2)
+; RV32IZFINXZDINX-NEXT:    lw a5, 4(a2)
+; RV32IZFINXZDINX-NEXT:    sw a0, 0(a2)
+; RV32IZFINXZDINX-NEXT:    sw a1, 4(a2)
+; RV32IZFINXZDINX-NEXT:    lw a4, 72(a2)
+; RV32IZFINXZDINX-NEXT:    lw a5, 76(a2)
+; RV32IZFINXZDINX-NEXT:    sw a0, 72(a2)
+; RV32IZFINXZDINX-NEXT:    sw a1, 76(a2)
 ; RV32IZFINXZDINX-NEXT:    ret
 ;
 ; RV64IZFINXZDINX-LABEL: fld_fsd_global:
 ; RV64IZFINXZDINX:       # %bb.0:
 ; RV64IZFINXZDINX-NEXT:    fadd.d a0, a0, a1
 ; RV64IZFINXZDINX-NEXT:    lui a1, %hi(G)
-; RV64IZFINXZDINX-NEXT:    ld zero, %lo(G)(a1)
-; RV64IZFINXZDINX-NEXT:    addi a2, a1, %lo(G)
-; RV64IZFINXZDINX-NEXT:    sd a0, %lo(G)(a1)
-; RV64IZFINXZDINX-NEXT:    ld zero, 72(a2)
-; RV64IZFINXZDINX-NEXT:    sd a0, 72(a2)
+; RV64IZFINXZDINX-NEXT:    addi a1, a1, %lo(G)
+; RV64IZFINXZDINX-NEXT:    ld zero, 0(a1)
+; RV64IZFINXZDINX-NEXT:    sd a0, 0(a1)
+; RV64IZFINXZDINX-NEXT:    ld zero, 72(a1)
+; RV64IZFINXZDINX-NEXT:    sd a0, 72(a1)
 ; RV64IZFINXZDINX-NEXT:    ret
 ; Use %a and %b in an FP op to ensure floating point registers are used, even
 ; for the soft float ABI
diff --git a/llvm/test/CodeGen/RISCV/float-mem.ll b/llvm/test/CodeGen/RISCV/float-mem.ll
index 3779d39a753e1..55fe5dc598dfb 100644
--- a/llvm/test/CodeGen/RISCV/float-mem.ll
+++ b/llvm/test/CodeGen/RISCV/float-mem.ll
@@ -64,22 +64,22 @@ define dso_local float @flw_fsw_global(float %a, float %b) nounwind {
 ; CHECKIF:       # %bb.0:
 ; CHECKIF-NEXT:    fadd.s fa0, fa0, fa1
 ; CHECKIF-NEXT:    lui a0, %hi(G)
-; CHECKIF-NEXT:    flw fa5, %lo(G)(a0)
-; CHECKIF-NEXT:    addi a1, a0, %lo(G)
-; CHECKIF-NEXT:    fsw fa0, %lo(G)(a0)
-; CHECKIF-NEXT:    flw fa5, 36(a1)
-; CHECKIF-NEXT:    fsw fa0, 36(a1)
+; CHECKIF-NEXT:    addi a0, a0, %lo(G)
+; CHECKIF-NEXT:    flw fa5, 0(a0)
+; CHECKIF-NEXT:    fsw fa0, 0(a0)
+; CHECKIF-NEXT:    flw fa5, 36(a0)
+; CHECKIF-NEXT:    fsw fa0, 36(a0)
 ; CHECKIF-NEXT:    ret
 ;
 ; CHECKIZFINX-LABEL: flw_fsw_global:
 ; CHECKIZFINX:       # %bb.0:
 ; CHECKIZFINX-NEXT:    fadd.s a0, a0, a1
 ; CHECKIZFINX-NEXT:    lui a1, %hi(G)
-; CHECKIZFINX-NEXT:    lw zero, %lo(G)(a1)
-; CHECKIZFINX-NEXT:    addi a2, a1, %lo(G)
-; CHECKIZFINX-NEXT:    sw a0, %lo(G)(a1)
-; CHECKIZFINX-NEXT:    lw zero, 36(a2)
-; CHECKIZFINX-NEXT:    sw a0, 36(a2)
+; CHECKIZFINX-NEXT:    addi a1, a1, %lo(G)
+; CHECKIZFINX-NEXT:    lw zero, 0(a1)
+; CHECKIZFINX-NEXT:    sw a0, 0(a1)
+; CHECKIZFINX-NEXT:    lw zero, 36(a1)
+; CHECKIZFINX-NEXT:    sw a0, 36(a1)
 ; CHECKIZFINX-NEXT:    ret
   %1 = fadd float %a, %b
   %2 = load volatile float, ptr @G
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
index 3c2e84689c979..2a00604dd07a0 100644
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
@@ -59,9 +59,9 @@ entry:
 define dso_local i64 @load_g_1() nounwind {
 ; RV32I-LABEL: load_g_1:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    lui a1, %hi(g_1)
-; RV32I-NEXT:    lw a0, %lo(g_1)(a1)
-; RV32I-NEXT:    addi a1, a1, %lo(g_1)
+; RV32I-NEXT:    lui a0, %hi(g_1)
+; RV32I-NEXT:    addi a1, a0, %lo(g_1)
+; RV32I-NEXT:    lw a0, 0(a1)
 ; RV32I-NEXT:    lw a1, 4(a1)
 ; RV32I-NEXT:    ret
 ;
@@ -94,9 +94,9 @@ entry:
 define dso_local i64 @load_g_2() nounwind {
 ; RV32I-LABEL: load_g_2:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    lui a1, %hi(g_2)
-; RV32I-NEXT:    lw a0, %lo(g_2)(a1)
-; RV32I-NEXT:    addi a1, a1, %lo(g_2)
+; RV32I-NEXT:    lui a0, %hi(g_2)
+; RV32I-NEXT:    addi a1, a0, %lo(g_2)
+; RV32I-NEXT:    lw a0, 0(a1)
 ; RV32I-NEXT:    lw a1, 4(a1)
 ; RV32I-NEXT:    ret
 ;
@@ -129,9 +129,9 @@ entry:
 define dso_local i64 @load_g_4() nounwind {
 ; RV32I-LABEL: load_g_4:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    lui a1, %hi(g_4)
-; RV32I-NEXT:    lw a0, %lo(g_4)(a1)
-; RV32I-NEXT:    addi a1, a1, %lo(g_4)
+; RV32I-NEXT:    lui a0, %hi(g_4)
+; RV32I-NEXT:    addi a1, a0, %lo(g_4)
+; RV32I-NEXT:    lw a0, 0(a1)
 ; RV32I-NEXT:    lw a1, 4(a1)
 ; RV32I-NEXT:    ret
 ;
@@ -233,9 +233,9 @@ define dso_local void @store_g_4() nounwind {
 ; RV32I-LABEL: store_g_4:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %hi(g_4)
-; RV32I-NEXT:    sw zero, %lo(g_4)(a0)
 ; RV32I-NEXT:    addi a0, a0, %lo(g_4)
 ; RV32I-NEXT:    sw zero, 4(a0)
+; RV32I-NEXT:    sw zero, 0(a0)
 ; RV32I-NEXT:    ret
 ;
 ; RV32I-MEDIUM-LABEL: store_g_4:
@@ -547,18 +547,18 @@ define dso_local i64 @load_tl_4() nounwind {
 ; RV32I-LABEL: load_tl_4:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %tprel_hi(tl_4)
-; RV32I-NEXT:    add a1, a0, tp, %tprel_add(tl_4)
-; RV32I-NEXT:    lw a0, %tprel_lo(tl_4)(a1)
-; RV32I-NEXT:    addi a1, a1, %tprel_lo(tl_4)
+; RV32I-NEXT:    add a0, a0, tp, %tprel_add(tl_4)
+; RV32I-NEXT:    addi a1, a0, %tprel_lo(tl_4)
+; RV32I-NEXT:    lw a0, 0(a1)
 ; RV32I-NEXT:    lw a1, 4(a1)
 ; RV32I-NEXT:    ret
 ;
 ; RV32I-MEDIUM-LABEL: load_tl_4:
 ; RV32I-MEDIUM:       # %bb.0: # %entry
 ; RV32I-MEDIUM-NEXT:    lui a0, %tprel_hi(tl_4)
-; RV32I-MEDIUM-NEXT:    add a1, a0, tp, %tprel_add(tl_4)
-; RV32I-MEDIUM-NEXT:    lw a0, %tprel_lo(tl_4)(a1)
-; RV32I-MEDIUM-NEXT:    addi a1, a1, %tprel_lo(tl_4)
+; RV32I-MEDIUM-NEXT:    add a0, a0, tp, %tprel_add(tl_4)
+; RV32I-MEDIUM-NEXT:    addi a1, a0, %tprel_lo(tl_4)
+; RV32I-MEDIUM-NEXT:    lw a0, 0(a1)
 ; RV32I-MEDIUM-NEXT:    lw a1, 4(a1)
 ; RV32I-MEDIUM-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/global-merge-offset.ll b/llvm/test/CodeGen/RISCV/global-merge-offset.ll
index 13afcba181719..3c1e838f7f6cf 100644
--- a/llvm/test/CodeGen/RISCV/global-merge-offset.ll
+++ b/llvm/test/CodeGen/RISCV/global-merge-offset.ll
@@ -22,20 +22,20 @@ define void @f1(i32 %a) nounwind {
 ; CHECK-LABEL: f1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lui a1, %hi(.L_MergedGlobals)
-; CHECK-NEXT:    addi a2, a1, %lo(.L_MergedGlobals)
-; CHECK-NEXT:    sw a0, 2044(a2)
-; CHECK-NEXT:    sw a0, 404(a2)
-; CHECK-NEXT:    sw a0, %lo(.L_MergedGlobals)(a1)
+; CHECK-NEXT:    addi a1, a1, %lo(.L_MergedGlobals)
+; CHECK-NEXT:    sw a0, 2044(a1)
+; CHECK-NEXT:    sw a0, 404(a1)
+; CHECK-NEXT:    sw a0, 0(a1)
 ; CHECK-NEXT:    ret
 ;
 ; CHECK-TOOBIG-LABEL: f1:
 ; CHECK-TOOBIG:       # %bb.0:
 ; CHECK-TOOBIG-NEXT:    lui a1, %hi(ga1+1640)
 ; CHECK-TOOBIG-NEXT:    lui a2, %hi(.L_MergedGlobals)
-; CHECK-TOOBIG-NEXT:    addi a3, a2, %lo(.L_MergedGlobals)
+; CHECK-TOOBIG-NEXT:    addi a2, a2, %lo(.L_MergedGlobals)
 ; CHECK-TOOBIG-NEXT:    sw a0, %lo(ga1+1640)(a1)
-; CHECK-TOOBIG-NEXT:    sw a0, 408(a3)
-; CHECK-TOOBIG-NEXT:    sw a0, %lo(.L_MergedGlobals)(a2)
+; CHECK-TOOBIG-NEXT:    sw a0, 408(a2)
+; CHECK-TOOBIG-NEXT:    sw a0, 0(a2)
 ; CHECK-TOOBIG-NEXT:    ret
   %ga1_end = getelementptr inbounds [410 x i32], ptr @ga1, i32 0, i64 410
   %ga2_end = getelementptr inbounds [ArrSize x i32], ptr @ga2, i32 0, i64 ArrSize
diff --git a/llvm/test/CodeGen/RISCV/global-merge.ll b/llvm/test/CodeGen/RISCV/global-merge.ll
index 20379ee2e7dac..2c286119ba52f 100644
--- a/llvm/test/CodeGen/RISCV/global-merge.ll
+++ b/llvm/test/CodeGen/RISCV/global-merge.ll
@@ -18,8 +18,8 @@ define void @f1(i32 %a) nounwind {
 ; CHECK-LABEL: f1:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    lui a1, %hi(.L_MergedGlobals)
-; CHECK-NEXT:    sw a0, %lo(.L_MergedGlobals)(a1)
 ; CHECK-NEXT:    addi a1, a1, %lo(.L_MergedGlobals)
+; CHECK-NEXT:    sw a0, 0(a1)
 ; CHECK-NEXT:    sw a0, 4(a1)
 ; CHECK-NEXT:    sw a0, 8(a1)
 ; CHECK-NEXT:    sw a0, 12(a1)
diff --git a/llvm/test/CodeGen/RISCV/half-mem.ll b/llvm/test/CodeGen/RISCV/half-mem.ll
index 5b6a94a83f94b..8ff342c1b7c3a 100644
--- a/llvm/test/CodeGen/RISCV/half-mem.ll
+++ b/llvm/test/CodeGen/RISCV/half-mem.ll
@@ -112,22 +112,22 @@ define half @flh_fsh_global(half %a, half %b) nounwind {
 ; CHECKIZFH:       # %bb.0:
 ; CHECKIZFH-NEXT:    fadd.h fa0, fa0, fa1
 ; CHECKIZFH-NEXT:    lui a0, %hi(G)
-; CHECKIZFH-NEXT:    flh fa5, %lo(G)(a0)
-; CHECKIZFH-NEXT:    addi a1, a0, %lo(G)
-; CHECKIZFH-NEXT:    fsh fa0, %lo(G)(a0)
-; CHECKIZFH-NEXT:    flh fa5, 18(a1)
-; CHECKIZFH-NEXT:    fsh fa0, 18(a1)
+; CHECKIZFH-NEXT:    addi a0, a0, %lo(G)
+; CHECKIZFH-NEXT:    flh fa5, 0(a0)
+; CHECKIZFH-NEXT:    fsh fa0, 0(a0)
+; CHECKIZFH-NEXT:    flh fa5, 18(a0)
+; CHECKIZFH-NEXT:    fsh fa0, 18(a0)
 ; CHECKIZFH-NEXT:    ret
 ;
 ; CHECKIZHINX-LABEL: flh_fsh_global:
 ; CHECKIZHINX:       # %bb.0:
 ; CHECKIZHINX-NEXT:    fadd.h a0, a0, a1
 ; CHECKIZHINX-NEXT:    lui a1, %hi(G)
-; CHECKIZHINX-NEXT:    lh zero, %lo(G)(a1)
-; CHECKIZHINX-NEXT:    addi a2, a1, %lo(G)
-; CHECKIZHINX-NEXT:    sh a0, %lo(G)(a1)
-; CHECKIZHINX-NEXT:    lh zero, 18(a2)
-; CHECKIZHINX-NEXT:    sh a0, 18(a2)
+; CHECKIZHINX-NEXT:    addi a1, a1, %lo(G)
+; CHECKIZHINX-NEXT:    lh zero, 0(a1)
+; CHECKIZHINX-NEXT:    sh a0, 0(a1)
+; CHECKIZHINX-NEXT:    lh zero, 18(a1)
+; CHECKIZHINX-NEXT:    sh a0, 18(a1)
 ; CHECKIZHINX-NEXT:    ret
 ;
 ; CHECKIZFHMIN-LABEL: flh_fsh_global:
@@ -137,11 +137,11 @@ define half @flh_fsh_global(half %a, half %b) nounwind {
 ; CHECKIZFHMIN-NEXT:    fadd.s fa5, fa4, fa5
 ; CHECKIZFHMIN-NEXT:    fcvt.h.s fa0, fa5
 ; CHECKIZFHMIN-NEXT:    lui a0, %hi(G)
-; CHECKIZFHMIN-NEXT:    flh fa5, %lo(G)(a0)
-; CHECKIZFHMIN-NEXT:    addi a1, a0, %lo(G)
-; CHECKIZFHMIN-NEXT:    fsh fa0, %lo(G)(a0)
-; CHECKIZFHMIN-NEXT:    flh fa5, 18(a1)
-; CHECKIZFHMIN-NEXT:    fsh fa0, 18(a1)
+; CHECKIZFHMIN-NEXT:    addi a0, a0, %lo(G)
+; CHECKIZFHMIN-NEXT:    flh fa5, 0(a0)
+; CHECKIZFHMIN-NEXT:    fsh fa0, 0(a0)
+; CHECKIZFHMIN-NEXT:    flh fa5, 18(a0)
+; CHECKIZFHMIN-NEXT:    fsh fa0, 18(a0)
 ; CHECKIZFHMIN-NEXT:    ret
 ;
 ; CHECKIZHINXMIN-LABEL: flh_fsh_global:
@@ -151,11 +151,11 @@ define half @flh_fsh_global(half %a, half %b) nounwind {
 ; CHECKIZHINXMIN-NEXT:    fadd.s a0, a0, a1
 ; CHECKIZHINXMIN-NEXT:    fcvt.h.s a0, a0
 ; CHECKIZHINXMIN-NEXT:    lui a1, %hi(G)
-; CHECKIZHINXMIN-NEXT:    lh zero, %lo(G)(a1)
-; CHECKIZHINXMIN-NEXT:    addi a2, a1, %lo(G)
-; CHECKIZHINXMIN-NEXT:    sh a0, %lo(G)(a1)
-; CHECKIZHINXMIN-NEXT:    lh zero, 18(a2)
-; CHECKIZHINXMIN-NEXT:    sh a0, 18(a2)
+; CHECKIZHINXMIN-NEXT:    addi a1, a1, %lo(G)
+; CHECKIZHINXMIN-NEXT:    lh zero, 0(a1)
+; CHECKIZHINXMIN-NEXT:    sh a0, 0(a1)
+; CHECKIZHINXMIN-NEXT:    lh zero, 18(a1)
+; CHECKIZHINXMIN-NEXT:    sh a0, 18(a1)
 ; CHECKIZHINXMIN-NEXT:    ret
   %1 = fadd half %a, %b
   %2 = load volatile half, ptr @G
diff --git a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
index 5f9866f08c821..18f9db0edccbe 100644
--- a/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
+++ b/llvm/test/CodeGen/RISCV/hoist-global-addr-base.ll
@@ -306,8 +306,8 @@ define void @self_store() {
 ; RV32-LABEL: self_store:
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    lui a0, %hi(f)
-; RV32-NEXT:    addi a1, a0, %lo(f)
-; RV32-NEXT:    sw a1, %lo(f+4)(a0)
+; RV32-NEXT:    addi a0, a0, %lo(f)
+; RV32-NEXT:    sw a0, 4(a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: self_store:
diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll
index a9cb80cb66349..5d55595b3158b 100644
--- a/llvm/test/CodeGen/RISCV/mem.ll
+++ b/llvm/test/CodeGen/RISCV/mem.ll
@@ -168,12 +168,12 @@ define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
 define dso_local i32 @lw_sw_global(i32 %a) nounwind {
 ; RV32I-LABEL: lw_sw_global:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lui a2, %hi(G)
-; RV32I-NEXT:    lw a1, %lo(G)(a2)
-; RV32I-NEXT:    addi a3, a2, %lo(G)
-; RV32I-NEXT:    sw a0, %lo(G)(a2)
-; RV32I-NEXT:    lw zero, 36(a3)
-; RV32I-NEXT:    sw a0, 36(a3)
+; RV32I-NEXT:    lui a1, %hi(G)
+; RV32I-NEXT:    addi a2, a1, %lo(G)
+; RV32I-NEXT:    lw a1, 0(a2)
+; RV32I-NEXT:    sw a0, 0(a2)
+; RV32I-NEXT:    lw zero, 36(a2)
+; RV32I-NEXT:    sw a0, 36(a2)
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:    ret
   %1 = load volatile i32, ptr @G
diff --git a/llvm/test/CodeGen/RISCV/mem64.ll b/llvm/test/CodeGen/RISCV/mem64.ll
index 248964146325a..379b3483a1b91 100644
--- a/llvm/test/CodeGen/RISCV/mem64.ll
+++ b/llvm/test/CodeGen/RISCV/mem64.ll
@@ -213,12 +213,12 @@ define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
 define dso_local i64 @ld_sd_global(i64 %a) nounwind {
 ; RV64I-LABEL: ld_sd_global:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    lui a2, %hi(G)
-; RV64I-NEXT:    ld a1, %lo(G)(a2)
-; RV64I-NEXT:    addi a3, a2, %lo(G)
-; RV64I-NEXT:    sd a0, %lo(G)(a2)
-; RV64I-NEXT:    ld zero, 72(a3)
-; RV64I-NEXT:    sd a0, 72(a3)
+; RV64I-NEXT:    lui a1, %hi(G)
+; RV64I-NEXT:    addi a2, a1, %lo(G)
+; RV64I-NEXT:    ld a1, 0(a2)
+; RV64I-NEXT:    sd a0, 0(a2)
+; RV64I-NEXT:    ld zero, 72(a2)
+; RV64I-NEXT:    sd a0, 72(a2)
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    ret
   %1 = load volatile i64, ptr @G
diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll b/llvm/test/CodeGen/RISCV/memcpy.ll
index 02f582339d0b7..6cbd27bc21256 100644
--- a/llvm/test/CodeGen/RISCV/memcpy.ll
+++ b/llvm/test/CodeGen/RISCV/memcpy.ll
@@ -23,61 +23,61 @@
 define i32 @t0() {
 ; RV32-LABEL: t0:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    lui a0, %hi(src)
-; RV32-NEXT:    lw a1, %lo(src)(a0)
-; RV32-NEXT:    lui a2, %hi(dst)
-; RV32-NEXT:    sw a1, %lo(dst)(a2)
-; RV32-NEXT:    addi a0, a0, %lo(src)
-; RV32-NEXT:    lbu a1, 10(a0)
-; RV32-NEXT:    lh a3, 8(a0)
-; RV32-NEXT:    lw a0, 4(a0)
-; RV32-NEXT:    addi a2, a2, %lo(dst)
-; RV32-NEXT:    sb a1, 10(a2)
-; RV32-NEXT:    sh a3, 8(a2)
-; RV32-NEXT:    sw a0, 4(a2)
+; RV32-NEXT:    lui a0, %hi(dst)
+; RV32-NEXT:    addi a0, a0, %lo(dst)
+; RV32-NEXT:    lui a1, %hi(src)
+; RV32-NEXT:    addi a1, a1, %lo(src)
+; RV32-NEXT:    lbu a2, 10(a1)
+; RV32-NEXT:    lh a3, 8(a1)
+; RV32-NEXT:    lw a4, 4(a1)
+; RV32-NEXT:    lw a1, 0(a1)
+; RV32-NEXT:    sb a2, 10(a0)
+; RV32-NEXT:    sh a3, 8(a0)
+; RV32-NEXT:    sw a4, 4(a0)
+; RV32-NEXT:    sw a1, 0(a0)
 ; RV32-NEXT:    li a0, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: t0:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    lui a0, %hi(src)
-; RV64-NEXT:    ld a1, %lo(src)(a0)
-; RV64-NEXT:    lui a2, %hi(dst)
-; RV64-NEXT:    addi a0, a0, %lo(src)
-; RV64-NEXT:    lbu a3, 10(a0)
-; RV64-NEXT:    lh a0, 8(a0)
-; RV64-NEXT:    sd a1, %lo(dst)(a2)
-; RV64-NEXT:    addi a1, a2, %lo(dst)
-; RV64-NEXT:    sb a3, 10(a1)
-; RV64-NEXT:    sh a0, 8(a1)
+; RV64-NEXT:    lui a0, %hi(dst)
+; RV64-NEXT:    lui a1, %hi(src)
+; RV64-NEXT:    addi a1, a1, %lo(src)
+; RV64-NEXT:    lbu a2, 10(a1)
+; RV64-NEXT:    lh a3, 8(a1)
+; RV64-NEXT:    ld a1, 0(a1)
+; RV64-NEXT:    addi a0, a0, %lo(dst)
+; RV64-NEXT:    sb a2, 10(a0)
+; RV64-NEXT:    sh a3, 8(a0)
+; RV64-NEXT:    sd a1, 0(a0)
 ; RV64-NEXT:    li a0, 0
 ; RV64-NEXT:    ret
 ;
 ; RV32-FAST-LABEL: t0:
 ; RV32-FAST:       # %bb.0: # %entry
-; RV32-FAST-NEXT:    lui a0, %hi(src)
-; RV32-FAST-NEXT:    lw a1, %lo(src)(a0)
-; RV32-FAST-NEXT:    lui a2, %hi(dst)
-; RV32-FAST-NEXT:    addi a0, a0, %lo(src)
-; RV32-FAST-NEXT:    lw a3, 7(a0)
-; RV32-FAST-NEXT:    lw a0, 4(a0)
-; RV32-FAST-NEXT:    sw a1, %lo(dst)(a2)
-; RV32-FAST-NEXT:    addi a1, a2, %lo(dst)
-; RV32-FAST-NEXT:    sw a3, 7(a1)
-; RV32-FAST-NEXT:    sw a0, 4(a1)
+; RV32-FAST-NEXT:    lui a0, %hi(dst)
+; RV32-FAST-NEXT:    lui a1, %hi(src)
+; RV32-FAST-NEXT:    addi a1, a1, %lo(src)
+; RV32-FAST-NEXT:    lw a2, 7(a1)
+; RV32-FAST-NEXT:    lw a3, 4(a1)
+; RV32-FAST-NEXT:    lw a1, 0(a1)
+; RV32-FAST-NEXT:    addi a0, a0, %lo(dst)
+; RV32-FAST-NEXT:    sw a2, 7(a0)
+; RV32-FAST-NEXT:    sw a3, 4(a0)
+; RV32-FAST-NEXT:    sw a1, 0(a0)
 ; RV32-FAST-NEXT:    li a0, 0
 ; RV32-FAST-NEXT:    ret
 ;
 ; RV64-FAST-LABEL: t0:
 ; RV64-FAST:       # %bb.0: # %entry
 ; RV64-FAST-NEXT:    lui a0, %hi(src)
-; RV64-FAST-NEXT:    ld a1, %lo(src)(a0)
 ; RV64-FAST-NEXT:    addi a0, a0, %lo(src)
-; RV64-FAST-NEXT:    lw a0, 7(a0)
+; RV64-FAST-NEXT:    lw a1, 7(a0)
+; RV64-FAST-NEXT:    ld a0, 0(a0)
 ; RV64-FAST-NEXT:    lui a2, %hi(dst)
-; RV64-FAST-NEXT:    sd a1, %lo(dst)(a2)
-; RV64-FAST-NEXT:    addi a1, a2, %lo(dst)
-; RV64-FAST-NEXT:    sw a0, 7(a1)
+; RV64-FAST-NEXT:    addi a2, a2, %lo(dst)
+; RV64-FAST-NEXT:    sw a1, 7(a2)
+; RV64-FAST-NEXT:    sd a0, 0(a2)
 ; RV64-FAST-NEXT:    li a0, 0
 ; RV64-FAST-NEXT:    ret
 entry:
@@ -131,15 +131,15 @@ define void @t1(ptr nocapture %C) nounwind {
 ; RV64-FAST-LABEL: t1:
 ; RV64-FAST:       # %bb.0: # %entry
 ; RV64-FAST-NEXT:    lui a1, %hi(.L.str1)
-; RV64-FAST-NEXT:    ld a2, %lo(.L.str1)(a1)
 ; RV64-FAST-NEXT:    addi a1, a1, %lo(.L.str1)
-; RV64-FAST-NEXT:    ld a3, 23(a1)
-; RV64-FAST-NEXT:    ld a4, 16(a1)
-; RV64-FAST-NEXT:    ld a1, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 0(a0)
-; RV64-FAST-NEXT:    sd a3, 23(a0)
-; RV64-FAST-NEXT:    sd a4, 16(a0)
-; RV64-FAST-NEXT:    sd a1, 8(a0)
+; RV64-FAST-NEXT:    ld a2, 23(a1)
+; RV64-FAST-NEXT:    ld a3, 16(a1)
+; RV64-FAST-NEXT:    ld a4, 8(a1)
+; RV64-FAST-NEXT:    ld a1, 0(a1)
+; RV64-FAST-NEXT:    sd a2, 23(a0)
+; RV64-FAST-NEXT:    sd a3, 16(a0)
+; RV64-FAST-NEXT:    sd a4, 8(a0)
+; RV64-FAST-NEXT:    sd a1, 0(a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str1, i64 31, i1 false)
@@ -163,19 +163,19 @@ define void @t2(ptr nocapture %C) nounwind {
 ;
 ; RV64-FAST-LABEL: t2:
 ; RV64-FAST:       # %bb.0: # %entry
+; RV64-FAST-NEXT:    lui a1, 1156
+; RV64-FAST-NEXT:    addi a1, a1, 332
+; RV64-FAST-NEXT:    sw a1, 32(a0)
 ; RV64-FAST-NEXT:    lui a1, %hi(.L.str2)
-; RV64-FAST-NEXT:    ld a2, %lo(.L.str2)(a1)
-; RV64-FAST-NEXT:    sd a2, 0(a0)
-; RV64-FAST-NEXT:    lui a2, 1156
-; RV64-FAST-NEXT:    addi a2, a2, 332
 ; RV64-FAST-NEXT:    addi a1, a1, %lo(.L.str2)
-; RV64-FAST-NEXT:    ld a3, 24(a1)
-; RV64-FAST-NEXT:    ld a4, 16(a1)
-; RV64-FAST-NEXT:    ld a1, 8(a1)
-; RV64-FAST-NEXT:    sw a2, 32(a0)
-; RV64-FAST-NEXT:    sd a3, 24(a0)
-; RV64-FAST-NEXT:    sd a4, 16(a0)
-; RV64-FAST-NEXT:    sd a1, 8(a0)
+; RV64-FAST-NEXT:    ld a2, 24(a1)
+; RV64-FAST-NEXT:    ld a3, 16(a1)
+; RV64-FAST-NEXT:    ld a4, 8(a1)
+; RV64-FAST-NEXT:    ld a1, 0(a1)
+; RV64-FAST-NEXT:    sd a2, 24(a0)
+; RV64-FAST-NEXT:    sd a3, 16(a0)
+; RV64-FAST-NEXT:    sd a4, 8(a0)
+; RV64-FAST-NEXT:    sd a1, 0(a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str2, i64 36, i1 false)
@@ -222,13 +222,13 @@ define void @t3(ptr nocapture %C) nounwind {
 ; RV64-FAST-LABEL: t3:
 ; RV64-FAST:       # %bb.0: # %entry
 ; RV64-FAST-NEXT:    lui a1, %hi(.L.str3)
-; RV64-FAST-NEXT:    ld a2, %lo(.L.str3)(a1)
 ; RV64-FAST-NEXT:    addi a1, a1, %lo(.L.str3)
-; RV64-FAST-NEXT:    ld a3, 16(a1)
-; RV64-FAST-NEXT:    ld a1, 8(a1)
-; RV64-FAST-NEXT:    sd a2, 0(a0)
-; RV64-FAST-NEXT:    sd a3, 16(a0)
-; RV64-FAST-NEXT:    sd a1, 8(a0)
+; RV64-FAST-NEXT:    ld a2, 16(a1)
+; RV64-FAST-NEXT:    ld a3, 8(a1)
+; RV64-FAST-NEXT:    ld a1, 0(a1)
+; RV64-FAST-NEXT:    sd a2, 16(a0)
+; RV64-FAST-NEXT:    sd a3, 8(a0)
+; RV64-FAST-NEXT:    sd a1, 0(a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str3, i64 24, i1 false)
@@ -271,13 +271,13 @@ define void @t4(ptr nocapture %C) nounwind {
 ; RV64-FAST-LABEL: t4:
 ; RV64-FAST:       # %bb.0: # %entry
 ; RV64-FAST-NEXT:    lui a1, %hi(.L.str4)
-; RV64-FAST-NEXT:    ld a2, %lo(.L.str4)(a1)
 ; RV64-FAST-NEXT:    addi a1, a1, %lo(.L.str4)
-; RV64-FAST-NEXT:    ld a1, 8(a1)
+; RV64-FAST-NEXT:    ld a2, 8(a1)
+; RV64-FAST-NEXT:    ld a1, 0(a1)
 ; RV64-FAST-NEXT:    li a3, 32
 ; RV64-FAST-NEXT:    sh a3, 16(a0)
-; RV64-FAST-NEXT:    sd a2, 0(a0)
-; RV64-FAST-NEXT:    sd a1, 8(a0)
+; RV64-FAST-NEXT:    sd a2, 8(a0)
+; RV64-FAST-NEXT:    sd a1, 0(a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str4, i64 18, i1 false)
@@ -391,12 +391,12 @@ define void @t6() nounwind {
 ; RV64-FAST-LABEL: t6:
 ; RV64-FAST:       # %bb.0: # %entry
 ; RV64-FAST-NEXT:    lui a0, %hi(.L.str6)
-; RV64-FAST-NEXT:    ld a1, %lo(.L.str6)(a0)
 ; RV64-FAST-NEXT:    addi a0, a0, %lo(.L.str6)
-; RV64-FAST-NEXT:    ld a0, 6(a0)
+; RV64-FAST-NEXT:    ld a1, 6(a0)
+; RV64-FAST-NEXT:    ld a0, 0(a0)
 ; RV64-FAST-NEXT:    lui a2, %hi(spool.splbuf)
-; RV64-FAST-NEXT:    sd a1, %lo(spool.splbuf)(a2)
-; RV64-FAST-NEXT:    sd a0, %lo(spool.splbuf+6)(a2)
+; RV64-FAST-NEXT:    sd a1, %lo(spool.splbuf+6)(a2)
+; RV64-FAST-NEXT:    sd a0, %lo(spool.splbuf)(a2)
 ; RV64-FAST-NEXT:    ret
 entry:
   call void @llvm.memcpy.p0.p0.i64(ptr @spool.splbuf, ptr @.str6, i64 14, i1 false)
diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
index 7548faaae61f4..89c50914f61db 100644
--- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll
+++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
@@ -704,57 +704,55 @@ entry:
 define i32 @nocompress(i32 signext %size) {
 ; RV32IZCMP-LABEL: nocompress:
 ; RV32IZCMP:       # %bb.0: # %entry
-; RV32IZCMP-NEXT:    cm.push {ra, s0-s8}, -48
+; RV32IZCMP-NEXT:    cm.push {ra, s0-s7}, -48
 ; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 48
-; RV32IZCMP-NEXT:    .cfi_offset ra, -40
-; RV32IZCMP-NEXT:    .cfi_offset s0, -36
-; RV32IZCMP-NEXT:    .cfi_offset s1, -32
-; RV32IZCMP-NEXT:    .cfi_offset s2, -28
-; RV32IZCMP-NEXT:    .cfi_offset s3, -24
-; RV32IZCMP-NEXT:    .cfi_offset s4, -20
-; RV32IZCMP-NEXT:    .cfi_offset s5, -16
-; RV32IZCMP-NEXT:    .cfi_offset s6, -12
-; RV32IZCMP-NEXT:    .cfi_offset s7, -8
-; RV32IZCMP-NEXT:    .cfi_offset s8, -4
+; RV32IZCMP-NEXT:    .cfi_offset ra, -36
+; RV32IZCMP-NEXT:    .cfi_offset s0, -32
+; RV32IZCMP-NEXT:    .cfi_offset s1, -28
+; RV32IZCMP-NEXT:    .cfi_offset s2, -24
+; RV32IZCMP-NEXT:    .cfi_offset s3, -20
+; RV32IZCMP-NEXT:    .cfi_offset s4, -16
+; RV32IZCMP-NEXT:    .cfi_offset s5, -12
+; RV32IZCMP-NEXT:    .cfi_offset s6, -8
+; RV32IZCMP-NEXT:    .cfi_offset s7, -4
 ; RV32IZCMP-NEXT:    addi s0, sp, 48
 ; RV32IZCMP-NEXT:    .cfi_def_cfa s0, 0
 ; RV32IZCMP-NEXT:    addi a0, a0, 15
 ; RV32IZCMP-NEXT:    andi a0, a0, -16
 ; RV32IZCMP-NEXT:    sub s2, sp, a0
 ; RV32IZCMP-NEXT:    mv sp, s2
-; RV32IZCMP-NEXT:    lui s1, %hi(var)
-; RV32IZCMP-NEXT:    lw s3, %lo(var)(s1)
-; RV32IZCMP-NEXT:    lw s4, %lo(var+4)(s1)
-; RV32IZCMP-NEXT:    lw s5, %lo(var+8)(s1)
-; RV32IZCMP-NEXT:    lw s6, %lo(var+12)(s1)
-; RV32IZCMP-NEXT:    addi s7, s1, %lo(var)
-; RV32IZCMP-NEXT:    lw s8, 16(s7)
+; RV32IZCMP-NEXT:    lui a0, %hi(var)
+; RV32IZCMP-NEXT:    addi s1, a0, %lo(var)
+; RV32IZCMP-NEXT:    lw s3, 0(s1)
+; RV32IZCMP-NEXT:    lw s4, 4(s1)
+; RV32IZCMP-NEXT:    lw s5, 8(s1)
+; RV32IZCMP-NEXT:    lw s6, 12(s1)
+; RV32IZCMP-NEXT:    lw s7, 16(s1)
 ; RV32IZCMP-NEXT:    mv a0, s2
 ; RV32IZCMP-NEXT:    call callee_void
-; RV32IZCMP-NEXT:    sw s8, 16(s7)
-; RV32IZCMP-NEXT:    sw s6, %lo(var+12)(s1)
-; RV32IZCMP-NEXT:    sw s5, %lo(var+8)(s1)
-; RV32IZCMP-NEXT:    sw s4, %lo(var+4)(s1)
-; RV32IZCMP-NEXT:    sw s3, %lo(var)(s1)
+; RV32IZCMP-NEXT:    sw s7, 16(s1)
+; RV32IZCMP-NEXT:    sw s6, 12(s1)
+; RV32IZCMP-NEXT:    sw s5, 8(s1)
+; RV32IZCMP-NEXT:    sw s4, 4(s1)
+; RV32IZCMP-NEXT:    sw s3, 0(s1)
 ; RV32IZCMP-NEXT:    mv a0, s2
 ; RV32IZCMP-NEXT:    addi sp, s0, -48
-; RV32IZCMP-NEXT:    cm.pop {ra, s0-s8}, 48
+; RV32IZCMP-NEXT:    cm.pop {ra, s0-s7}, 48
 ; RV32IZCMP-NEXT:    tail callee
 ;
 ; RV64IZCMP-LABEL: nocompress:
 ; RV64IZCMP:       # %bb.0: # %entry
-; RV64IZCMP-NEXT:    cm.push {ra, s0-s8}, -80
+; RV64IZCMP-NEXT:    cm.push {ra, s0-s7}, -80
 ; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 80
-; RV64IZCMP-NEXT:    .cfi_offset ra, -80
-; RV64IZCMP-NEXT:    .cfi_offset s0, -72
-; RV64IZCMP-NEXT:    .cfi_offset s1, -64
-; RV64IZCMP-NEXT:    .cfi_offset s2, -56
-; RV64IZCMP-NEXT:    .cfi_offset s3, -48
-; RV64IZCMP-NEXT:    .cfi_offset s4, -40
-; RV64IZCMP-NEXT:    .cfi_offset s5, -32
-; RV64IZCMP-NEXT:    .cfi_offset s6, -24
-; RV64IZCMP-NEXT:    .cfi_offset s7, -16
-; RV64IZCMP-NEXT:    .cfi_offset s8, -8
+; RV64IZCMP-NEXT:    .cfi_offset ra, -72
+; RV64IZCMP-NEXT:    .cfi_offset s0, -64
+; RV64IZCMP-NEXT:    .cfi_offset s1, -56
+; RV64IZCMP-NEXT:    .cfi_offset s2, -48
+; RV64IZCMP-NEXT:    .cfi_offset s3, -40
+; RV64IZCMP-NEXT:    .cfi_offset s4, -32
+; RV64IZCMP-NEXT:    .cfi_offset s5, -24
+; RV64IZCMP-NEXT:    .cfi_offset s6, -16
+; RV64IZCMP-NEXT:    .cfi_offset s7, -8
 ; RV64IZCMP-NEXT:    addi s0, sp, 80
 ; RV64IZCMP-NEXT:    .cfi_def_cfa s0, 0
 ; RV64IZCMP-NEXT:    slli a0, a0, 32
@@ -763,78 +761,76 @@ define i32 @nocompress(i32 signext %size) {
 ; RV64IZCMP-NEXT:    andi a0, a0, -16
 ; RV64IZCMP-NEXT:    sub s2, sp, a0
 ; RV64IZCMP-NEXT:    mv sp, s2
-; RV64IZCMP-NEXT:    lui s1, %hi(var)
-; RV64IZCMP-NEXT:    lw s3, %lo(var)(s1)
-; RV64IZCMP-NEXT:    lw s4, %lo(var+4)(s1)
-; RV64IZCMP-NEXT:    lw s5, %lo(var+8)(s1)
-; RV64IZCMP-NEXT:    lw s6, %lo(var+12)(s1)
-; RV64IZCMP-NEXT:    addi s7, s1, %lo(var)
-; RV64IZCMP-NEXT:    lw s8, 16(s7)
+; RV64IZCMP-NEXT:    lui a0, %hi(var)
+; RV64IZCMP-NEXT:    addi s1, a0, %lo(var)
+; RV64IZCMP-NEXT:    lw s3, 0(s1)
+; RV64IZCMP-NEXT:    lw s4, 4(s1)
+; RV64IZCMP-NEXT:    lw s5, 8(s1)
+; RV64IZCMP-NEXT:    lw s6, 12(s1)
+; RV64IZCMP-NEXT:    lw s7, 16(s1)
 ; RV64IZCMP-NEXT:    mv a0, s2
 ; RV64IZCMP-NEXT:    call callee_void
-; RV64IZCMP-NEXT:    sw s8, 16(s7)
-; RV64IZCMP-NEXT:    sw s6, %lo(var+12)(s1)
-; RV64IZCMP-NEXT:    sw s5, %lo(var+8)(s1)
-; RV64IZCMP-NEXT:    sw s4, %lo(var+4)(s1)
-; RV64IZCMP-NEXT:    sw s3, %lo(var)(s1)
+; RV64IZCMP-NEXT:    sw s7, 16(s1)
+; RV64IZCMP-NEXT:    sw s6, 12(s1)
+; RV64IZCMP-NEXT:    sw s5, 8(s1)
+; RV64IZCMP-NEXT:    sw s4, 4(s1)
+; RV64IZCMP-NEXT:    sw s3, 0(s1)
 ; RV64IZCMP-NEXT:    mv a0, s2
 ; RV64IZCMP-NEXT:    addi sp, s0, -80
-; RV64IZCMP-NEXT:    cm.pop {ra, s0-s8}, 80
+; RV64IZCMP-NEXT:    cm.pop {ra, s0-s7}, 80
 ; RV64IZCMP-NEXT:    tail callee
 ;
 ; RV32IZCMP-SR-LABEL: nocompress:
 ; RV32IZCMP-SR:       # %bb.0: # %entry
-; RV32IZCMP-SR-NEXT:    cm.push {ra, s0-s8}, -48
+; RV32IZCMP-SR-NEXT:    cm.push {ra, s0-s7}, -48
 ; RV32IZCMP-SR-NEXT:    .cfi_def_cfa_offset 48
-; RV32IZCMP-SR-NEXT:    .cfi_offset ra, -40
-; RV32IZCMP-SR-NEXT:    .cfi_offset s0, -36
-; RV32IZCMP-SR-NEXT:    .cfi_offset s1, -32
-; RV32IZCMP-SR-NEXT:    .cfi_offset s2, -28
-; RV32IZCMP-SR-NEXT:    .cfi_offset s3, -24
-; RV32IZCMP-SR-NEXT:    .cfi_offset s4, -20
-; RV32IZCMP-SR-NEXT:    .cfi_offset s5, -16
-; RV32IZCMP-SR-NEXT:    .cfi_offset s6, -12
-; RV32IZCMP-SR-NEXT:    .cfi_offset s7, -8
-; RV32IZCMP-SR-NEXT:    .cfi_offset s8, -4
+; RV32IZCMP-SR-NEXT:    .cfi_offset ra, -36
+; RV32IZCMP-SR-NEXT:    .cfi_offset s0, -32
+; RV32IZCMP-SR-NEXT:    .cfi_offset s1, -28
+; RV32IZCMP-SR-NEXT:    .cfi_offset s2, -24
+; RV32IZCMP-SR-NEXT:    .cfi_offset s3, -20
+; RV32IZCMP-SR-NEXT:    .cfi_offset s4, -16
+; RV32IZCMP-SR-NEXT:    .cfi_offset s5, -12
+; RV32IZCMP-SR-NEXT:    .cfi_offset s6, -8
+; RV32IZCMP-SR-NEXT:    .cfi_offset s7, -4
 ; RV32IZCMP-SR-NEXT:    addi s0, sp, 48
 ; RV32IZCMP-SR-NEXT:    .cfi_def_cfa s0, 0
 ; RV32IZCMP-SR-NEXT:    addi a0, a0, 15
 ; RV32IZCMP-SR-NEXT:    andi a0, a0, -16
 ; RV32IZCMP-SR-NEXT:    sub s2, sp, a0
 ; RV32IZCMP-SR-NEXT:    mv sp, s2
-; RV32IZCMP-SR-NEXT:    lui s1, %hi(var)
-; RV32IZCMP-SR-NEXT:    lw s3, %lo(var)(s1)
-; RV32IZCMP-SR-NEXT:    lw s4, %lo(var+4)(s1)
-; RV32IZCMP-SR-NEXT:    lw s5, %lo(var+8)(s1)
-; RV32IZCMP-SR-NEXT:    lw s6, %lo(var+12)(s1)
-; RV32IZCMP-SR-NEXT:    addi s7, s1, %lo(var)
-; RV32IZCMP-SR-NEXT:    lw s8, 16(s7)
+; RV32IZCMP-SR-NEXT:    lui a0, %hi(var)
+; RV32IZCMP-SR-NEXT:    addi s1, a0, %lo(var)
+; RV32IZCMP-SR-NEXT:    lw s3, 0(s1)
+; RV32IZCMP-SR-NEXT:    lw s4, 4(s1)
+; RV32IZCMP-SR-NEXT:    lw s5, 8(s1)
+; RV32IZCMP-SR-NEXT:    lw s6, 12(s1)
+; RV32IZCMP-SR-NEXT:    lw s7, 16(s1)
 ; RV32IZCMP-SR-NEXT:    mv a0, s2
 ; RV32IZCMP-SR-NEXT:    call callee_void
-; RV32IZCMP-SR-NEXT:    sw s8, 16(s7)
-; RV32IZCMP-SR-NEXT:    sw s6, %lo(var+12)(s1)
-; RV32IZCMP-SR-NEXT:    sw s5, %lo(var+8)(s1)
-; RV32IZCMP-SR-NEXT:    sw s4, %lo(var+4)(s1)
-; RV32IZCMP-SR-NEXT:    sw s3, %lo(var)(s1)
+; RV32IZCMP-SR-NEXT:    sw s7, 16(s1)
+; RV32IZCMP-SR-NEXT:    sw s6, 12(s1)
+; RV32IZCMP-SR-NEXT:    sw s5, 8(s1)
+; RV32IZCMP-SR-NEXT:    sw s4, 4(s1)
+; RV32IZCMP-SR-NEXT:    sw s3, 0(s1)
 ; RV32IZCMP-SR-NEXT:    mv a0, s2
 ; RV32IZCMP-SR-NEXT:    addi sp, s0, -48
-; RV32IZCMP-SR-NEXT:    cm.pop {ra, s0-s8}, 48
+; RV32IZCMP-SR-NEXT:    cm.pop {ra, s0-s7}, 48
 ; RV32IZCMP-SR-NEXT:    tail callee
 ;
 ; RV64IZCMP-SR-LABEL: nocompress:
 ; RV64IZCMP-SR:       # %bb.0: # %entry
-; RV64IZCMP-SR-NEXT:    cm.push {ra, s0-s8}, -80
+; RV64IZCMP-SR-NEXT:    cm.push {ra, s0-s7}, -80
 ; RV64IZCMP-SR-NEXT:    .cfi_def_cfa_offset 80
-; RV64IZCMP-SR-NEXT:    .cfi_offset ra, -80
-; RV64IZCMP-SR-NEXT:    .cfi_offset s0, -72
-; RV64IZCMP-SR-NEXT:    .cfi_offset s1, -64
-; RV64IZCMP-SR-NEXT:    .cfi_offset s2, -56
-; RV64IZCMP-SR-NEXT:    .cfi_offset s3, -48
-; RV64IZCMP-SR-NEXT:    .cfi_offset s4, -40
-; RV64IZCMP-SR-NEXT:    .cfi_offset s5, -32
-; RV64IZCMP-SR-NEXT:    .cfi_offset s6, -24
-; RV64IZCMP-SR-NEXT:    .cfi_offset s7, -16
-; RV64IZCMP-SR-NEXT:    .cfi_offset s8, -8
+; RV64IZCMP-SR-NEXT:    .cfi_offset ra, -72
+; RV64IZCMP-SR-NEXT:    .cfi_offset s0, -64
+; RV64IZCMP-SR-NEXT:    .cfi_offset s1, -56
+; RV64IZCMP-SR-NEXT:    .cfi_offset s2, -48
+; RV64IZCMP-SR-NEXT:    .cfi_offset s3, -40
+; RV64IZCMP-SR-NEXT:    .cfi_offset s4, -32
+; RV64IZCMP-SR-NEXT:    .cfi_offset s5, -24
+; RV64IZCMP-SR-NEXT:    .cfi_offset s6, -16
+; RV64IZCMP-SR-NEXT:    .cfi_offset s7, -8
 ; RV64IZCMP-SR-NEXT:    addi s0, sp, 80
 ; RV64IZCMP-SR-NEXT:    .cfi_def_cfa s0, 0
 ; RV64IZCMP-SR-NEXT:    slli a0, a0, 32
@@ -843,23 +839,23 @@ define i32 @nocompress(i32 signext %size) {
 ; RV64IZCMP-SR-NEXT:    andi a0, a0, -16
 ; RV64IZCMP-SR-NEXT:    sub s2, sp, a0
 ; RV64IZCMP-SR-NEXT:    mv sp, s2
-; RV64IZCMP-SR-NEXT:    lui s1, %hi(var)
-; RV64IZCMP-SR-NEXT:    lw s3, %lo(var)(s1)
-; RV64IZCMP-SR-NEXT:    lw s4, %lo(var+4)(s1)
-; RV64IZCMP-SR-NEXT:    lw s5, %lo(var+8)(s1)
-; RV64IZCMP-SR-NEXT:    lw s6, %lo(var+12)(s1)
-; RV64IZCMP-SR-NEXT:    addi s7, s1, %lo(var)
-; RV64IZCMP-SR-NEXT:    lw s8, 16(s7)
+; RV64IZCMP-SR-NEXT:    lui a0, %hi(var)
+; RV64IZCMP-SR-NEXT:    addi s1, a0, %lo(var)
+; RV64IZCMP-SR-NEXT:    lw s3, 0(s1)
+; RV64IZCMP-SR-NEXT:    lw s4, 4(s1)
+; RV64IZCMP-SR-NEXT:    lw s5, 8(s1)
+; RV64IZCMP-SR-NEXT:    lw s6, 12(s1)
+; RV64IZCMP-SR-NEXT:    lw s7, 16(s1)
 ; RV64IZCMP-SR-NEXT:    mv a0, s2
 ; RV64IZCMP-SR-NEXT:    call callee_void
-; RV64IZCMP-SR-NEXT:    sw s8, 16(s7)
-; RV64IZCMP-SR-NEXT:    sw s6, %lo(var+12)(s1)
-; RV64IZCMP-SR-NEXT:    sw s5, %lo(var+8)(s1)
-; RV64IZCMP-SR-NEXT:    sw s4, %lo(var+4)(s1)
-; RV64IZCMP-SR-NEXT:    sw s3, %lo(var)(s1)
+; RV64IZCMP-SR-NEXT:    sw s7, 16(s1)
+; RV64IZCMP-SR-NEXT:    sw s6, 12(s1)
+; RV64IZCMP-SR-NEXT:    sw s5, 8(s1)
+; RV64IZCMP-SR-NEXT:    sw s4, 4(s1)
+; RV64IZCMP-SR-NEXT:    sw s3, 0(s1)
 ; RV64IZCMP-SR-NEXT:    mv a0, s2
 ; RV64IZCMP-SR-NEXT:    addi sp, s0, -80
-; RV64IZCMP-SR-NEXT:    cm.pop {ra, s0-s8}, 80
+; RV64IZCMP-SR-NEXT:    cm.pop {ra, s0-s7}, 80
 ; RV64IZCMP-SR-NEXT:    tail callee
 ;
 ; RV32I-LABEL: nocompress:
@@ -875,7 +871,6 @@ define i32 @nocompress(i32 signext %size) {
 ; RV32I-NEXT:    sw s5, 20(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s6, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s7, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s8, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    .cfi_offset ra, -4
 ; RV32I-NEXT:    .cfi_offset s0, -8
 ; RV32I-NEXT:    .cfi_offset s1, -12
@@ -885,27 +880,26 @@ define i32 @nocompress(i32 signext %size) {
 ; RV32I-NEXT:    .cfi_offset s5, -28
 ; RV32I-NEXT:    .cfi_offset s6, -32
 ; RV32I-NEXT:    .cfi_offset s7, -36
-; RV32I-NEXT:    .cfi_offset s8, -40
 ; RV32I-NEXT:    addi s0, sp, 48
 ; RV32I-NEXT:    .cfi_def_cfa s0, 0
 ; RV32I-NEXT:    addi a0, a0, 15
 ; RV32I-NEXT:    andi a0, a0, -16
 ; RV32I-NEXT:    sub s1, sp, a0
 ; RV32I-NEXT:    mv sp, s1
-; RV32I-NEXT:    lui s2, %hi(var)
-; RV32I-NEXT:    lw s3, %lo(var)(s2)
-; RV32I-NEXT:    lw s4, %lo(var+4)(s2)
-; RV32I-NEXT:    lw s5, %lo(var+8)(s2)
-; RV32I-NEXT:    lw s6, %lo(var+12)(s2)
-; RV32I-NEXT:    addi s7, s2, %lo(var)
-; RV32I-NEXT:    lw s8, 16(s7)
+; RV32I-NEXT:    lui a0, %hi(var)
+; RV32I-NEXT:    addi s2, a0, %lo(var)
+; RV32I-NEXT:    lw s3, 0(s2)
+; RV32I-NEXT:    lw s4, 4(s2)
+; RV32I-NEXT:    lw s5, 8(s2)
+; RV32I-NEXT:    lw s6, 12(s2)
+; RV32I-NEXT:    lw s7, 16(s2)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call callee_void
-; RV32I-NEXT:    sw s8, 16(s7)
-; RV32I-NEXT:    sw s6, %lo(var+12)(s2)
-; RV32I-NEXT:    sw s5, %lo(var+8)(s2)
-; RV32I-NEXT:    sw s4, %lo(var+4)(s2)
-; RV32I-NEXT:    sw s3, %lo(var)(s2)
+; RV32I-NEXT:    sw s7, 16(s2)
+; RV32I-NEXT:    sw s6, 12(s2)
+; RV32I-NEXT:    sw s5, 8(s2)
+; RV32I-NEXT:    sw s4, 4(s2)
+; RV32I-NEXT:    sw s3, 0(s2)
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    addi sp, s0, -48
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
@@ -917,7 +911,6 @@ define i32 @nocompress(i32 signext %size) {
 ; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    tail callee
 ;
@@ -934,7 +927,6 @@ define i32 @nocompress(i32 signext %size) {
 ; RV64I-NEXT:    sd s5, 24(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s6, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s7, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s8, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    .cfi_offset ra, -8
 ; RV64I-NEXT:    .cfi_offset s0, -16
 ; RV64I-NEXT:    .cfi_offset s1, -24
@@ -944,7 +936,6 @@ define i32 @nocompress(i32 signext %size) {
 ; RV64I-NEXT:    .cfi_offset s5, -56
 ; RV64I-NEXT:    .cfi_offset s6, -64
 ; RV64I-NEXT:    .cfi_offset s7, -72
-; RV64I-NEXT:    .cfi_offset s8, -80
 ; RV64I-NEXT:    addi s0, sp, 80
 ; RV64I-NEXT:    .cfi_def_cfa s0, 0
 ; RV64I-NEXT:    slli a0, a0, 32
@@ -953,20 +944,20 @@ define i32 @nocompress(i32 signext %size) {
 ; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub s1, sp, a0
 ; RV64I-NEXT:    mv sp, s1
-; RV64I-NEXT:    lui s2, %hi(var)
-; RV64I-NEXT:    lw s3, %lo(var)(s2)
-; RV64I-NEXT:    lw s4, %lo(var+4)(s2)
-; RV64I-NEXT:    lw s5, %lo(var+8)(s2)
-; RV64I-NEXT:    lw s6, %lo(var+12)(s2)
-; RV64I-NEXT:    addi s7, s2, %lo(var)
-; RV64I-NEXT:    lw s8, 16(s7)
+; RV64I-NEXT:    lui a0, %hi(var)
+; RV64I-NEXT:    addi s2, a0, %lo(var)
+; RV64I-NEXT:    lw s3, 0(s2)
+; RV64I-NEXT:    lw s4, 4(s2)
+; RV64I-NEXT:    lw s5, 8(s2)
+; RV64I-NEXT:    lw s6, 12(s2)
+; RV64I-NEXT:    lw s7, 16(s2)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call callee_void
-; RV64I-NEXT:    sw s8, 16(s7)
-; RV64I-NEXT:    sw s6, %lo(var+12)(s2)
-; RV64I-NEXT:    sw s5, %lo(var+8)(s2)
-; RV64I-NEXT:    sw s4, %lo(var+4)(s2)
-; RV64I-NEXT:    sw s3, %lo(var)(s2)
+; RV64I-NEXT:    sw s7, 16(s2)
+; RV64I-NEXT:    sw s6, 12(s2)
+; RV64I-NEXT:    sw s5, 8(s2)
+; RV64I-NEXT:    sw s4, 4(s2)
+; RV64I-NEXT:    sw s3, 0(s2)
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    addi sp, s0, -80
 ; RV64I-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
@@ -978,7 +969,6 @@ define i32 @nocompress(i32 signext %size) {
 ; RV64I-NEXT:    ld s5, 24(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s6, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s7, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s8, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 80
 ; RV64I-NEXT:    tail callee
 entry:
@@ -1106,282 +1096,278 @@ define i32 @varargs(ptr %fmt, ...) nounwind {
 define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
 ; RV32IZCMP-LABEL: many_args:
 ; RV32IZCMP:       # %bb.0: # %entry
-; RV32IZCMP-NEXT:    cm.push {ra, s0-s4}, -32
+; RV32IZCMP-NEXT:    cm.push {ra, s0-s3}, -32
 ; RV32IZCMP-NEXT:    lui a0, %hi(var0)
-; RV32IZCMP-NEXT:    lw a6, %lo(var0)(a0)
-; RV32IZCMP-NEXT:    lw a7, %lo(var0+4)(a0)
-; RV32IZCMP-NEXT:    lw t0, %lo(var0+8)(a0)
-; RV32IZCMP-NEXT:    lw t1, %lo(var0+12)(a0)
-; RV32IZCMP-NEXT:    addi a5, a0, %lo(var0)
-; RV32IZCMP-NEXT:    lw t2, 16(a5)
-; RV32IZCMP-NEXT:    lw t3, 20(a5)
-; RV32IZCMP-NEXT:    lw t4, 24(a5)
-; RV32IZCMP-NEXT:    lw t5, 28(a5)
-; RV32IZCMP-NEXT:    lw t6, 32(a5)
-; RV32IZCMP-NEXT:    lw s2, 36(a5)
-; RV32IZCMP-NEXT:    lw s3, 40(a5)
-; RV32IZCMP-NEXT:    lw s4, 44(a5)
-; RV32IZCMP-NEXT:    lw a1, 48(a5)
-; RV32IZCMP-NEXT:    lw s0, 52(a5)
-; RV32IZCMP-NEXT:    lw s1, 68(a5)
-; RV32IZCMP-NEXT:    lw a2, 64(a5)
-; RV32IZCMP-NEXT:    lw a3, 60(a5)
-; RV32IZCMP-NEXT:    lw a4, 56(a5)
-; RV32IZCMP-NEXT:    sw s1, 68(a5)
-; RV32IZCMP-NEXT:    sw a2, 64(a5)
-; RV32IZCMP-NEXT:    sw a3, 60(a5)
-; RV32IZCMP-NEXT:    sw a4, 56(a5)
-; RV32IZCMP-NEXT:    sw s0, 52(a5)
-; RV32IZCMP-NEXT:    sw a1, 48(a5)
-; RV32IZCMP-NEXT:    sw s4, 44(a5)
-; RV32IZCMP-NEXT:    sw s3, 40(a5)
-; RV32IZCMP-NEXT:    sw s2, 36(a5)
-; RV32IZCMP-NEXT:    sw t6, 32(a5)
-; RV32IZCMP-NEXT:    sw t5, 28(a5)
-; RV32IZCMP-NEXT:    sw t4, 24(a5)
-; RV32IZCMP-NEXT:    sw t3, 20(a5)
-; RV32IZCMP-NEXT:    sw t2, 16(a5)
-; RV32IZCMP-NEXT:    sw t1, %lo(var0+12)(a0)
-; RV32IZCMP-NEXT:    sw t0, %lo(var0+8)(a0)
-; RV32IZCMP-NEXT:    sw a7, %lo(var0+4)(a0)
-; RV32IZCMP-NEXT:    sw a6, %lo(var0)(a0)
-; RV32IZCMP-NEXT:    cm.popret {ra, s0-s4}, 32
+; RV32IZCMP-NEXT:    addi a0, a0, %lo(var0)
+; RV32IZCMP-NEXT:    lw a6, 0(a0)
+; RV32IZCMP-NEXT:    lw a7, 4(a0)
+; RV32IZCMP-NEXT:    lw t0, 8(a0)
+; RV32IZCMP-NEXT:    lw t1, 12(a0)
+; RV32IZCMP-NEXT:    lw t2, 16(a0)
+; RV32IZCMP-NEXT:    lw t3, 20(a0)
+; RV32IZCMP-NEXT:    lw t4, 24(a0)
+; RV32IZCMP-NEXT:    lw t5, 28(a0)
+; RV32IZCMP-NEXT:    lw t6, 32(a0)
+; RV32IZCMP-NEXT:    lw s2, 36(a0)
+; RV32IZCMP-NEXT:    lw s3, 40(a0)
+; RV32IZCMP-NEXT:    lw a2, 44(a0)
+; RV32IZCMP-NEXT:    lw a3, 48(a0)
+; RV32IZCMP-NEXT:    lw a4, 52(a0)
+; RV32IZCMP-NEXT:    lw s0, 68(a0)
+; RV32IZCMP-NEXT:    lw s1, 64(a0)
+; RV32IZCMP-NEXT:    lw a5, 60(a0)
+; RV32IZCMP-NEXT:    lw a1, 56(a0)
+; RV32IZCMP-NEXT:    sw s0, 68(a0)
+; RV32IZCMP-NEXT:    sw s1, 64(a0)
+; RV32IZCMP-NEXT:    sw a5, 60(a0)
+; RV32IZCMP-NEXT:    sw a1, 56(a0)
+; RV32IZCMP-NEXT:    sw a4, 52(a0)
+; RV32IZCMP-NEXT:    sw a3, 48(a0)
+; RV32IZCMP-NEXT:    sw a2, 44(a0)
+; RV32IZCMP-NEXT:    sw s3, 40(a0)
+; RV32IZCMP-NEXT:    sw s2, 36(a0)
+; RV32IZCMP-NEXT:    sw t6, 32(a0)
+; RV32IZCMP-NEXT:    sw t5, 28(a0)
+; RV32IZCMP-NEXT:    sw t4, 24(a0)
+; RV32IZCMP-NEXT:    sw t3, 20(a0)
+; RV32IZCMP-NEXT:    sw t2, 16(a0)
+; RV32IZCMP-NEXT:    sw t1, 12(a0)
+; RV32IZCMP-NEXT:    sw t0, 8(a0)
+; RV32IZCMP-NEXT:    sw a7, 4(a0)
+; RV32IZCMP-NEXT:    sw a6, 0(a0)
+; RV32IZCMP-NEXT:    cm.popret {ra, s0-s3}, 32
 ;
 ; RV64IZCMP-LABEL: many_args:
 ; RV64IZCMP:       # %bb.0: # %entry
-; RV64IZCMP-NEXT:    cm.push {ra, s0-s4}, -48
+; RV64IZCMP-NEXT:    cm.push {ra, s0-s3}, -48
 ; RV64IZCMP-NEXT:    lui a0, %hi(var0)
-; RV64IZCMP-NEXT:    lw a6, %lo(var0)(a0)
-; RV64IZCMP-NEXT:    lw a7, %lo(var0+4)(a0)
-; RV64IZCMP-NEXT:    lw t0, %lo(var0+8)(a0)
-; RV64IZCMP-NEXT:    lw t1, %lo(var0+12)(a0)
-; RV64IZCMP-NEXT:    addi a5, a0, %lo(var0)
-; RV64IZCMP-NEXT:    lw t2, 16(a5)
-; RV64IZCMP-NEXT:    lw t3, 20(a5)
-; RV64IZCMP-NEXT:    lw t4, 24(a5)
-; RV64IZCMP-NEXT:    lw t5, 28(a5)
-; RV64IZCMP-NEXT:    lw t6, 32(a5)
-; RV64IZCMP-NEXT:    lw s2, 36(a5)
-; RV64IZCMP-NEXT:    lw s3, 40(a5)
-; RV64IZCMP-NEXT:    lw s4, 44(a5)
-; RV64IZCMP-NEXT:    lw a1, 48(a5)
-; RV64IZCMP-NEXT:    lw s0, 52(a5)
-; RV64IZCMP-NEXT:    lw s1, 68(a5)
-; RV64IZCMP-NEXT:    lw a2, 64(a5)
-; RV64IZCMP-NEXT:    lw a3, 60(a5)
-; RV64IZCMP-NEXT:    lw a4, 56(a5)
-; RV64IZCMP-NEXT:    sw s1, 68(a5)
-; RV64IZCMP-NEXT:    sw a2, 64(a5)
-; RV64IZCMP-NEXT:    sw a3, 60(a5)
-; RV64IZCMP-NEXT:    sw a4, 56(a5)
-; RV64IZCMP-NEXT:    sw s0, 52(a5)
-; RV64IZCMP-NEXT:    sw a1, 48(a5)
-; RV64IZCMP-NEXT:    sw s4, 44(a5)
-; RV64IZCMP-NEXT:    sw s3, 40(a5)
-; RV64IZCMP-NEXT:    sw s2, 36(a5)
-; RV64IZCMP-NEXT:    sw t6, 32(a5)
-; RV64IZCMP-NEXT:    sw t5, 28(a5)
-; RV64IZCMP-NEXT:    sw t4, 24(a5)
-; RV64IZCMP-NEXT:    sw t3, 20(a5)
-; RV64IZCMP-NEXT:    sw t2, 16(a5)
-; RV64IZCMP-NEXT:    sw t1, %lo(var0+12)(a0)
-; RV64IZCMP-NEXT:    sw t0, %lo(var0+8)(a0)
-; RV64IZCMP-NEXT:    sw a7, %lo(var0+4)(a0)
-; RV64IZCMP-NEXT:    sw a6, %lo(var0)(a0)
-; RV64IZCMP-NEXT:    cm.popret {ra, s0-s4}, 48
+; RV64IZCMP-NEXT:    addi a0, a0, %lo(var0)
+; RV64IZCMP-NEXT:    lw a6, 0(a0)
+; RV64IZCMP-NEXT:    lw a7, 4(a0)
+; RV64IZCMP-NEXT:    lw t0, 8(a0)
+; RV64IZCMP-NEXT:    lw t1, 12(a0)
+; RV64IZCMP-NEXT:    lw t2, 16(a0)
+; RV64IZCMP-NEXT:    lw t3, 20(a0)
+; RV64IZCMP-NEXT:    lw t4, 24(a0)
+; RV64IZCMP-NEXT:    lw t5, 28(a0)
+; RV64IZCMP-NEXT:    lw t6, 32(a0)
+; RV64IZCMP-NEXT:    lw s2, 36(a0)
+; RV64IZCMP-NEXT:    lw s3, 40(a0)
+; RV64IZCMP-NEXT:    lw a2, 44(a0)
+; RV64IZCMP-NEXT:    lw a3, 48(a0)
+; RV64IZCMP-NEXT:    lw a4, 52(a0)
+; RV64IZCMP-NEXT:    lw s0, 68(a0)
+; RV64IZCMP-NEXT:    lw s1, 64(a0)
+; RV64IZCMP-NEXT:    lw a5, 60(a0)
+; RV64IZCMP-NEXT:    lw a1, 56(a0)
+; RV64IZCMP-NEXT:    sw s0, 68(a0)
+; RV64IZCMP-NEXT:    sw s1, 64(a0)
+; RV64IZCMP-NEXT:    sw a5, 60(a0)
+; RV64IZCMP-NEXT:    sw a1, 56(a0)
+; RV64IZCMP-NEXT:    sw a4, 52(a0)
+; RV64IZCMP-NEXT:    sw a3, 48(a0)
+; RV64IZCMP-NEXT:    sw a2, 44(a0)
+; RV64IZCMP-NEXT:    sw s3, 40(a0)
+; RV64IZCMP-NEXT:    sw s2, 36(a0)
+; RV64IZCMP-NEXT:    sw t6, 32(a0)
+; RV64IZCMP-NEXT:    sw t5, 28(a0)
+; RV64IZCMP-NEXT:    sw t4, 24(a0)
+; RV64IZCMP-NEXT:    sw t3, 20(a0)
+; RV64IZCMP-NEXT:    sw t2, 16(a0)
+; RV64IZCMP-NEXT:    sw t1, 12(a0)
+; RV64IZCMP-NEXT:    sw t0, 8(a0)
+; RV64IZCMP-NEXT:    sw a7, 4(a0)
+; RV64IZCMP-NEXT:    sw a6, 0(a0)
+; RV64IZCMP-NEXT:    cm.popret {ra, s0-s3}, 48
 ;
 ; RV32IZCMP-SR-LABEL: many_args:
 ; RV32IZCMP-SR:       # %bb.0: # %entry
-; RV32IZCMP-SR-NEXT:    cm.push {ra, s0-s4}, -32
+; RV32IZCMP-SR-NEXT:    cm.push {ra, s0-s3}, -32
 ; RV32IZCMP-SR-NEXT:    lui a0, %hi(var0)
-; RV32IZCMP-SR-NEXT:    lw a6, %lo(var0)(a0)
-; RV32IZCMP-SR-NEXT:    lw a7, %lo(var0+4)(a0)
-; RV32IZCMP-SR-NEXT:    lw t0, %lo(var0+8)(a0)
-; RV32IZCMP-SR-NEXT:    lw t1, %lo(var0+12)(a0)
-; RV32IZCMP-SR-NEXT:    addi a5, a0, %lo(var0)
-; RV32IZCMP-SR-NEXT:    lw t2, 16(a5)
-; RV32IZCMP-SR-NEXT:    lw t3, 20(a5)
-; RV32IZCMP-SR-NEXT:    lw t4, 24(a5)
-; RV32IZCMP-SR-NEXT:    lw t5, 28(a5)
-; RV32IZCMP-SR-NEXT:    lw t6, 32(a5)
-; RV32IZCMP-SR-NEXT:    lw s2, 36(a5)
-; RV32IZCMP-SR-NEXT:    lw s3, 40(a5)
-; RV32IZCMP-SR-NEXT:    lw s4, 44(a5)
-; RV32IZCMP-SR-NEXT:    lw a1, 48(a5)
-; RV32IZCMP-SR-NEXT:    lw s0, 52(a5)
-; RV32IZCMP-SR-NEXT:    lw s1, 68(a5)
-; RV32IZCMP-SR-NEXT:    lw a2, 64(a5)
-; RV32IZCMP-SR-NEXT:    lw a3, 60(a5)
-; RV32IZCMP-SR-NEXT:    lw a4, 56(a5)
-; RV32IZCMP-SR-NEXT:    sw s1, 68(a5)
-; RV32IZCMP-SR-NEXT:    sw a2, 64(a5)
-; RV32IZCMP-SR-NEXT:    sw a3, 60(a5)
-; RV32IZCMP-SR-NEXT:    sw a4, 56(a5)
-; RV32IZCMP-SR-NEXT:    sw s0, 52(a5)
-; RV32IZCMP-SR-NEXT:    sw a1, 48(a5)
-; RV32IZCMP-SR-NEXT:    sw s4, 44(a5)
-; RV32IZCMP-SR-NEXT:    sw s3, 40(a5)
-; RV32IZCMP-SR-NEXT:    sw s2, 36(a5)
-; RV32IZCMP-SR-NEXT:    sw t6, 32(a5)
-; RV32IZCMP-SR-NEXT:    sw t5, 28(a5)
-; RV32IZCMP-SR-NEXT:    sw t4, 24(a5)
-; RV32IZCMP-SR-NEXT:    sw t3, 20(a5)
-; RV32IZCMP-SR-NEXT:    sw t2, 16(a5)
-; RV32IZCMP-SR-NEXT:    sw t1, %lo(var0+12)(a0)
-; RV32IZCMP-SR-NEXT:    sw t0, %lo(var0+8)(a0)
-; RV32IZCMP-SR-NEXT:    sw a7, %lo(var0+4)(a0)
-; RV32IZCMP-SR-NEXT:    sw a6, %lo(var0)(a0)
-; RV32IZCMP-SR-NEXT:    cm.popret {ra, s0-s4}, 32
+; RV32IZCMP-SR-NEXT:    addi a0, a0, %lo(var0)
+; RV32IZCMP-SR-NEXT:    lw a6, 0(a0)
+; RV32IZCMP-SR-NEXT:    lw a7, 4(a0)
+; RV32IZCMP-SR-NEXT:    lw t0, 8(a0)
+; RV32IZCMP-SR-NEXT:    lw t1, 12(a0)
+; RV32IZCMP-SR-NEXT:    lw t2, 16(a0)
+; RV32IZCMP-SR-NEXT:    lw t3, 20(a0)
+; RV32IZCMP-SR-NEXT:    lw t4, 24(a0)
+; RV32IZCMP-SR-NEXT:    lw t5, 28(a0)
+; RV32IZCMP-SR-NEXT:    lw t6, 32(a0)
+; RV32IZCMP-SR-NEXT:    lw s2, 36(a0)
+; RV32IZCMP-SR-NEXT:    lw s3, 40(a0)
+; RV32IZCMP-SR-NEXT:    lw a2, 44(a0)
+; RV32IZCMP-SR-NEXT:    lw a3, 48(a0)
+; RV32IZCMP-SR-NEXT:    lw a4, 52(a0)
+; RV32IZCMP-SR-NEXT:    lw s0, 68(a0)
+; RV32IZCMP-SR-NEXT:    lw s1, 64(a0)
+; RV32IZCMP-SR-NEXT:    lw a5, 60(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 56(a0)
+; RV32IZCMP-SR-NEXT:    sw s0, 68(a0)
+; RV32IZCMP-SR-NEXT:    sw s1, 64(a0)
+; RV32IZCMP-SR-NEXT:    sw a5, 60(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 56(a0)
+; RV32IZCMP-SR-NEXT:    sw a4, 52(a0)
+; RV32IZCMP-SR-NEXT:    sw a3, 48(a0)
+; RV32IZCMP-SR-NEXT:    sw a2, 44(a0)
+; RV32IZCMP-SR-NEXT:    sw s3, 40(a0)
+; RV32IZCMP-SR-NEXT:    sw s2, 36(a0)
+; RV32IZCMP-SR-NEXT:    sw t6, 32(a0)
+; RV32IZCMP-SR-NEXT:    sw t5, 28(a0)
+; RV32IZCMP-SR-NEXT:    sw t4, 24(a0)
+; RV32IZCMP-SR-NEXT:    sw t3, 20(a0)
+; RV32IZCMP-SR-NEXT:    sw t2, 16(a0)
+; RV32IZCMP-SR-NEXT:    sw t1, 12(a0)
+; RV32IZCMP-SR-NEXT:    sw t0, 8(a0)
+; RV32IZCMP-SR-NEXT:    sw a7, 4(a0)
+; RV32IZCMP-SR-NEXT:    sw a6, 0(a0)
+; RV32IZCMP-SR-NEXT:    cm.popret {ra, s0-s3}, 32
 ;
 ; RV64IZCMP-SR-LABEL: many_args:
 ; RV64IZCMP-SR:       # %bb.0: # %entry
-; RV64IZCMP-SR-NEXT:    cm.push {ra, s0-s4}, -48
+; RV64IZCMP-SR-NEXT:    cm.push {ra, s0-s3}, -48
 ; RV64IZCMP-SR-NEXT:    lui a0, %hi(var0)
-; RV64IZCMP-SR-NEXT:    lw a6, %lo(var0)(a0)
-; RV64IZCMP-SR-NEXT:    lw a7, %lo(var0+4)(a0)
-; RV64IZCMP-SR-NEXT:    lw t0, %lo(var0+8)(a0)
-; RV64IZCMP-SR-NEXT:    lw t1, %lo(var0+12)(a0)
-; RV64IZCMP-SR-NEXT:    addi a5, a0, %lo(var0)
-; RV64IZCMP-SR-NEXT:    lw t2, 16(a5)
-; RV64IZCMP-SR-NEXT:    lw t3, 20(a5)
-; RV64IZCMP-SR-NEXT:    lw t4, 24(a5)
-; RV64IZCMP-SR-NEXT:    lw t5, 28(a5)
-; RV64IZCMP-SR-NEXT:    lw t6, 32(a5)
-; RV64IZCMP-SR-NEXT:    lw s2, 36(a5)
-; RV64IZCMP-SR-NEXT:    lw s3, 40(a5)
-; RV64IZCMP-SR-NEXT:    lw s4, 44(a5)
-; RV64IZCMP-SR-NEXT:    lw a1, 48(a5)
-; RV64IZCMP-SR-NEXT:    lw s0, 52(a5)
-; RV64IZCMP-SR-NEXT:    lw s1, 68(a5)
-; RV64IZCMP-SR-NEXT:    lw a2, 64(a5)
-; RV64IZCMP-SR-NEXT:    lw a3, 60(a5)
-; RV64IZCMP-SR-NEXT:    lw a4, 56(a5)
-; RV64IZCMP-SR-NEXT:    sw s1, 68(a5)
-; RV64IZCMP-SR-NEXT:    sw a2, 64(a5)
-; RV64IZCMP-SR-NEXT:    sw a3, 60(a5)
-; RV64IZCMP-SR-NEXT:    sw a4, 56(a5)
-; RV64IZCMP-SR-NEXT:    sw s0, 52(a5)
-; RV64IZCMP-SR-NEXT:    sw a1, 48(a5)
-; RV64IZCMP-SR-NEXT:    sw s4, 44(a5)
-; RV64IZCMP-SR-NEXT:    sw s3, 40(a5)
-; RV64IZCMP-SR-NEXT:    sw s2, 36(a5)
-; RV64IZCMP-SR-NEXT:    sw t6, 32(a5)
-; RV64IZCMP-SR-NEXT:    sw t5, 28(a5)
-; RV64IZCMP-SR-NEXT:    sw t4, 24(a5)
-; RV64IZCMP-SR-NEXT:    sw t3, 20(a5)
-; RV64IZCMP-SR-NEXT:    sw t2, 16(a5)
-; RV64IZCMP-SR-NEXT:    sw t1, %lo(var0+12)(a0)
-; RV64IZCMP-SR-NEXT:    sw t0, %lo(var0+8)(a0)
-; RV64IZCMP-SR-NEXT:    sw a7, %lo(var0+4)(a0)
-; RV64IZCMP-SR-NEXT:    sw a6, %lo(var0)(a0)
-; RV64IZCMP-SR-NEXT:    cm.popret {ra, s0-s4}, 48
+; RV64IZCMP-SR-NEXT:    addi a0, a0, %lo(var0)
+; RV64IZCMP-SR-NEXT:    lw a6, 0(a0)
+; RV64IZCMP-SR-NEXT:    lw a7, 4(a0)
+; RV64IZCMP-SR-NEXT:    lw t0, 8(a0)
+; RV64IZCMP-SR-NEXT:    lw t1, 12(a0)
+; RV64IZCMP-SR-NEXT:    lw t2, 16(a0)
+; RV64IZCMP-SR-NEXT:    lw t3, 20(a0)
+; RV64IZCMP-SR-NEXT:    lw t4, 24(a0)
+; RV64IZCMP-SR-NEXT:    lw t5, 28(a0)
+; RV64IZCMP-SR-NEXT:    lw t6, 32(a0)
+; RV64IZCMP-SR-NEXT:    lw s2, 36(a0)
+; RV64IZCMP-SR-NEXT:    lw s3, 40(a0)
+; RV64IZCMP-SR-NEXT:    lw a2, 44(a0)
+; RV64IZCMP-SR-NEXT:    lw a3, 48(a0)
+; RV64IZCMP-SR-NEXT:    lw a4, 52(a0)
+; RV64IZCMP-SR-NEXT:    lw s0, 68(a0)
+; RV64IZCMP-SR-NEXT:    lw s1, 64(a0)
+; RV64IZCMP-SR-NEXT:    lw a5, 60(a0)
+; RV64IZCMP-SR-NEXT:    lw a1, 56(a0)
+; RV64IZCMP-SR-NEXT:    sw s0, 68(a0)
+; RV64IZCMP-SR-NEXT:    sw s1, 64(a0)
+; RV64IZCMP-SR-NEXT:    sw a5, 60(a0)
+; RV64IZCMP-SR-NEXT:    sw a1, 56(a0)
+; RV64IZCMP-SR-NEXT:    sw a4, 52(a0)
+; RV64IZCMP-SR-NEXT:    sw a3, 48(a0)
+; RV64IZCMP-SR-NEXT:    sw a2, 44(a0)
+; RV64IZCMP-SR-NEXT:    sw s3, 40(a0)
+; RV64IZCMP-SR-NEXT:    sw s2, 36(a0)
+; RV64IZCMP-SR-NEXT:    sw t6, 32(a0)
+; RV64IZCMP-SR-NEXT:    sw t5, 28(a0)
+; RV64IZCMP-SR-NEXT:    sw t4, 24(a0)
+; RV64IZCMP-SR-NEXT:    sw t3, 20(a0)
+; RV64IZCMP-SR-NEXT:    sw t2, 16(a0)
+; RV64IZCMP-SR-NEXT:    sw t1, 12(a0)
+; RV64IZCMP-SR-NEXT:    sw t0, 8(a0)
+; RV64IZCMP-SR-NEXT:    sw a7, 4(a0)
+; RV64IZCMP-SR-NEXT:    sw a6, 0(a0)
+; RV64IZCMP-SR-NEXT:    cm.popret {ra, s0-s3}, 48
 ;
 ; RV32I-LABEL: many_args:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    addi sp, sp, -32
-; RV32I-NEXT:    sw s0, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s1, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    sw s4, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 0(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lui a0, %hi(var0)
-; RV32I-NEXT:    lw a1, %lo(var0)(a0)
-; RV32I-NEXT:    lw a2, %lo(var0+4)(a0)
-; RV32I-NEXT:    lw a3, %lo(var0+8)(a0)
-; RV32I-NEXT:    lw a4, %lo(var0+12)(a0)
-; RV32I-NEXT:    addi a5, a0, %lo(var0)
-; RV32I-NEXT:    lw a6, 16(a5)
-; RV32I-NEXT:    lw a7, 20(a5)
-; RV32I-NEXT:    lw t0, 24(a5)
-; RV32I-NEXT:    lw t1, 28(a5)
-; RV32I-NEXT:    lw t2, 32(a5)
-; RV32I-NEXT:    lw t3, 36(a5)
-; RV32I-NEXT:    lw t4, 40(a5)
-; RV32I-NEXT:    lw t5, 44(a5)
-; RV32I-NEXT:    lw t6, 48(a5)
-; RV32I-NEXT:    lw s0, 52(a5)
-; RV32I-NEXT:    lw s1, 68(a5)
-; RV32I-NEXT:    lw s2, 64(a5)
-; RV32I-NEXT:    lw s3, 60(a5)
-; RV32I-NEXT:    lw s4, 56(a5)
-; RV32I-NEXT:    sw s1, 68(a5)
-; RV32I-NEXT:    sw s2, 64(a5)
-; RV32I-NEXT:    sw s3, 60(a5)
-; RV32I-NEXT:    sw s4, 56(a5)
-; RV32I-NEXT:    sw s0, 52(a5)
-; RV32I-NEXT:    sw t6, 48(a5)
-; RV32I-NEXT:    sw t5, 44(a5)
-; RV32I-NEXT:    sw t4, 40(a5)
-; RV32I-NEXT:    sw t3, 36(a5)
-; RV32I-NEXT:    sw t2, 32(a5)
-; RV32I-NEXT:    sw t1, 28(a5)
-; RV32I-NEXT:    sw t0, 24(a5)
-; RV32I-NEXT:    sw a7, 20(a5)
-; RV32I-NEXT:    sw a6, 16(a5)
-; RV32I-NEXT:    sw a4, %lo(var0+12)(a0)
-; RV32I-NEXT:    sw a3, %lo(var0+8)(a0)
-; RV32I-NEXT:    sw a2, %lo(var0+4)(a0)
-; RV32I-NEXT:    sw a1, %lo(var0)(a0)
-; RV32I-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    addi a0, a0, %lo(var0)
+; RV32I-NEXT:    lw a1, 0(a0)
+; RV32I-NEXT:    lw a2, 4(a0)
+; RV32I-NEXT:    lw a3, 8(a0)
+; RV32I-NEXT:    lw a4, 12(a0)
+; RV32I-NEXT:    lw a5, 16(a0)
+; RV32I-NEXT:    lw a6, 20(a0)
+; RV32I-NEXT:    lw a7, 24(a0)
+; RV32I-NEXT:    lw t0, 28(a0)
+; RV32I-NEXT:    lw t1, 32(a0)
+; RV32I-NEXT:    lw t2, 36(a0)
+; RV32I-NEXT:    lw t3, 40(a0)
+; RV32I-NEXT:    lw t4, 44(a0)
+; RV32I-NEXT:    lw t5, 48(a0)
+; RV32I-NEXT:    lw t6, 52(a0)
+; RV32I-NEXT:    lw s0, 68(a0)
+; RV32I-NEXT:    lw s1, 64(a0)
+; RV32I-NEXT:    lw s2, 60(a0)
+; RV32I-NEXT:    lw s3, 56(a0)
+; RV32I-NEXT:    sw s0, 68(a0)
+; RV32I-NEXT:    sw s1, 64(a0)
+; RV32I-NEXT:    sw s2, 60(a0)
+; RV32I-NEXT:    sw s3, 56(a0)
+; RV32I-NEXT:    sw t6, 52(a0)
+; RV32I-NEXT:    sw t5, 48(a0)
+; RV32I-NEXT:    sw t4, 44(a0)
+; RV32I-NEXT:    sw t3, 40(a0)
+; RV32I-NEXT:    sw t2, 36(a0)
+; RV32I-NEXT:    sw t1, 32(a0)
+; RV32I-NEXT:    sw t0, 28(a0)
+; RV32I-NEXT:    sw a7, 24(a0)
+; RV32I-NEXT:    sw a6, 20(a0)
+; RV32I-NEXT:    sw a5, 16(a0)
+; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    sw a3, 8(a0)
+; RV32I-NEXT:    sw a2, 4(a0)
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: many_args:
 ; RV64I:       # %bb.0: # %entry
-; RV64I-NEXT:    addi sp, sp, -48
-; RV64I-NEXT:    sd s0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd s0, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    lui a0, %hi(var0)
-; RV64I-NEXT:    lw a1, %lo(var0)(a0)
-; RV64I-NEXT:    lw a2, %lo(var0+4)(a0)
-; RV64I-NEXT:    lw a3, %lo(var0+8)(a0)
-; RV64I-NEXT:    lw a4, %lo(var0+12)(a0)
-; RV64I-NEXT:    addi a5, a0, %lo(var0)
-; RV64I-NEXT:    lw a6, 16(a5)
-; RV64I-NEXT:    lw a7, 20(a5)
-; RV64I-NEXT:    lw t0, 24(a5)
-; RV64I-NEXT:    lw t1, 28(a5)
-; RV64I-NEXT:    lw t2, 32(a5)
-; RV64I-NEXT:    lw t3, 36(a5)
-; RV64I-NEXT:    lw t4, 40(a5)
-; RV64I-NEXT:    lw t5, 44(a5)
-; RV64I-NEXT:    lw t6, 48(a5)
-; RV64I-NEXT:    lw s0, 52(a5)
-; RV64I-NEXT:    lw s1, 68(a5)
-; RV64I-NEXT:    lw s2, 64(a5)
-; RV64I-NEXT:    lw s3, 60(a5)
-; RV64I-NEXT:    lw s4, 56(a5)
-; RV64I-NEXT:    sw s1, 68(a5)
-; RV64I-NEXT:    sw s2, 64(a5)
-; RV64I-NEXT:    sw s3, 60(a5)
-; RV64I-NEXT:    sw s4, 56(a5)
-; RV64I-NEXT:    sw s0, 52(a5)
-; RV64I-NEXT:    sw t6, 48(a5)
-; RV64I-NEXT:    sw t5, 44(a5)
-; RV64I-NEXT:    sw t4, 40(a5)
-; RV64I-NEXT:    sw t3, 36(a5)
-; RV64I-NEXT:    sw t2, 32(a5)
-; RV64I-NEXT:    sw t1, 28(a5)
-; RV64I-NEXT:    sw t0, 24(a5)
-; RV64I-NEXT:    sw a7, 20(a5)
-; RV64I-NEXT:    sw a6, 16(a5)
-; RV64I-NEXT:    sw a4, %lo(var0+12)(a0)
-; RV64I-NEXT:    sw a3, %lo(var0+8)(a0)
-; RV64I-NEXT:    sw a2, %lo(var0+4)(a0)
-; RV64I-NEXT:    sw a1, %lo(var0)(a0)
-; RV64I-NEXT:    ld s0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    addi a0, a0, %lo(var0)
+; RV64I-NEXT:    lw a1, 0(a0)
+; RV64I-NEXT:    lw a2, 4(a0)
+; RV64I-NEXT:    lw a3, 8(a0)
+; RV64I-NEXT:    lw a4, 12(a0)
+; RV64I-NEXT:    lw a5, 16(a0)
+; RV64I-NEXT:    lw a6, 20(a0)
+; RV64I-NEXT:    lw a7, 24(a0)
+; RV64I-NEXT:    lw t0, 28(a0)
+; RV64I-NEXT:    lw t1, 32(a0)
+; RV64I-NEXT:    lw t2, 36(a0)
+; RV64I-NEXT:    lw t3, 40(a0)
+; RV64I-NEXT:    lw t4, 44(a0)
+; RV64I-NEXT:    lw t5, 48(a0)
+; RV64I-NEXT:    lw t6, 52(a0)
+; RV64I-NEXT:    lw s0, 68(a0)
+; RV64I-NEXT:    lw s1, 64(a0)
+; RV64I-NEXT:    lw s2, 60(a0)
+; RV64I-NEXT:    lw s3, 56(a0)
+; RV64I-NEXT:    sw s0, 68(a0)
+; RV64I-NEXT:    sw s1, 64(a0)
+; RV64I-NEXT:    sw s2, 60(a0)
+; RV64I-NEXT:    sw s3, 56(a0)
+; RV64I-NEXT:    sw t6, 52(a0)
+; RV64I-NEXT:    sw t5, 48(a0)
+; RV64I-NEXT:    sw t4, 44(a0)
+; RV64I-NEXT:    sw t3, 40(a0)
+; RV64I-NEXT:    sw t2, 36(a0)
+; RV64I-NEXT:    sw t1, 32(a0)
+; RV64I-NEXT:    sw t0, 28(a0)
+; RV64I-NEXT:    sw a7, 24(a0)
+; RV64I-NEXT:    sw a6, 20(a0)
+; RV64I-NEXT:    sw a5, 16(a0)
+; RV64I-NEXT:    sw a4, 12(a0)
+; RV64I-NEXT:    sw a3, 8(a0)
+; RV64I-NEXT:    sw a2, 4(a0)
+; RV64I-NEXT:    sw a1, 0(a0)
+; RV64I-NEXT:    ld s0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 entry:
   %val = load [18 x i32], ptr @var0
@@ -1797,464 +1783,456 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32IZCMP-LABEL: callee_with_irq:
 ; RV32IZCMP:       # %bb.0:
 ; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -112
-; RV32IZCMP-NEXT:    addi sp, sp, -48
-; RV32IZCMP-NEXT:    sw t0, 92(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw t1, 88(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw t2, 84(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw a2, 72(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw a3, 68(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw a4, 64(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw a5, 60(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw a6, 56(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw a7, 52(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw t3, 48(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw t4, 44(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw t5, 40(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    sw t6, 36(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lui a6, %hi(var_test_irq)
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV32IZCMP-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV32IZCMP-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV32IZCMP-NEXT:    lw a0, 16(a5)
-; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 20(a5)
-; RV32IZCMP-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw t4, 24(a5)
-; RV32IZCMP-NEXT:    lw t5, 28(a5)
-; RV32IZCMP-NEXT:    lw t6, 32(a5)
-; RV32IZCMP-NEXT:    lw s2, 36(a5)
-; RV32IZCMP-NEXT:    lw s3, 40(a5)
-; RV32IZCMP-NEXT:    lw s4, 44(a5)
-; RV32IZCMP-NEXT:    lw s5, 48(a5)
-; RV32IZCMP-NEXT:    lw s6, 52(a5)
-; RV32IZCMP-NEXT:    lw s7, 56(a5)
-; RV32IZCMP-NEXT:    lw s8, 60(a5)
-; RV32IZCMP-NEXT:    lw s9, 64(a5)
-; RV32IZCMP-NEXT:    lw s10, 68(a5)
-; RV32IZCMP-NEXT:    lw s11, 72(a5)
-; RV32IZCMP-NEXT:    lw ra, 76(a5)
-; RV32IZCMP-NEXT:    lw s1, 80(a5)
-; RV32IZCMP-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-NEXT:    lw t1, 92(a5)
-; RV32IZCMP-NEXT:    lw t0, 96(a5)
-; RV32IZCMP-NEXT:    lw s0, 100(a5)
-; RV32IZCMP-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-NEXT:    sw a7, 104(a5)
-; RV32IZCMP-NEXT:    sw s0, 100(a5)
-; RV32IZCMP-NEXT:    sw t0, 96(a5)
-; RV32IZCMP-NEXT:    sw t1, 92(a5)
-; RV32IZCMP-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-NEXT:    sw t3, 84(a5)
-; RV32IZCMP-NEXT:    sw s1, 80(a5)
-; RV32IZCMP-NEXT:    sw ra, 76(a5)
-; RV32IZCMP-NEXT:    sw s11, 72(a5)
-; RV32IZCMP-NEXT:    sw s10, 68(a5)
-; RV32IZCMP-NEXT:    sw s9, 64(a5)
-; RV32IZCMP-NEXT:    sw s8, 60(a5)
-; RV32IZCMP-NEXT:    sw s7, 56(a5)
-; RV32IZCMP-NEXT:    sw s6, 52(a5)
-; RV32IZCMP-NEXT:    sw s5, 48(a5)
-; RV32IZCMP-NEXT:    sw s4, 44(a5)
-; RV32IZCMP-NEXT:    sw s3, 40(a5)
-; RV32IZCMP-NEXT:    sw s2, 36(a5)
-; RV32IZCMP-NEXT:    sw t6, 32(a5)
-; RV32IZCMP-NEXT:    sw t5, 28(a5)
-; RV32IZCMP-NEXT:    sw t4, 24(a5)
-; RV32IZCMP-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 20(a5)
-; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 16(a5)
-; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV32IZCMP-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV32IZCMP-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a6)
-; RV32IZCMP-NEXT:    lw t0, 92(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw t1, 88(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw t2, 84(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw a1, 76(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw a2, 72(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw a3, 68(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw a4, 64(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw a5, 60(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw a6, 56(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw a7, 52(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw t3, 48(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw t4, 44(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw t5, 40(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    lw t6, 36(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    addi sp, sp, 48
+; RV32IZCMP-NEXT:    addi sp, sp, -32
+; RV32IZCMP-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t1, 72(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t2, 68(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a0, 64(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a1, 60(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a2, 56(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a3, 52(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a4, 48(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a5, 44(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a6, 40(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw a7, 36(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t3, 32(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t4, 28(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t5, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    sw t6, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lui a0, %hi(var_test_irq)
+; RV32IZCMP-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV32IZCMP-NEXT:    lw a1, 0(a0)
+; RV32IZCMP-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 4(a0)
+; RV32IZCMP-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 8(a0)
+; RV32IZCMP-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 12(a0)
+; RV32IZCMP-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 16(a0)
+; RV32IZCMP-NEXT:    sw a1, 0(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw t3, 20(a0)
+; RV32IZCMP-NEXT:    lw t4, 24(a0)
+; RV32IZCMP-NEXT:    lw t5, 28(a0)
+; RV32IZCMP-NEXT:    lw t6, 32(a0)
+; RV32IZCMP-NEXT:    lw s2, 36(a0)
+; RV32IZCMP-NEXT:    lw s3, 40(a0)
+; RV32IZCMP-NEXT:    lw s4, 44(a0)
+; RV32IZCMP-NEXT:    lw s5, 48(a0)
+; RV32IZCMP-NEXT:    lw s6, 52(a0)
+; RV32IZCMP-NEXT:    lw s7, 56(a0)
+; RV32IZCMP-NEXT:    lw s8, 60(a0)
+; RV32IZCMP-NEXT:    lw s9, 64(a0)
+; RV32IZCMP-NEXT:    lw s10, 68(a0)
+; RV32IZCMP-NEXT:    lw s11, 72(a0)
+; RV32IZCMP-NEXT:    lw ra, 76(a0)
+; RV32IZCMP-NEXT:    lw t2, 80(a0)
+; RV32IZCMP-NEXT:    lw s0, 84(a0)
+; RV32IZCMP-NEXT:    lw s1, 88(a0)
+; RV32IZCMP-NEXT:    lw t1, 92(a0)
+; RV32IZCMP-NEXT:    lw t0, 96(a0)
+; RV32IZCMP-NEXT:    lw a7, 100(a0)
+; RV32IZCMP-NEXT:    lw a6, 104(a0)
+; RV32IZCMP-NEXT:    lw a5, 108(a0)
+; RV32IZCMP-NEXT:    lw a1, 124(a0)
+; RV32IZCMP-NEXT:    lw a2, 120(a0)
+; RV32IZCMP-NEXT:    lw a3, 116(a0)
+; RV32IZCMP-NEXT:    lw a4, 112(a0)
+; RV32IZCMP-NEXT:    sw a1, 124(a0)
+; RV32IZCMP-NEXT:    sw a2, 120(a0)
+; RV32IZCMP-NEXT:    sw a3, 116(a0)
+; RV32IZCMP-NEXT:    sw a4, 112(a0)
+; RV32IZCMP-NEXT:    sw a5, 108(a0)
+; RV32IZCMP-NEXT:    sw a6, 104(a0)
+; RV32IZCMP-NEXT:    sw a7, 100(a0)
+; RV32IZCMP-NEXT:    sw t0, 96(a0)
+; RV32IZCMP-NEXT:    sw t1, 92(a0)
+; RV32IZCMP-NEXT:    sw s1, 88(a0)
+; RV32IZCMP-NEXT:    sw s0, 84(a0)
+; RV32IZCMP-NEXT:    sw t2, 80(a0)
+; RV32IZCMP-NEXT:    sw ra, 76(a0)
+; RV32IZCMP-NEXT:    sw s11, 72(a0)
+; RV32IZCMP-NEXT:    sw s10, 68(a0)
+; RV32IZCMP-NEXT:    sw s9, 64(a0)
+; RV32IZCMP-NEXT:    sw s8, 60(a0)
+; RV32IZCMP-NEXT:    sw s7, 56(a0)
+; RV32IZCMP-NEXT:    sw s6, 52(a0)
+; RV32IZCMP-NEXT:    sw s5, 48(a0)
+; RV32IZCMP-NEXT:    sw s4, 44(a0)
+; RV32IZCMP-NEXT:    sw s3, 40(a0)
+; RV32IZCMP-NEXT:    sw s2, 36(a0)
+; RV32IZCMP-NEXT:    sw t6, 32(a0)
+; RV32IZCMP-NEXT:    sw t5, 28(a0)
+; RV32IZCMP-NEXT:    sw t4, 24(a0)
+; RV32IZCMP-NEXT:    sw t3, 20(a0)
+; RV32IZCMP-NEXT:    lw a1, 0(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 16(a0)
+; RV32IZCMP-NEXT:    lw a1, 4(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 12(a0)
+; RV32IZCMP-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 8(a0)
+; RV32IZCMP-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 4(a0)
+; RV32IZCMP-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 0(a0)
+; RV32IZCMP-NEXT:    lw t0, 76(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t1, 72(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t2, 68(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a1, 60(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a2, 56(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a3, 52(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a4, 48(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a5, 44(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a6, 40(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw a7, 36(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t3, 32(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t4, 28(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t5, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    lw t6, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    addi sp, sp, 32
 ; RV32IZCMP-NEXT:    cm.pop {ra, s0-s11}, 112
 ; RV32IZCMP-NEXT:    mret
 ;
 ; RV64IZCMP-LABEL: callee_with_irq:
 ; RV64IZCMP:       # %bb.0:
 ; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
-; RV64IZCMP-NEXT:    addi sp, sp, -128
-; RV64IZCMP-NEXT:    sd t0, 168(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd t1, 160(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd t2, 152(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd a3, 120(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd a4, 112(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd a5, 104(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd a6, 96(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd a7, 88(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd t3, 80(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd t4, 72(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd t5, 64(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    sd t6, 56(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lui a6, %hi(var_test_irq)
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV64IZCMP-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV64IZCMP-NEXT:    lw a0, 16(a5)
-; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 20(a5)
-; RV64IZCMP-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw t4, 24(a5)
-; RV64IZCMP-NEXT:    lw t5, 28(a5)
-; RV64IZCMP-NEXT:    lw t6, 32(a5)
-; RV64IZCMP-NEXT:    lw s2, 36(a5)
-; RV64IZCMP-NEXT:    lw s3, 40(a5)
-; RV64IZCMP-NEXT:    lw s4, 44(a5)
-; RV64IZCMP-NEXT:    lw s5, 48(a5)
-; RV64IZCMP-NEXT:    lw s6, 52(a5)
-; RV64IZCMP-NEXT:    lw s7, 56(a5)
-; RV64IZCMP-NEXT:    lw s8, 60(a5)
-; RV64IZCMP-NEXT:    lw s9, 64(a5)
-; RV64IZCMP-NEXT:    lw s10, 68(a5)
-; RV64IZCMP-NEXT:    lw s11, 72(a5)
-; RV64IZCMP-NEXT:    lw ra, 76(a5)
-; RV64IZCMP-NEXT:    lw s1, 80(a5)
-; RV64IZCMP-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-NEXT:    lw t1, 92(a5)
-; RV64IZCMP-NEXT:    lw t0, 96(a5)
-; RV64IZCMP-NEXT:    lw s0, 100(a5)
-; RV64IZCMP-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-NEXT:    sw a7, 104(a5)
-; RV64IZCMP-NEXT:    sw s0, 100(a5)
-; RV64IZCMP-NEXT:    sw t0, 96(a5)
-; RV64IZCMP-NEXT:    sw t1, 92(a5)
-; RV64IZCMP-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-NEXT:    sw t3, 84(a5)
-; RV64IZCMP-NEXT:    sw s1, 80(a5)
-; RV64IZCMP-NEXT:    sw ra, 76(a5)
-; RV64IZCMP-NEXT:    sw s11, 72(a5)
-; RV64IZCMP-NEXT:    sw s10, 68(a5)
-; RV64IZCMP-NEXT:    sw s9, 64(a5)
-; RV64IZCMP-NEXT:    sw s8, 60(a5)
-; RV64IZCMP-NEXT:    sw s7, 56(a5)
-; RV64IZCMP-NEXT:    sw s6, 52(a5)
-; RV64IZCMP-NEXT:    sw s5, 48(a5)
-; RV64IZCMP-NEXT:    sw s4, 44(a5)
-; RV64IZCMP-NEXT:    sw s3, 40(a5)
-; RV64IZCMP-NEXT:    sw s2, 36(a5)
-; RV64IZCMP-NEXT:    sw t6, 32(a5)
-; RV64IZCMP-NEXT:    sw t5, 28(a5)
-; RV64IZCMP-NEXT:    sw t4, 24(a5)
-; RV64IZCMP-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 20(a5)
-; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 16(a5)
-; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV64IZCMP-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a6)
-; RV64IZCMP-NEXT:    ld t0, 168(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld t1, 160(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld a3, 120(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld a4, 112(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld a5, 104(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld a6, 96(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld a7, 88(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld t3, 80(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld t4, 72(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld t5, 64(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    ld t6, 56(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    addi sp, sp, 128
+; RV64IZCMP-NEXT:    addi sp, sp, -112
+; RV64IZCMP-NEXT:    sd t0, 152(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t1, 144(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t2, 136(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a2, 112(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a3, 104(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a4, 96(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a5, 88(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a6, 80(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd a7, 72(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t3, 64(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t4, 56(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t5, 48(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    sd t6, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lui a0, %hi(var_test_irq)
+; RV64IZCMP-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV64IZCMP-NEXT:    lw a1, 0(a0)
+; RV64IZCMP-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 4(a0)
+; RV64IZCMP-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 8(a0)
+; RV64IZCMP-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 12(a0)
+; RV64IZCMP-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 16(a0)
+; RV64IZCMP-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw t3, 20(a0)
+; RV64IZCMP-NEXT:    lw t4, 24(a0)
+; RV64IZCMP-NEXT:    lw t5, 28(a0)
+; RV64IZCMP-NEXT:    lw t6, 32(a0)
+; RV64IZCMP-NEXT:    lw s2, 36(a0)
+; RV64IZCMP-NEXT:    lw s3, 40(a0)
+; RV64IZCMP-NEXT:    lw s4, 44(a0)
+; RV64IZCMP-NEXT:    lw s5, 48(a0)
+; RV64IZCMP-NEXT:    lw s6, 52(a0)
+; RV64IZCMP-NEXT:    lw s7, 56(a0)
+; RV64IZCMP-NEXT:    lw s8, 60(a0)
+; RV64IZCMP-NEXT:    lw s9, 64(a0)
+; RV64IZCMP-NEXT:    lw s10, 68(a0)
+; RV64IZCMP-NEXT:    lw s11, 72(a0)
+; RV64IZCMP-NEXT:    lw ra, 76(a0)
+; RV64IZCMP-NEXT:    lw t2, 80(a0)
+; RV64IZCMP-NEXT:    lw s0, 84(a0)
+; RV64IZCMP-NEXT:    lw s1, 88(a0)
+; RV64IZCMP-NEXT:    lw t1, 92(a0)
+; RV64IZCMP-NEXT:    lw t0, 96(a0)
+; RV64IZCMP-NEXT:    lw a7, 100(a0)
+; RV64IZCMP-NEXT:    lw a6, 104(a0)
+; RV64IZCMP-NEXT:    lw a5, 108(a0)
+; RV64IZCMP-NEXT:    lw a1, 124(a0)
+; RV64IZCMP-NEXT:    lw a2, 120(a0)
+; RV64IZCMP-NEXT:    lw a3, 116(a0)
+; RV64IZCMP-NEXT:    lw a4, 112(a0)
+; RV64IZCMP-NEXT:    sw a1, 124(a0)
+; RV64IZCMP-NEXT:    sw a2, 120(a0)
+; RV64IZCMP-NEXT:    sw a3, 116(a0)
+; RV64IZCMP-NEXT:    sw a4, 112(a0)
+; RV64IZCMP-NEXT:    sw a5, 108(a0)
+; RV64IZCMP-NEXT:    sw a6, 104(a0)
+; RV64IZCMP-NEXT:    sw a7, 100(a0)
+; RV64IZCMP-NEXT:    sw t0, 96(a0)
+; RV64IZCMP-NEXT:    sw t1, 92(a0)
+; RV64IZCMP-NEXT:    sw s1, 88(a0)
+; RV64IZCMP-NEXT:    sw s0, 84(a0)
+; RV64IZCMP-NEXT:    sw t2, 80(a0)
+; RV64IZCMP-NEXT:    sw ra, 76(a0)
+; RV64IZCMP-NEXT:    sw s11, 72(a0)
+; RV64IZCMP-NEXT:    sw s10, 68(a0)
+; RV64IZCMP-NEXT:    sw s9, 64(a0)
+; RV64IZCMP-NEXT:    sw s8, 60(a0)
+; RV64IZCMP-NEXT:    sw s7, 56(a0)
+; RV64IZCMP-NEXT:    sw s6, 52(a0)
+; RV64IZCMP-NEXT:    sw s5, 48(a0)
+; RV64IZCMP-NEXT:    sw s4, 44(a0)
+; RV64IZCMP-NEXT:    sw s3, 40(a0)
+; RV64IZCMP-NEXT:    sw s2, 36(a0)
+; RV64IZCMP-NEXT:    sw t6, 32(a0)
+; RV64IZCMP-NEXT:    sw t5, 28(a0)
+; RV64IZCMP-NEXT:    sw t4, 24(a0)
+; RV64IZCMP-NEXT:    sw t3, 20(a0)
+; RV64IZCMP-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 16(a0)
+; RV64IZCMP-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 12(a0)
+; RV64IZCMP-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 8(a0)
+; RV64IZCMP-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 4(a0)
+; RV64IZCMP-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 0(a0)
+; RV64IZCMP-NEXT:    ld t0, 152(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t1, 144(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t2, 136(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a3, 104(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a4, 96(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a5, 88(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a6, 80(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld a7, 72(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t3, 64(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t4, 56(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t5, 48(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    ld t6, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    addi sp, sp, 112
 ; RV64IZCMP-NEXT:    cm.pop {ra, s0-s11}, 160
 ; RV64IZCMP-NEXT:    mret
 ;
 ; RV32IZCMP-SR-LABEL: callee_with_irq:
 ; RV32IZCMP-SR:       # %bb.0:
 ; RV32IZCMP-SR-NEXT:    cm.push {ra, s0-s11}, -112
-; RV32IZCMP-SR-NEXT:    addi sp, sp, -48
-; RV32IZCMP-SR-NEXT:    sw t0, 92(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw t1, 88(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw t2, 84(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw a0, 80(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw a2, 72(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw a3, 68(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw a4, 64(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw a5, 60(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw a6, 56(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw a7, 52(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw t3, 48(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw t4, 44(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw t5, 40(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    sw t6, 36(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lui a6, %hi(var_test_irq)
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV32IZCMP-SR-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV32IZCMP-SR-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV32IZCMP-SR-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV32IZCMP-SR-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV32IZCMP-SR-NEXT:    lw a0, 16(a5)
-; RV32IZCMP-SR-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, 20(a5)
-; RV32IZCMP-SR-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw t4, 24(a5)
-; RV32IZCMP-SR-NEXT:    lw t5, 28(a5)
-; RV32IZCMP-SR-NEXT:    lw t6, 32(a5)
-; RV32IZCMP-SR-NEXT:    lw s2, 36(a5)
-; RV32IZCMP-SR-NEXT:    lw s3, 40(a5)
-; RV32IZCMP-SR-NEXT:    lw s4, 44(a5)
-; RV32IZCMP-SR-NEXT:    lw s5, 48(a5)
-; RV32IZCMP-SR-NEXT:    lw s6, 52(a5)
-; RV32IZCMP-SR-NEXT:    lw s7, 56(a5)
-; RV32IZCMP-SR-NEXT:    lw s8, 60(a5)
-; RV32IZCMP-SR-NEXT:    lw s9, 64(a5)
-; RV32IZCMP-SR-NEXT:    lw s10, 68(a5)
-; RV32IZCMP-SR-NEXT:    lw s11, 72(a5)
-; RV32IZCMP-SR-NEXT:    lw ra, 76(a5)
-; RV32IZCMP-SR-NEXT:    lw s1, 80(a5)
-; RV32IZCMP-SR-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-SR-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-SR-NEXT:    lw t1, 92(a5)
-; RV32IZCMP-SR-NEXT:    lw t0, 96(a5)
-; RV32IZCMP-SR-NEXT:    lw s0, 100(a5)
-; RV32IZCMP-SR-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-SR-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-SR-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-SR-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-SR-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-SR-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-SR-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-SR-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-SR-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-SR-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-SR-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-SR-NEXT:    sw a7, 104(a5)
-; RV32IZCMP-SR-NEXT:    sw s0, 100(a5)
-; RV32IZCMP-SR-NEXT:    sw t0, 96(a5)
-; RV32IZCMP-SR-NEXT:    sw t1, 92(a5)
-; RV32IZCMP-SR-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-SR-NEXT:    sw t3, 84(a5)
-; RV32IZCMP-SR-NEXT:    sw s1, 80(a5)
-; RV32IZCMP-SR-NEXT:    sw ra, 76(a5)
-; RV32IZCMP-SR-NEXT:    sw s11, 72(a5)
-; RV32IZCMP-SR-NEXT:    sw s10, 68(a5)
-; RV32IZCMP-SR-NEXT:    sw s9, 64(a5)
-; RV32IZCMP-SR-NEXT:    sw s8, 60(a5)
-; RV32IZCMP-SR-NEXT:    sw s7, 56(a5)
-; RV32IZCMP-SR-NEXT:    sw s6, 52(a5)
-; RV32IZCMP-SR-NEXT:    sw s5, 48(a5)
-; RV32IZCMP-SR-NEXT:    sw s4, 44(a5)
-; RV32IZCMP-SR-NEXT:    sw s3, 40(a5)
-; RV32IZCMP-SR-NEXT:    sw s2, 36(a5)
-; RV32IZCMP-SR-NEXT:    sw t6, 32(a5)
-; RV32IZCMP-SR-NEXT:    sw t5, 28(a5)
-; RV32IZCMP-SR-NEXT:    sw t4, 24(a5)
-; RV32IZCMP-SR-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, 20(a5)
-; RV32IZCMP-SR-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, 16(a5)
-; RV32IZCMP-SR-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV32IZCMP-SR-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV32IZCMP-SR-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV32IZCMP-SR-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a6)
-; RV32IZCMP-SR-NEXT:    lw t0, 92(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw t1, 88(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw t2, 84(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw a0, 80(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw a1, 76(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw a2, 72(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw a3, 68(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw a4, 64(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw a5, 60(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw a6, 56(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw a7, 52(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw t3, 48(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw t4, 44(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw t5, 40(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    lw t6, 36(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    addi sp, sp, 48
+; RV32IZCMP-SR-NEXT:    addi sp, sp, -32
+; RV32IZCMP-SR-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw t1, 72(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw t2, 68(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw a0, 64(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw a1, 60(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw a2, 56(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw a3, 52(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw a4, 48(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw a5, 44(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw a6, 40(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw a7, 36(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw t3, 32(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw t4, 28(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw t5, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    sw t6, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lui a0, %hi(var_test_irq)
+; RV32IZCMP-SR-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV32IZCMP-SR-NEXT:    lw a1, 0(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lw a1, 4(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lw a1, 8(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lw a1, 12(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lw a1, 16(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 0(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lw t3, 20(a0)
+; RV32IZCMP-SR-NEXT:    lw t4, 24(a0)
+; RV32IZCMP-SR-NEXT:    lw t5, 28(a0)
+; RV32IZCMP-SR-NEXT:    lw t6, 32(a0)
+; RV32IZCMP-SR-NEXT:    lw s2, 36(a0)
+; RV32IZCMP-SR-NEXT:    lw s3, 40(a0)
+; RV32IZCMP-SR-NEXT:    lw s4, 44(a0)
+; RV32IZCMP-SR-NEXT:    lw s5, 48(a0)
+; RV32IZCMP-SR-NEXT:    lw s6, 52(a0)
+; RV32IZCMP-SR-NEXT:    lw s7, 56(a0)
+; RV32IZCMP-SR-NEXT:    lw s8, 60(a0)
+; RV32IZCMP-SR-NEXT:    lw s9, 64(a0)
+; RV32IZCMP-SR-NEXT:    lw s10, 68(a0)
+; RV32IZCMP-SR-NEXT:    lw s11, 72(a0)
+; RV32IZCMP-SR-NEXT:    lw ra, 76(a0)
+; RV32IZCMP-SR-NEXT:    lw t2, 80(a0)
+; RV32IZCMP-SR-NEXT:    lw s0, 84(a0)
+; RV32IZCMP-SR-NEXT:    lw s1, 88(a0)
+; RV32IZCMP-SR-NEXT:    lw t1, 92(a0)
+; RV32IZCMP-SR-NEXT:    lw t0, 96(a0)
+; RV32IZCMP-SR-NEXT:    lw a7, 100(a0)
+; RV32IZCMP-SR-NEXT:    lw a6, 104(a0)
+; RV32IZCMP-SR-NEXT:    lw a5, 108(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 124(a0)
+; RV32IZCMP-SR-NEXT:    lw a2, 120(a0)
+; RV32IZCMP-SR-NEXT:    lw a3, 116(a0)
+; RV32IZCMP-SR-NEXT:    lw a4, 112(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 124(a0)
+; RV32IZCMP-SR-NEXT:    sw a2, 120(a0)
+; RV32IZCMP-SR-NEXT:    sw a3, 116(a0)
+; RV32IZCMP-SR-NEXT:    sw a4, 112(a0)
+; RV32IZCMP-SR-NEXT:    sw a5, 108(a0)
+; RV32IZCMP-SR-NEXT:    sw a6, 104(a0)
+; RV32IZCMP-SR-NEXT:    sw a7, 100(a0)
+; RV32IZCMP-SR-NEXT:    sw t0, 96(a0)
+; RV32IZCMP-SR-NEXT:    sw t1, 92(a0)
+; RV32IZCMP-SR-NEXT:    sw s1, 88(a0)
+; RV32IZCMP-SR-NEXT:    sw s0, 84(a0)
+; RV32IZCMP-SR-NEXT:    sw t2, 80(a0)
+; RV32IZCMP-SR-NEXT:    sw ra, 76(a0)
+; RV32IZCMP-SR-NEXT:    sw s11, 72(a0)
+; RV32IZCMP-SR-NEXT:    sw s10, 68(a0)
+; RV32IZCMP-SR-NEXT:    sw s9, 64(a0)
+; RV32IZCMP-SR-NEXT:    sw s8, 60(a0)
+; RV32IZCMP-SR-NEXT:    sw s7, 56(a0)
+; RV32IZCMP-SR-NEXT:    sw s6, 52(a0)
+; RV32IZCMP-SR-NEXT:    sw s5, 48(a0)
+; RV32IZCMP-SR-NEXT:    sw s4, 44(a0)
+; RV32IZCMP-SR-NEXT:    sw s3, 40(a0)
+; RV32IZCMP-SR-NEXT:    sw s2, 36(a0)
+; RV32IZCMP-SR-NEXT:    sw t6, 32(a0)
+; RV32IZCMP-SR-NEXT:    sw t5, 28(a0)
+; RV32IZCMP-SR-NEXT:    sw t4, 24(a0)
+; RV32IZCMP-SR-NEXT:    sw t3, 20(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 0(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    sw a1, 16(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 4(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    sw a1, 12(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    sw a1, 8(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    sw a1, 4(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    sw a1, 0(a0)
+; RV32IZCMP-SR-NEXT:    lw t0, 76(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw t1, 72(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw t2, 68(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw a0, 64(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw a1, 60(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw a2, 56(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw a3, 52(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw a4, 48(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw a5, 44(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw a6, 40(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw a7, 36(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw t3, 32(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw t4, 28(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw t5, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    lw t6, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    addi sp, sp, 32
 ; RV32IZCMP-SR-NEXT:    cm.pop {ra, s0-s11}, 112
 ; RV32IZCMP-SR-NEXT:    mret
 ;
 ; RV64IZCMP-SR-LABEL: callee_with_irq:
 ; RV64IZCMP-SR:       # %bb.0:
 ; RV64IZCMP-SR-NEXT:    cm.push {ra, s0-s11}, -160
-; RV64IZCMP-SR-NEXT:    addi sp, sp, -128
-; RV64IZCMP-SR-NEXT:    sd t0, 168(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd t1, 160(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd t2, 152(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd a0, 144(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd a1, 136(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd a2, 128(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd a3, 120(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd a4, 112(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd a5, 104(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd a6, 96(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd a7, 88(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd t3, 80(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd t4, 72(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd t5, 64(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    sd t6, 56(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lui a6, %hi(var_test_irq)
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV64IZCMP-SR-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV64IZCMP-SR-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV64IZCMP-SR-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV64IZCMP-SR-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV64IZCMP-SR-NEXT:    lw a0, 16(a5)
-; RV64IZCMP-SR-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, 20(a5)
-; RV64IZCMP-SR-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw t4, 24(a5)
-; RV64IZCMP-SR-NEXT:    lw t5, 28(a5)
-; RV64IZCMP-SR-NEXT:    lw t6, 32(a5)
-; RV64IZCMP-SR-NEXT:    lw s2, 36(a5)
-; RV64IZCMP-SR-NEXT:    lw s3, 40(a5)
-; RV64IZCMP-SR-NEXT:    lw s4, 44(a5)
-; RV64IZCMP-SR-NEXT:    lw s5, 48(a5)
-; RV64IZCMP-SR-NEXT:    lw s6, 52(a5)
-; RV64IZCMP-SR-NEXT:    lw s7, 56(a5)
-; RV64IZCMP-SR-NEXT:    lw s8, 60(a5)
-; RV64IZCMP-SR-NEXT:    lw s9, 64(a5)
-; RV64IZCMP-SR-NEXT:    lw s10, 68(a5)
-; RV64IZCMP-SR-NEXT:    lw s11, 72(a5)
-; RV64IZCMP-SR-NEXT:    lw ra, 76(a5)
-; RV64IZCMP-SR-NEXT:    lw s1, 80(a5)
-; RV64IZCMP-SR-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-SR-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-SR-NEXT:    lw t1, 92(a5)
-; RV64IZCMP-SR-NEXT:    lw t0, 96(a5)
-; RV64IZCMP-SR-NEXT:    lw s0, 100(a5)
-; RV64IZCMP-SR-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-SR-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-SR-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-SR-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-SR-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-SR-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-SR-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-SR-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-SR-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-SR-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-SR-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-SR-NEXT:    sw a7, 104(a5)
-; RV64IZCMP-SR-NEXT:    sw s0, 100(a5)
-; RV64IZCMP-SR-NEXT:    sw t0, 96(a5)
-; RV64IZCMP-SR-NEXT:    sw t1, 92(a5)
-; RV64IZCMP-SR-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-SR-NEXT:    sw t3, 84(a5)
-; RV64IZCMP-SR-NEXT:    sw s1, 80(a5)
-; RV64IZCMP-SR-NEXT:    sw ra, 76(a5)
-; RV64IZCMP-SR-NEXT:    sw s11, 72(a5)
-; RV64IZCMP-SR-NEXT:    sw s10, 68(a5)
-; RV64IZCMP-SR-NEXT:    sw s9, 64(a5)
-; RV64IZCMP-SR-NEXT:    sw s8, 60(a5)
-; RV64IZCMP-SR-NEXT:    sw s7, 56(a5)
-; RV64IZCMP-SR-NEXT:    sw s6, 52(a5)
-; RV64IZCMP-SR-NEXT:    sw s5, 48(a5)
-; RV64IZCMP-SR-NEXT:    sw s4, 44(a5)
-; RV64IZCMP-SR-NEXT:    sw s3, 40(a5)
-; RV64IZCMP-SR-NEXT:    sw s2, 36(a5)
-; RV64IZCMP-SR-NEXT:    sw t6, 32(a5)
-; RV64IZCMP-SR-NEXT:    sw t5, 28(a5)
-; RV64IZCMP-SR-NEXT:    sw t4, 24(a5)
-; RV64IZCMP-SR-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, 20(a5)
-; RV64IZCMP-SR-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, 16(a5)
-; RV64IZCMP-SR-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV64IZCMP-SR-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV64IZCMP-SR-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV64IZCMP-SR-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a6)
-; RV64IZCMP-SR-NEXT:    ld t0, 168(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld t1, 160(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld a1, 136(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld a2, 128(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld a3, 120(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld a4, 112(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld a5, 104(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld a6, 96(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld a7, 88(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld t3, 80(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld t4, 72(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld t5, 64(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    ld t6, 56(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    addi sp, sp, 128
+; RV64IZCMP-SR-NEXT:    addi sp, sp, -112
+; RV64IZCMP-SR-NEXT:    sd t0, 152(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd t1, 144(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd t2, 136(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd a0, 128(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd a1, 120(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd a2, 112(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd a3, 104(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd a4, 96(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd a5, 88(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd a6, 80(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd a7, 72(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd t3, 64(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd t4, 56(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd t5, 48(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    sd t6, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lui a0, %hi(var_test_irq)
+; RV64IZCMP-SR-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV64IZCMP-SR-NEXT:    lw a1, 0(a0)
+; RV64IZCMP-SR-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lw a1, 4(a0)
+; RV64IZCMP-SR-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lw a1, 8(a0)
+; RV64IZCMP-SR-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lw a1, 12(a0)
+; RV64IZCMP-SR-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lw a1, 16(a0)
+; RV64IZCMP-SR-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lw t3, 20(a0)
+; RV64IZCMP-SR-NEXT:    lw t4, 24(a0)
+; RV64IZCMP-SR-NEXT:    lw t5, 28(a0)
+; RV64IZCMP-SR-NEXT:    lw t6, 32(a0)
+; RV64IZCMP-SR-NEXT:    lw s2, 36(a0)
+; RV64IZCMP-SR-NEXT:    lw s3, 40(a0)
+; RV64IZCMP-SR-NEXT:    lw s4, 44(a0)
+; RV64IZCMP-SR-NEXT:    lw s5, 48(a0)
+; RV64IZCMP-SR-NEXT:    lw s6, 52(a0)
+; RV64IZCMP-SR-NEXT:    lw s7, 56(a0)
+; RV64IZCMP-SR-NEXT:    lw s8, 60(a0)
+; RV64IZCMP-SR-NEXT:    lw s9, 64(a0)
+; RV64IZCMP-SR-NEXT:    lw s10, 68(a0)
+; RV64IZCMP-SR-NEXT:    lw s11, 72(a0)
+; RV64IZCMP-SR-NEXT:    lw ra, 76(a0)
+; RV64IZCMP-SR-NEXT:    lw t2, 80(a0)
+; RV64IZCMP-SR-NEXT:    lw s0, 84(a0)
+; RV64IZCMP-SR-NEXT:    lw s1, 88(a0)
+; RV64IZCMP-SR-NEXT:    lw t1, 92(a0)
+; RV64IZCMP-SR-NEXT:    lw t0, 96(a0)
+; RV64IZCMP-SR-NEXT:    lw a7, 100(a0)
+; RV64IZCMP-SR-NEXT:    lw a6, 104(a0)
+; RV64IZCMP-SR-NEXT:    lw a5, 108(a0)
+; RV64IZCMP-SR-NEXT:    lw a1, 124(a0)
+; RV64IZCMP-SR-NEXT:    lw a2, 120(a0)
+; RV64IZCMP-SR-NEXT:    lw a3, 116(a0)
+; RV64IZCMP-SR-NEXT:    lw a4, 112(a0)
+; RV64IZCMP-SR-NEXT:    sw a1, 124(a0)
+; RV64IZCMP-SR-NEXT:    sw a2, 120(a0)
+; RV64IZCMP-SR-NEXT:    sw a3, 116(a0)
+; RV64IZCMP-SR-NEXT:    sw a4, 112(a0)
+; RV64IZCMP-SR-NEXT:    sw a5, 108(a0)
+; RV64IZCMP-SR-NEXT:    sw a6, 104(a0)
+; RV64IZCMP-SR-NEXT:    sw a7, 100(a0)
+; RV64IZCMP-SR-NEXT:    sw t0, 96(a0)
+; RV64IZCMP-SR-NEXT:    sw t1, 92(a0)
+; RV64IZCMP-SR-NEXT:    sw s1, 88(a0)
+; RV64IZCMP-SR-NEXT:    sw s0, 84(a0)
+; RV64IZCMP-SR-NEXT:    sw t2, 80(a0)
+; RV64IZCMP-SR-NEXT:    sw ra, 76(a0)
+; RV64IZCMP-SR-NEXT:    sw s11, 72(a0)
+; RV64IZCMP-SR-NEXT:    sw s10, 68(a0)
+; RV64IZCMP-SR-NEXT:    sw s9, 64(a0)
+; RV64IZCMP-SR-NEXT:    sw s8, 60(a0)
+; RV64IZCMP-SR-NEXT:    sw s7, 56(a0)
+; RV64IZCMP-SR-NEXT:    sw s6, 52(a0)
+; RV64IZCMP-SR-NEXT:    sw s5, 48(a0)
+; RV64IZCMP-SR-NEXT:    sw s4, 44(a0)
+; RV64IZCMP-SR-NEXT:    sw s3, 40(a0)
+; RV64IZCMP-SR-NEXT:    sw s2, 36(a0)
+; RV64IZCMP-SR-NEXT:    sw t6, 32(a0)
+; RV64IZCMP-SR-NEXT:    sw t5, 28(a0)
+; RV64IZCMP-SR-NEXT:    sw t4, 24(a0)
+; RV64IZCMP-SR-NEXT:    sw t3, 20(a0)
+; RV64IZCMP-SR-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    sw a1, 16(a0)
+; RV64IZCMP-SR-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    sw a1, 12(a0)
+; RV64IZCMP-SR-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    sw a1, 8(a0)
+; RV64IZCMP-SR-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    sw a1, 4(a0)
+; RV64IZCMP-SR-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    sw a1, 0(a0)
+; RV64IZCMP-SR-NEXT:    ld t0, 152(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld t1, 144(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld t2, 136(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld a1, 120(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld a2, 112(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld a3, 104(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld a4, 96(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld a5, 88(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld a6, 80(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld a7, 72(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld t3, 64(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld t4, 56(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld t5, 48(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    ld t6, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    addi sp, sp, 112
 ; RV64IZCMP-SR-NEXT:    cm.pop {ra, s0-s11}, 160
 ; RV64IZCMP-SR-NEXT:    mret
 ;
@@ -2289,84 +2267,82 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32I-NEXT:    sw t4, 40(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw t5, 36(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw t6, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lui a6, %hi(var_test_irq)
-; RV32I-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV32I-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV32I-NEXT:    lw a0, 16(a5)
-; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 20(a5)
-; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw t0, 24(a5)
-; RV32I-NEXT:    lw t1, 28(a5)
-; RV32I-NEXT:    lw t2, 32(a5)
-; RV32I-NEXT:    lw t3, 36(a5)
-; RV32I-NEXT:    lw t4, 40(a5)
-; RV32I-NEXT:    lw t5, 44(a5)
-; RV32I-NEXT:    lw t6, 48(a5)
-; RV32I-NEXT:    lw s0, 52(a5)
-; RV32I-NEXT:    lw s1, 56(a5)
-; RV32I-NEXT:    lw s2, 60(a5)
-; RV32I-NEXT:    lw s3, 64(a5)
-; RV32I-NEXT:    lw s4, 68(a5)
-; RV32I-NEXT:    lw s5, 72(a5)
-; RV32I-NEXT:    lw s6, 76(a5)
-; RV32I-NEXT:    lw s7, 80(a5)
-; RV32I-NEXT:    lw s8, 84(a5)
-; RV32I-NEXT:    lw s9, 88(a5)
-; RV32I-NEXT:    lw s10, 92(a5)
-; RV32I-NEXT:    lw s11, 96(a5)
-; RV32I-NEXT:    lw ra, 100(a5)
-; RV32I-NEXT:    lw a7, 104(a5)
-; RV32I-NEXT:    lw a4, 108(a5)
-; RV32I-NEXT:    lw a0, 124(a5)
-; RV32I-NEXT:    lw a1, 120(a5)
-; RV32I-NEXT:    lw a2, 116(a5)
-; RV32I-NEXT:    lw a3, 112(a5)
-; RV32I-NEXT:    sw a0, 124(a5)
-; RV32I-NEXT:    sw a1, 120(a5)
-; RV32I-NEXT:    sw a2, 116(a5)
-; RV32I-NEXT:    sw a3, 112(a5)
-; RV32I-NEXT:    sw a4, 108(a5)
-; RV32I-NEXT:    sw a7, 104(a5)
-; RV32I-NEXT:    sw ra, 100(a5)
-; RV32I-NEXT:    sw s11, 96(a5)
-; RV32I-NEXT:    sw s10, 92(a5)
-; RV32I-NEXT:    sw s9, 88(a5)
-; RV32I-NEXT:    sw s8, 84(a5)
-; RV32I-NEXT:    sw s7, 80(a5)
-; RV32I-NEXT:    sw s6, 76(a5)
-; RV32I-NEXT:    sw s5, 72(a5)
-; RV32I-NEXT:    sw s4, 68(a5)
-; RV32I-NEXT:    sw s3, 64(a5)
-; RV32I-NEXT:    sw s2, 60(a5)
-; RV32I-NEXT:    sw s1, 56(a5)
-; RV32I-NEXT:    sw s0, 52(a5)
-; RV32I-NEXT:    sw t6, 48(a5)
-; RV32I-NEXT:    sw t5, 44(a5)
-; RV32I-NEXT:    sw t4, 40(a5)
-; RV32I-NEXT:    sw t3, 36(a5)
-; RV32I-NEXT:    sw t2, 32(a5)
-; RV32I-NEXT:    sw t1, 28(a5)
-; RV32I-NEXT:    sw t0, 24(a5)
-; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 20(a5)
-; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 16(a5)
-; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV32I-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV32I-NEXT:    lui a0, %hi(var_test_irq)
+; RV32I-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV32I-NEXT:    lw a1, 0(a0)
+; RV32I-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 4(a0)
+; RV32I-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 8(a0)
+; RV32I-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 12(a0)
+; RV32I-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 16(a0)
+; RV32I-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a6, 20(a0)
+; RV32I-NEXT:    lw a7, 24(a0)
+; RV32I-NEXT:    lw t0, 28(a0)
+; RV32I-NEXT:    lw t1, 32(a0)
+; RV32I-NEXT:    lw t2, 36(a0)
+; RV32I-NEXT:    lw t3, 40(a0)
+; RV32I-NEXT:    lw t4, 44(a0)
+; RV32I-NEXT:    lw t5, 48(a0)
+; RV32I-NEXT:    lw t6, 52(a0)
+; RV32I-NEXT:    lw s0, 56(a0)
+; RV32I-NEXT:    lw s1, 60(a0)
+; RV32I-NEXT:    lw s2, 64(a0)
+; RV32I-NEXT:    lw s3, 68(a0)
+; RV32I-NEXT:    lw s4, 72(a0)
+; RV32I-NEXT:    lw s5, 76(a0)
+; RV32I-NEXT:    lw s6, 80(a0)
+; RV32I-NEXT:    lw s7, 84(a0)
+; RV32I-NEXT:    lw s8, 88(a0)
+; RV32I-NEXT:    lw s9, 92(a0)
+; RV32I-NEXT:    lw s10, 96(a0)
+; RV32I-NEXT:    lw s11, 100(a0)
+; RV32I-NEXT:    lw ra, 104(a0)
+; RV32I-NEXT:    lw a5, 108(a0)
+; RV32I-NEXT:    lw a1, 124(a0)
+; RV32I-NEXT:    lw a2, 120(a0)
+; RV32I-NEXT:    lw a3, 116(a0)
+; RV32I-NEXT:    lw a4, 112(a0)
+; RV32I-NEXT:    sw a1, 124(a0)
+; RV32I-NEXT:    sw a2, 120(a0)
+; RV32I-NEXT:    sw a3, 116(a0)
+; RV32I-NEXT:    sw a4, 112(a0)
+; RV32I-NEXT:    sw a5, 108(a0)
+; RV32I-NEXT:    sw ra, 104(a0)
+; RV32I-NEXT:    sw s11, 100(a0)
+; RV32I-NEXT:    sw s10, 96(a0)
+; RV32I-NEXT:    sw s9, 92(a0)
+; RV32I-NEXT:    sw s8, 88(a0)
+; RV32I-NEXT:    sw s7, 84(a0)
+; RV32I-NEXT:    sw s6, 80(a0)
+; RV32I-NEXT:    sw s5, 76(a0)
+; RV32I-NEXT:    sw s4, 72(a0)
+; RV32I-NEXT:    sw s3, 68(a0)
+; RV32I-NEXT:    sw s2, 64(a0)
+; RV32I-NEXT:    sw s1, 60(a0)
+; RV32I-NEXT:    sw s0, 56(a0)
+; RV32I-NEXT:    sw t6, 52(a0)
+; RV32I-NEXT:    sw t5, 48(a0)
+; RV32I-NEXT:    sw t4, 44(a0)
+; RV32I-NEXT:    sw t3, 40(a0)
+; RV32I-NEXT:    sw t2, 36(a0)
+; RV32I-NEXT:    sw t1, 32(a0)
+; RV32I-NEXT:    sw t0, 28(a0)
+; RV32I-NEXT:    sw a7, 24(a0)
+; RV32I-NEXT:    sw a6, 20(a0)
+; RV32I-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 16(a0)
+; RV32I-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 12(a0)
+; RV32I-NEXT:    lw a1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 8(a0)
+; RV32I-NEXT:    lw a1, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 4(a0)
+; RV32I-NEXT:    lw a1, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 0(a0)
 ; RV32I-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw t0, 136(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw t1, 132(sp) # 4-byte Folded Reload
@@ -2429,84 +2405,82 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV64I-NEXT:    sd t4, 64(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd t5, 56(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd t6, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lui a6, %hi(var_test_irq)
-; RV64I-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV64I-NEXT:    lw a0, 16(a5)
-; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 20(a5)
-; RV64I-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw t0, 24(a5)
-; RV64I-NEXT:    lw t1, 28(a5)
-; RV64I-NEXT:    lw t2, 32(a5)
-; RV64I-NEXT:    lw t3, 36(a5)
-; RV64I-NEXT:    lw t4, 40(a5)
-; RV64I-NEXT:    lw t5, 44(a5)
-; RV64I-NEXT:    lw t6, 48(a5)
-; RV64I-NEXT:    lw s0, 52(a5)
-; RV64I-NEXT:    lw s1, 56(a5)
-; RV64I-NEXT:    lw s2, 60(a5)
-; RV64I-NEXT:    lw s3, 64(a5)
-; RV64I-NEXT:    lw s4, 68(a5)
-; RV64I-NEXT:    lw s5, 72(a5)
-; RV64I-NEXT:    lw s6, 76(a5)
-; RV64I-NEXT:    lw s7, 80(a5)
-; RV64I-NEXT:    lw s8, 84(a5)
-; RV64I-NEXT:    lw s9, 88(a5)
-; RV64I-NEXT:    lw s10, 92(a5)
-; RV64I-NEXT:    lw s11, 96(a5)
-; RV64I-NEXT:    lw ra, 100(a5)
-; RV64I-NEXT:    lw a7, 104(a5)
-; RV64I-NEXT:    lw a4, 108(a5)
-; RV64I-NEXT:    lw a0, 124(a5)
-; RV64I-NEXT:    lw a1, 120(a5)
-; RV64I-NEXT:    lw a2, 116(a5)
-; RV64I-NEXT:    lw a3, 112(a5)
-; RV64I-NEXT:    sw a0, 124(a5)
-; RV64I-NEXT:    sw a1, 120(a5)
-; RV64I-NEXT:    sw a2, 116(a5)
-; RV64I-NEXT:    sw a3, 112(a5)
-; RV64I-NEXT:    sw a4, 108(a5)
-; RV64I-NEXT:    sw a7, 104(a5)
-; RV64I-NEXT:    sw ra, 100(a5)
-; RV64I-NEXT:    sw s11, 96(a5)
-; RV64I-NEXT:    sw s10, 92(a5)
-; RV64I-NEXT:    sw s9, 88(a5)
-; RV64I-NEXT:    sw s8, 84(a5)
-; RV64I-NEXT:    sw s7, 80(a5)
-; RV64I-NEXT:    sw s6, 76(a5)
-; RV64I-NEXT:    sw s5, 72(a5)
-; RV64I-NEXT:    sw s4, 68(a5)
-; RV64I-NEXT:    sw s3, 64(a5)
-; RV64I-NEXT:    sw s2, 60(a5)
-; RV64I-NEXT:    sw s1, 56(a5)
-; RV64I-NEXT:    sw s0, 52(a5)
-; RV64I-NEXT:    sw t6, 48(a5)
-; RV64I-NEXT:    sw t5, 44(a5)
-; RV64I-NEXT:    sw t4, 40(a5)
-; RV64I-NEXT:    sw t3, 36(a5)
-; RV64I-NEXT:    sw t2, 32(a5)
-; RV64I-NEXT:    sw t1, 28(a5)
-; RV64I-NEXT:    sw t0, 24(a5)
-; RV64I-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 20(a5)
-; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 16(a5)
-; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV64I-NEXT:    lui a0, %hi(var_test_irq)
+; RV64I-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV64I-NEXT:    lw a1, 0(a0)
+; RV64I-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 4(a0)
+; RV64I-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 8(a0)
+; RV64I-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 12(a0)
+; RV64I-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 16(a0)
+; RV64I-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a6, 20(a0)
+; RV64I-NEXT:    lw a7, 24(a0)
+; RV64I-NEXT:    lw t0, 28(a0)
+; RV64I-NEXT:    lw t1, 32(a0)
+; RV64I-NEXT:    lw t2, 36(a0)
+; RV64I-NEXT:    lw t3, 40(a0)
+; RV64I-NEXT:    lw t4, 44(a0)
+; RV64I-NEXT:    lw t5, 48(a0)
+; RV64I-NEXT:    lw t6, 52(a0)
+; RV64I-NEXT:    lw s0, 56(a0)
+; RV64I-NEXT:    lw s1, 60(a0)
+; RV64I-NEXT:    lw s2, 64(a0)
+; RV64I-NEXT:    lw s3, 68(a0)
+; RV64I-NEXT:    lw s4, 72(a0)
+; RV64I-NEXT:    lw s5, 76(a0)
+; RV64I-NEXT:    lw s6, 80(a0)
+; RV64I-NEXT:    lw s7, 84(a0)
+; RV64I-NEXT:    lw s8, 88(a0)
+; RV64I-NEXT:    lw s9, 92(a0)
+; RV64I-NEXT:    lw s10, 96(a0)
+; RV64I-NEXT:    lw s11, 100(a0)
+; RV64I-NEXT:    lw ra, 104(a0)
+; RV64I-NEXT:    lw a5, 108(a0)
+; RV64I-NEXT:    lw a1, 124(a0)
+; RV64I-NEXT:    lw a2, 120(a0)
+; RV64I-NEXT:    lw a3, 116(a0)
+; RV64I-NEXT:    lw a4, 112(a0)
+; RV64I-NEXT:    sw a1, 124(a0)
+; RV64I-NEXT:    sw a2, 120(a0)
+; RV64I-NEXT:    sw a3, 116(a0)
+; RV64I-NEXT:    sw a4, 112(a0)
+; RV64I-NEXT:    sw a5, 108(a0)
+; RV64I-NEXT:    sw ra, 104(a0)
+; RV64I-NEXT:    sw s11, 100(a0)
+; RV64I-NEXT:    sw s10, 96(a0)
+; RV64I-NEXT:    sw s9, 92(a0)
+; RV64I-NEXT:    sw s8, 88(a0)
+; RV64I-NEXT:    sw s7, 84(a0)
+; RV64I-NEXT:    sw s6, 80(a0)
+; RV64I-NEXT:    sw s5, 76(a0)
+; RV64I-NEXT:    sw s4, 72(a0)
+; RV64I-NEXT:    sw s3, 68(a0)
+; RV64I-NEXT:    sw s2, 64(a0)
+; RV64I-NEXT:    sw s1, 60(a0)
+; RV64I-NEXT:    sw s0, 56(a0)
+; RV64I-NEXT:    sw t6, 52(a0)
+; RV64I-NEXT:    sw t5, 48(a0)
+; RV64I-NEXT:    sw t4, 44(a0)
+; RV64I-NEXT:    sw t3, 40(a0)
+; RV64I-NEXT:    sw t2, 36(a0)
+; RV64I-NEXT:    sw t1, 32(a0)
+; RV64I-NEXT:    sw t0, 28(a0)
+; RV64I-NEXT:    sw a7, 24(a0)
+; RV64I-NEXT:    sw a6, 20(a0)
+; RV64I-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 16(a0)
+; RV64I-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 12(a0)
+; RV64I-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 8(a0)
+; RV64I-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 4(a0)
+; RV64I-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 0(a0)
 ; RV64I-NEXT:    ld ra, 264(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld t0, 256(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
@@ -2546,333 +2520,325 @@ define void @callee_no_irq() nounwind{
 ; RV32IZCMP-LABEL: callee_no_irq:
 ; RV32IZCMP:       # %bb.0:
 ; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -96
-; RV32IZCMP-NEXT:    lui a6, %hi(var_test_irq)
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV32IZCMP-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV32IZCMP-NEXT:    lw a0, 16(a5)
-; RV32IZCMP-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, 20(a5)
-; RV32IZCMP-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw t4, 24(a5)
-; RV32IZCMP-NEXT:    lw t5, 28(a5)
-; RV32IZCMP-NEXT:    lw t6, 32(a5)
-; RV32IZCMP-NEXT:    lw s2, 36(a5)
-; RV32IZCMP-NEXT:    lw s3, 40(a5)
-; RV32IZCMP-NEXT:    lw s4, 44(a5)
-; RV32IZCMP-NEXT:    lw s5, 48(a5)
-; RV32IZCMP-NEXT:    lw s6, 52(a5)
-; RV32IZCMP-NEXT:    lw s7, 56(a5)
-; RV32IZCMP-NEXT:    lw s8, 60(a5)
-; RV32IZCMP-NEXT:    lw s9, 64(a5)
-; RV32IZCMP-NEXT:    lw s10, 68(a5)
-; RV32IZCMP-NEXT:    lw s11, 72(a5)
-; RV32IZCMP-NEXT:    lw ra, 76(a5)
-; RV32IZCMP-NEXT:    lw s1, 80(a5)
-; RV32IZCMP-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-NEXT:    lw t1, 92(a5)
-; RV32IZCMP-NEXT:    lw t0, 96(a5)
-; RV32IZCMP-NEXT:    lw s0, 100(a5)
-; RV32IZCMP-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-NEXT:    sw a7, 104(a5)
-; RV32IZCMP-NEXT:    sw s0, 100(a5)
-; RV32IZCMP-NEXT:    sw t0, 96(a5)
-; RV32IZCMP-NEXT:    sw t1, 92(a5)
-; RV32IZCMP-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-NEXT:    sw t3, 84(a5)
-; RV32IZCMP-NEXT:    sw s1, 80(a5)
-; RV32IZCMP-NEXT:    sw ra, 76(a5)
-; RV32IZCMP-NEXT:    sw s11, 72(a5)
-; RV32IZCMP-NEXT:    sw s10, 68(a5)
-; RV32IZCMP-NEXT:    sw s9, 64(a5)
-; RV32IZCMP-NEXT:    sw s8, 60(a5)
-; RV32IZCMP-NEXT:    sw s7, 56(a5)
-; RV32IZCMP-NEXT:    sw s6, 52(a5)
-; RV32IZCMP-NEXT:    sw s5, 48(a5)
-; RV32IZCMP-NEXT:    sw s4, 44(a5)
-; RV32IZCMP-NEXT:    sw s3, 40(a5)
-; RV32IZCMP-NEXT:    sw s2, 36(a5)
-; RV32IZCMP-NEXT:    sw t6, 32(a5)
-; RV32IZCMP-NEXT:    sw t5, 28(a5)
-; RV32IZCMP-NEXT:    sw t4, 24(a5)
-; RV32IZCMP-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 20(a5)
-; RV32IZCMP-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, 16(a5)
-; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV32IZCMP-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-NEXT:    lui a0, %hi(var_test_irq)
+; RV32IZCMP-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV32IZCMP-NEXT:    lw a1, 0(a0)
+; RV32IZCMP-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 4(a0)
+; RV32IZCMP-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 8(a0)
+; RV32IZCMP-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 12(a0)
+; RV32IZCMP-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw a1, 16(a0)
+; RV32IZCMP-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-NEXT:    lw t3, 20(a0)
+; RV32IZCMP-NEXT:    lw t4, 24(a0)
+; RV32IZCMP-NEXT:    lw t5, 28(a0)
+; RV32IZCMP-NEXT:    lw t6, 32(a0)
+; RV32IZCMP-NEXT:    lw s2, 36(a0)
+; RV32IZCMP-NEXT:    lw s3, 40(a0)
+; RV32IZCMP-NEXT:    lw s4, 44(a0)
+; RV32IZCMP-NEXT:    lw s5, 48(a0)
+; RV32IZCMP-NEXT:    lw s6, 52(a0)
+; RV32IZCMP-NEXT:    lw s7, 56(a0)
+; RV32IZCMP-NEXT:    lw s8, 60(a0)
+; RV32IZCMP-NEXT:    lw s9, 64(a0)
+; RV32IZCMP-NEXT:    lw s10, 68(a0)
+; RV32IZCMP-NEXT:    lw s11, 72(a0)
+; RV32IZCMP-NEXT:    lw ra, 76(a0)
+; RV32IZCMP-NEXT:    lw t2, 80(a0)
+; RV32IZCMP-NEXT:    lw s0, 84(a0)
+; RV32IZCMP-NEXT:    lw s1, 88(a0)
+; RV32IZCMP-NEXT:    lw t1, 92(a0)
+; RV32IZCMP-NEXT:    lw t0, 96(a0)
+; RV32IZCMP-NEXT:    lw a7, 100(a0)
+; RV32IZCMP-NEXT:    lw a6, 104(a0)
+; RV32IZCMP-NEXT:    lw a5, 108(a0)
+; RV32IZCMP-NEXT:    lw a1, 124(a0)
+; RV32IZCMP-NEXT:    lw a2, 120(a0)
+; RV32IZCMP-NEXT:    lw a3, 116(a0)
+; RV32IZCMP-NEXT:    lw a4, 112(a0)
+; RV32IZCMP-NEXT:    sw a1, 124(a0)
+; RV32IZCMP-NEXT:    sw a2, 120(a0)
+; RV32IZCMP-NEXT:    sw a3, 116(a0)
+; RV32IZCMP-NEXT:    sw a4, 112(a0)
+; RV32IZCMP-NEXT:    sw a5, 108(a0)
+; RV32IZCMP-NEXT:    sw a6, 104(a0)
+; RV32IZCMP-NEXT:    sw a7, 100(a0)
+; RV32IZCMP-NEXT:    sw t0, 96(a0)
+; RV32IZCMP-NEXT:    sw t1, 92(a0)
+; RV32IZCMP-NEXT:    sw s1, 88(a0)
+; RV32IZCMP-NEXT:    sw s0, 84(a0)
+; RV32IZCMP-NEXT:    sw t2, 80(a0)
+; RV32IZCMP-NEXT:    sw ra, 76(a0)
+; RV32IZCMP-NEXT:    sw s11, 72(a0)
+; RV32IZCMP-NEXT:    sw s10, 68(a0)
+; RV32IZCMP-NEXT:    sw s9, 64(a0)
+; RV32IZCMP-NEXT:    sw s8, 60(a0)
+; RV32IZCMP-NEXT:    sw s7, 56(a0)
+; RV32IZCMP-NEXT:    sw s6, 52(a0)
+; RV32IZCMP-NEXT:    sw s5, 48(a0)
+; RV32IZCMP-NEXT:    sw s4, 44(a0)
+; RV32IZCMP-NEXT:    sw s3, 40(a0)
+; RV32IZCMP-NEXT:    sw s2, 36(a0)
+; RV32IZCMP-NEXT:    sw t6, 32(a0)
+; RV32IZCMP-NEXT:    sw t5, 28(a0)
+; RV32IZCMP-NEXT:    sw t4, 24(a0)
+; RV32IZCMP-NEXT:    sw t3, 20(a0)
+; RV32IZCMP-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 16(a0)
+; RV32IZCMP-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 12(a0)
+; RV32IZCMP-NEXT:    lw a1, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 8(a0)
+; RV32IZCMP-NEXT:    lw a1, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 4(a0)
+; RV32IZCMP-NEXT:    lw a1, 28(sp) # 4-byte Folded Reload
+; RV32IZCMP-NEXT:    sw a1, 0(a0)
 ; RV32IZCMP-NEXT:    cm.popret {ra, s0-s11}, 96
 ;
 ; RV64IZCMP-LABEL: callee_no_irq:
 ; RV64IZCMP:       # %bb.0:
 ; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
-; RV64IZCMP-NEXT:    lui a6, %hi(var_test_irq)
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV64IZCMP-NEXT:    lw a0, 16(a5)
-; RV64IZCMP-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, 20(a5)
-; RV64IZCMP-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw t4, 24(a5)
-; RV64IZCMP-NEXT:    lw t5, 28(a5)
-; RV64IZCMP-NEXT:    lw t6, 32(a5)
-; RV64IZCMP-NEXT:    lw s2, 36(a5)
-; RV64IZCMP-NEXT:    lw s3, 40(a5)
-; RV64IZCMP-NEXT:    lw s4, 44(a5)
-; RV64IZCMP-NEXT:    lw s5, 48(a5)
-; RV64IZCMP-NEXT:    lw s6, 52(a5)
-; RV64IZCMP-NEXT:    lw s7, 56(a5)
-; RV64IZCMP-NEXT:    lw s8, 60(a5)
-; RV64IZCMP-NEXT:    lw s9, 64(a5)
-; RV64IZCMP-NEXT:    lw s10, 68(a5)
-; RV64IZCMP-NEXT:    lw s11, 72(a5)
-; RV64IZCMP-NEXT:    lw ra, 76(a5)
-; RV64IZCMP-NEXT:    lw s1, 80(a5)
-; RV64IZCMP-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-NEXT:    lw t1, 92(a5)
-; RV64IZCMP-NEXT:    lw t0, 96(a5)
-; RV64IZCMP-NEXT:    lw s0, 100(a5)
-; RV64IZCMP-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-NEXT:    sw a7, 104(a5)
-; RV64IZCMP-NEXT:    sw s0, 100(a5)
-; RV64IZCMP-NEXT:    sw t0, 96(a5)
-; RV64IZCMP-NEXT:    sw t1, 92(a5)
-; RV64IZCMP-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-NEXT:    sw t3, 84(a5)
-; RV64IZCMP-NEXT:    sw s1, 80(a5)
-; RV64IZCMP-NEXT:    sw ra, 76(a5)
-; RV64IZCMP-NEXT:    sw s11, 72(a5)
-; RV64IZCMP-NEXT:    sw s10, 68(a5)
-; RV64IZCMP-NEXT:    sw s9, 64(a5)
-; RV64IZCMP-NEXT:    sw s8, 60(a5)
-; RV64IZCMP-NEXT:    sw s7, 56(a5)
-; RV64IZCMP-NEXT:    sw s6, 52(a5)
-; RV64IZCMP-NEXT:    sw s5, 48(a5)
-; RV64IZCMP-NEXT:    sw s4, 44(a5)
-; RV64IZCMP-NEXT:    sw s3, 40(a5)
-; RV64IZCMP-NEXT:    sw s2, 36(a5)
-; RV64IZCMP-NEXT:    sw t6, 32(a5)
-; RV64IZCMP-NEXT:    sw t5, 28(a5)
-; RV64IZCMP-NEXT:    sw t4, 24(a5)
-; RV64IZCMP-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 20(a5)
-; RV64IZCMP-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, 16(a5)
-; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-NEXT:    lui a0, %hi(var_test_irq)
+; RV64IZCMP-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV64IZCMP-NEXT:    lw a1, 0(a0)
+; RV64IZCMP-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 4(a0)
+; RV64IZCMP-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 8(a0)
+; RV64IZCMP-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 12(a0)
+; RV64IZCMP-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw a1, 16(a0)
+; RV64IZCMP-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-NEXT:    lw t3, 20(a0)
+; RV64IZCMP-NEXT:    lw t4, 24(a0)
+; RV64IZCMP-NEXT:    lw t5, 28(a0)
+; RV64IZCMP-NEXT:    lw t6, 32(a0)
+; RV64IZCMP-NEXT:    lw s2, 36(a0)
+; RV64IZCMP-NEXT:    lw s3, 40(a0)
+; RV64IZCMP-NEXT:    lw s4, 44(a0)
+; RV64IZCMP-NEXT:    lw s5, 48(a0)
+; RV64IZCMP-NEXT:    lw s6, 52(a0)
+; RV64IZCMP-NEXT:    lw s7, 56(a0)
+; RV64IZCMP-NEXT:    lw s8, 60(a0)
+; RV64IZCMP-NEXT:    lw s9, 64(a0)
+; RV64IZCMP-NEXT:    lw s10, 68(a0)
+; RV64IZCMP-NEXT:    lw s11, 72(a0)
+; RV64IZCMP-NEXT:    lw ra, 76(a0)
+; RV64IZCMP-NEXT:    lw t2, 80(a0)
+; RV64IZCMP-NEXT:    lw s0, 84(a0)
+; RV64IZCMP-NEXT:    lw s1, 88(a0)
+; RV64IZCMP-NEXT:    lw t1, 92(a0)
+; RV64IZCMP-NEXT:    lw t0, 96(a0)
+; RV64IZCMP-NEXT:    lw a7, 100(a0)
+; RV64IZCMP-NEXT:    lw a6, 104(a0)
+; RV64IZCMP-NEXT:    lw a5, 108(a0)
+; RV64IZCMP-NEXT:    lw a1, 124(a0)
+; RV64IZCMP-NEXT:    lw a2, 120(a0)
+; RV64IZCMP-NEXT:    lw a3, 116(a0)
+; RV64IZCMP-NEXT:    lw a4, 112(a0)
+; RV64IZCMP-NEXT:    sw a1, 124(a0)
+; RV64IZCMP-NEXT:    sw a2, 120(a0)
+; RV64IZCMP-NEXT:    sw a3, 116(a0)
+; RV64IZCMP-NEXT:    sw a4, 112(a0)
+; RV64IZCMP-NEXT:    sw a5, 108(a0)
+; RV64IZCMP-NEXT:    sw a6, 104(a0)
+; RV64IZCMP-NEXT:    sw a7, 100(a0)
+; RV64IZCMP-NEXT:    sw t0, 96(a0)
+; RV64IZCMP-NEXT:    sw t1, 92(a0)
+; RV64IZCMP-NEXT:    sw s1, 88(a0)
+; RV64IZCMP-NEXT:    sw s0, 84(a0)
+; RV64IZCMP-NEXT:    sw t2, 80(a0)
+; RV64IZCMP-NEXT:    sw ra, 76(a0)
+; RV64IZCMP-NEXT:    sw s11, 72(a0)
+; RV64IZCMP-NEXT:    sw s10, 68(a0)
+; RV64IZCMP-NEXT:    sw s9, 64(a0)
+; RV64IZCMP-NEXT:    sw s8, 60(a0)
+; RV64IZCMP-NEXT:    sw s7, 56(a0)
+; RV64IZCMP-NEXT:    sw s6, 52(a0)
+; RV64IZCMP-NEXT:    sw s5, 48(a0)
+; RV64IZCMP-NEXT:    sw s4, 44(a0)
+; RV64IZCMP-NEXT:    sw s3, 40(a0)
+; RV64IZCMP-NEXT:    sw s2, 36(a0)
+; RV64IZCMP-NEXT:    sw t6, 32(a0)
+; RV64IZCMP-NEXT:    sw t5, 28(a0)
+; RV64IZCMP-NEXT:    sw t4, 24(a0)
+; RV64IZCMP-NEXT:    sw t3, 20(a0)
+; RV64IZCMP-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 16(a0)
+; RV64IZCMP-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 12(a0)
+; RV64IZCMP-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 8(a0)
+; RV64IZCMP-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 4(a0)
+; RV64IZCMP-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-NEXT:    sw a1, 0(a0)
 ; RV64IZCMP-NEXT:    cm.popret {ra, s0-s11}, 160
 ;
 ; RV32IZCMP-SR-LABEL: callee_no_irq:
 ; RV32IZCMP-SR:       # %bb.0:
 ; RV32IZCMP-SR-NEXT:    cm.push {ra, s0-s11}, -96
-; RV32IZCMP-SR-NEXT:    lui a6, %hi(var_test_irq)
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV32IZCMP-SR-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV32IZCMP-SR-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV32IZCMP-SR-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV32IZCMP-SR-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV32IZCMP-SR-NEXT:    lw a0, 16(a5)
-; RV32IZCMP-SR-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, 20(a5)
-; RV32IZCMP-SR-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw t4, 24(a5)
-; RV32IZCMP-SR-NEXT:    lw t5, 28(a5)
-; RV32IZCMP-SR-NEXT:    lw t6, 32(a5)
-; RV32IZCMP-SR-NEXT:    lw s2, 36(a5)
-; RV32IZCMP-SR-NEXT:    lw s3, 40(a5)
-; RV32IZCMP-SR-NEXT:    lw s4, 44(a5)
-; RV32IZCMP-SR-NEXT:    lw s5, 48(a5)
-; RV32IZCMP-SR-NEXT:    lw s6, 52(a5)
-; RV32IZCMP-SR-NEXT:    lw s7, 56(a5)
-; RV32IZCMP-SR-NEXT:    lw s8, 60(a5)
-; RV32IZCMP-SR-NEXT:    lw s9, 64(a5)
-; RV32IZCMP-SR-NEXT:    lw s10, 68(a5)
-; RV32IZCMP-SR-NEXT:    lw s11, 72(a5)
-; RV32IZCMP-SR-NEXT:    lw ra, 76(a5)
-; RV32IZCMP-SR-NEXT:    lw s1, 80(a5)
-; RV32IZCMP-SR-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-SR-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-SR-NEXT:    lw t1, 92(a5)
-; RV32IZCMP-SR-NEXT:    lw t0, 96(a5)
-; RV32IZCMP-SR-NEXT:    lw s0, 100(a5)
-; RV32IZCMP-SR-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-SR-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-SR-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-SR-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-SR-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-SR-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-SR-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-SR-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-SR-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-SR-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-SR-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-SR-NEXT:    sw a7, 104(a5)
-; RV32IZCMP-SR-NEXT:    sw s0, 100(a5)
-; RV32IZCMP-SR-NEXT:    sw t0, 96(a5)
-; RV32IZCMP-SR-NEXT:    sw t1, 92(a5)
-; RV32IZCMP-SR-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-SR-NEXT:    sw t3, 84(a5)
-; RV32IZCMP-SR-NEXT:    sw s1, 80(a5)
-; RV32IZCMP-SR-NEXT:    sw ra, 76(a5)
-; RV32IZCMP-SR-NEXT:    sw s11, 72(a5)
-; RV32IZCMP-SR-NEXT:    sw s10, 68(a5)
-; RV32IZCMP-SR-NEXT:    sw s9, 64(a5)
-; RV32IZCMP-SR-NEXT:    sw s8, 60(a5)
-; RV32IZCMP-SR-NEXT:    sw s7, 56(a5)
-; RV32IZCMP-SR-NEXT:    sw s6, 52(a5)
-; RV32IZCMP-SR-NEXT:    sw s5, 48(a5)
-; RV32IZCMP-SR-NEXT:    sw s4, 44(a5)
-; RV32IZCMP-SR-NEXT:    sw s3, 40(a5)
-; RV32IZCMP-SR-NEXT:    sw s2, 36(a5)
-; RV32IZCMP-SR-NEXT:    sw t6, 32(a5)
-; RV32IZCMP-SR-NEXT:    sw t5, 28(a5)
-; RV32IZCMP-SR-NEXT:    sw t4, 24(a5)
-; RV32IZCMP-SR-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, 20(a5)
-; RV32IZCMP-SR-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, 16(a5)
-; RV32IZCMP-SR-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV32IZCMP-SR-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV32IZCMP-SR-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV32IZCMP-SR-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-SR-NEXT:    lui a0, %hi(var_test_irq)
+; RV32IZCMP-SR-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV32IZCMP-SR-NEXT:    lw a1, 0(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 28(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lw a1, 4(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lw a1, 8(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lw a1, 12(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lw a1, 16(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32IZCMP-SR-NEXT:    lw t3, 20(a0)
+; RV32IZCMP-SR-NEXT:    lw t4, 24(a0)
+; RV32IZCMP-SR-NEXT:    lw t5, 28(a0)
+; RV32IZCMP-SR-NEXT:    lw t6, 32(a0)
+; RV32IZCMP-SR-NEXT:    lw s2, 36(a0)
+; RV32IZCMP-SR-NEXT:    lw s3, 40(a0)
+; RV32IZCMP-SR-NEXT:    lw s4, 44(a0)
+; RV32IZCMP-SR-NEXT:    lw s5, 48(a0)
+; RV32IZCMP-SR-NEXT:    lw s6, 52(a0)
+; RV32IZCMP-SR-NEXT:    lw s7, 56(a0)
+; RV32IZCMP-SR-NEXT:    lw s8, 60(a0)
+; RV32IZCMP-SR-NEXT:    lw s9, 64(a0)
+; RV32IZCMP-SR-NEXT:    lw s10, 68(a0)
+; RV32IZCMP-SR-NEXT:    lw s11, 72(a0)
+; RV32IZCMP-SR-NEXT:    lw ra, 76(a0)
+; RV32IZCMP-SR-NEXT:    lw t2, 80(a0)
+; RV32IZCMP-SR-NEXT:    lw s0, 84(a0)
+; RV32IZCMP-SR-NEXT:    lw s1, 88(a0)
+; RV32IZCMP-SR-NEXT:    lw t1, 92(a0)
+; RV32IZCMP-SR-NEXT:    lw t0, 96(a0)
+; RV32IZCMP-SR-NEXT:    lw a7, 100(a0)
+; RV32IZCMP-SR-NEXT:    lw a6, 104(a0)
+; RV32IZCMP-SR-NEXT:    lw a5, 108(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 124(a0)
+; RV32IZCMP-SR-NEXT:    lw a2, 120(a0)
+; RV32IZCMP-SR-NEXT:    lw a3, 116(a0)
+; RV32IZCMP-SR-NEXT:    lw a4, 112(a0)
+; RV32IZCMP-SR-NEXT:    sw a1, 124(a0)
+; RV32IZCMP-SR-NEXT:    sw a2, 120(a0)
+; RV32IZCMP-SR-NEXT:    sw a3, 116(a0)
+; RV32IZCMP-SR-NEXT:    sw a4, 112(a0)
+; RV32IZCMP-SR-NEXT:    sw a5, 108(a0)
+; RV32IZCMP-SR-NEXT:    sw a6, 104(a0)
+; RV32IZCMP-SR-NEXT:    sw a7, 100(a0)
+; RV32IZCMP-SR-NEXT:    sw t0, 96(a0)
+; RV32IZCMP-SR-NEXT:    sw t1, 92(a0)
+; RV32IZCMP-SR-NEXT:    sw s1, 88(a0)
+; RV32IZCMP-SR-NEXT:    sw s0, 84(a0)
+; RV32IZCMP-SR-NEXT:    sw t2, 80(a0)
+; RV32IZCMP-SR-NEXT:    sw ra, 76(a0)
+; RV32IZCMP-SR-NEXT:    sw s11, 72(a0)
+; RV32IZCMP-SR-NEXT:    sw s10, 68(a0)
+; RV32IZCMP-SR-NEXT:    sw s9, 64(a0)
+; RV32IZCMP-SR-NEXT:    sw s8, 60(a0)
+; RV32IZCMP-SR-NEXT:    sw s7, 56(a0)
+; RV32IZCMP-SR-NEXT:    sw s6, 52(a0)
+; RV32IZCMP-SR-NEXT:    sw s5, 48(a0)
+; RV32IZCMP-SR-NEXT:    sw s4, 44(a0)
+; RV32IZCMP-SR-NEXT:    sw s3, 40(a0)
+; RV32IZCMP-SR-NEXT:    sw s2, 36(a0)
+; RV32IZCMP-SR-NEXT:    sw t6, 32(a0)
+; RV32IZCMP-SR-NEXT:    sw t5, 28(a0)
+; RV32IZCMP-SR-NEXT:    sw t4, 24(a0)
+; RV32IZCMP-SR-NEXT:    sw t3, 20(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    sw a1, 16(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    sw a1, 12(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 20(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    sw a1, 8(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 24(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    sw a1, 4(a0)
+; RV32IZCMP-SR-NEXT:    lw a1, 28(sp) # 4-byte Folded Reload
+; RV32IZCMP-SR-NEXT:    sw a1, 0(a0)
 ; RV32IZCMP-SR-NEXT:    cm.popret {ra, s0-s11}, 96
 ;
 ; RV64IZCMP-SR-LABEL: callee_no_irq:
 ; RV64IZCMP-SR:       # %bb.0:
 ; RV64IZCMP-SR-NEXT:    cm.push {ra, s0-s11}, -160
-; RV64IZCMP-SR-NEXT:    lui a6, %hi(var_test_irq)
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV64IZCMP-SR-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV64IZCMP-SR-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV64IZCMP-SR-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV64IZCMP-SR-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV64IZCMP-SR-NEXT:    lw a0, 16(a5)
-; RV64IZCMP-SR-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, 20(a5)
-; RV64IZCMP-SR-NEXT:    sd a0, 0(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw t4, 24(a5)
-; RV64IZCMP-SR-NEXT:    lw t5, 28(a5)
-; RV64IZCMP-SR-NEXT:    lw t6, 32(a5)
-; RV64IZCMP-SR-NEXT:    lw s2, 36(a5)
-; RV64IZCMP-SR-NEXT:    lw s3, 40(a5)
-; RV64IZCMP-SR-NEXT:    lw s4, 44(a5)
-; RV64IZCMP-SR-NEXT:    lw s5, 48(a5)
-; RV64IZCMP-SR-NEXT:    lw s6, 52(a5)
-; RV64IZCMP-SR-NEXT:    lw s7, 56(a5)
-; RV64IZCMP-SR-NEXT:    lw s8, 60(a5)
-; RV64IZCMP-SR-NEXT:    lw s9, 64(a5)
-; RV64IZCMP-SR-NEXT:    lw s10, 68(a5)
-; RV64IZCMP-SR-NEXT:    lw s11, 72(a5)
-; RV64IZCMP-SR-NEXT:    lw ra, 76(a5)
-; RV64IZCMP-SR-NEXT:    lw s1, 80(a5)
-; RV64IZCMP-SR-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-SR-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-SR-NEXT:    lw t1, 92(a5)
-; RV64IZCMP-SR-NEXT:    lw t0, 96(a5)
-; RV64IZCMP-SR-NEXT:    lw s0, 100(a5)
-; RV64IZCMP-SR-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-SR-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-SR-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-SR-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-SR-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-SR-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-SR-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-SR-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-SR-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-SR-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-SR-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-SR-NEXT:    sw a7, 104(a5)
-; RV64IZCMP-SR-NEXT:    sw s0, 100(a5)
-; RV64IZCMP-SR-NEXT:    sw t0, 96(a5)
-; RV64IZCMP-SR-NEXT:    sw t1, 92(a5)
-; RV64IZCMP-SR-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-SR-NEXT:    sw t3, 84(a5)
-; RV64IZCMP-SR-NEXT:    sw s1, 80(a5)
-; RV64IZCMP-SR-NEXT:    sw ra, 76(a5)
-; RV64IZCMP-SR-NEXT:    sw s11, 72(a5)
-; RV64IZCMP-SR-NEXT:    sw s10, 68(a5)
-; RV64IZCMP-SR-NEXT:    sw s9, 64(a5)
-; RV64IZCMP-SR-NEXT:    sw s8, 60(a5)
-; RV64IZCMP-SR-NEXT:    sw s7, 56(a5)
-; RV64IZCMP-SR-NEXT:    sw s6, 52(a5)
-; RV64IZCMP-SR-NEXT:    sw s5, 48(a5)
-; RV64IZCMP-SR-NEXT:    sw s4, 44(a5)
-; RV64IZCMP-SR-NEXT:    sw s3, 40(a5)
-; RV64IZCMP-SR-NEXT:    sw s2, 36(a5)
-; RV64IZCMP-SR-NEXT:    sw t6, 32(a5)
-; RV64IZCMP-SR-NEXT:    sw t5, 28(a5)
-; RV64IZCMP-SR-NEXT:    sw t4, 24(a5)
-; RV64IZCMP-SR-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, 20(a5)
-; RV64IZCMP-SR-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, 16(a5)
-; RV64IZCMP-SR-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV64IZCMP-SR-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV64IZCMP-SR-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV64IZCMP-SR-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-SR-NEXT:    lui a0, %hi(var_test_irq)
+; RV64IZCMP-SR-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV64IZCMP-SR-NEXT:    lw a1, 0(a0)
+; RV64IZCMP-SR-NEXT:    sd a1, 40(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lw a1, 4(a0)
+; RV64IZCMP-SR-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lw a1, 8(a0)
+; RV64IZCMP-SR-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lw a1, 12(a0)
+; RV64IZCMP-SR-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lw a1, 16(a0)
+; RV64IZCMP-SR-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64IZCMP-SR-NEXT:    lw t3, 20(a0)
+; RV64IZCMP-SR-NEXT:    lw t4, 24(a0)
+; RV64IZCMP-SR-NEXT:    lw t5, 28(a0)
+; RV64IZCMP-SR-NEXT:    lw t6, 32(a0)
+; RV64IZCMP-SR-NEXT:    lw s2, 36(a0)
+; RV64IZCMP-SR-NEXT:    lw s3, 40(a0)
+; RV64IZCMP-SR-NEXT:    lw s4, 44(a0)
+; RV64IZCMP-SR-NEXT:    lw s5, 48(a0)
+; RV64IZCMP-SR-NEXT:    lw s6, 52(a0)
+; RV64IZCMP-SR-NEXT:    lw s7, 56(a0)
+; RV64IZCMP-SR-NEXT:    lw s8, 60(a0)
+; RV64IZCMP-SR-NEXT:    lw s9, 64(a0)
+; RV64IZCMP-SR-NEXT:    lw s10, 68(a0)
+; RV64IZCMP-SR-NEXT:    lw s11, 72(a0)
+; RV64IZCMP-SR-NEXT:    lw ra, 76(a0)
+; RV64IZCMP-SR-NEXT:    lw t2, 80(a0)
+; RV64IZCMP-SR-NEXT:    lw s0, 84(a0)
+; RV64IZCMP-SR-NEXT:    lw s1, 88(a0)
+; RV64IZCMP-SR-NEXT:    lw t1, 92(a0)
+; RV64IZCMP-SR-NEXT:    lw t0, 96(a0)
+; RV64IZCMP-SR-NEXT:    lw a7, 100(a0)
+; RV64IZCMP-SR-NEXT:    lw a6, 104(a0)
+; RV64IZCMP-SR-NEXT:    lw a5, 108(a0)
+; RV64IZCMP-SR-NEXT:    lw a1, 124(a0)
+; RV64IZCMP-SR-NEXT:    lw a2, 120(a0)
+; RV64IZCMP-SR-NEXT:    lw a3, 116(a0)
+; RV64IZCMP-SR-NEXT:    lw a4, 112(a0)
+; RV64IZCMP-SR-NEXT:    sw a1, 124(a0)
+; RV64IZCMP-SR-NEXT:    sw a2, 120(a0)
+; RV64IZCMP-SR-NEXT:    sw a3, 116(a0)
+; RV64IZCMP-SR-NEXT:    sw a4, 112(a0)
+; RV64IZCMP-SR-NEXT:    sw a5, 108(a0)
+; RV64IZCMP-SR-NEXT:    sw a6, 104(a0)
+; RV64IZCMP-SR-NEXT:    sw a7, 100(a0)
+; RV64IZCMP-SR-NEXT:    sw t0, 96(a0)
+; RV64IZCMP-SR-NEXT:    sw t1, 92(a0)
+; RV64IZCMP-SR-NEXT:    sw s1, 88(a0)
+; RV64IZCMP-SR-NEXT:    sw s0, 84(a0)
+; RV64IZCMP-SR-NEXT:    sw t2, 80(a0)
+; RV64IZCMP-SR-NEXT:    sw ra, 76(a0)
+; RV64IZCMP-SR-NEXT:    sw s11, 72(a0)
+; RV64IZCMP-SR-NEXT:    sw s10, 68(a0)
+; RV64IZCMP-SR-NEXT:    sw s9, 64(a0)
+; RV64IZCMP-SR-NEXT:    sw s8, 60(a0)
+; RV64IZCMP-SR-NEXT:    sw s7, 56(a0)
+; RV64IZCMP-SR-NEXT:    sw s6, 52(a0)
+; RV64IZCMP-SR-NEXT:    sw s5, 48(a0)
+; RV64IZCMP-SR-NEXT:    sw s4, 44(a0)
+; RV64IZCMP-SR-NEXT:    sw s3, 40(a0)
+; RV64IZCMP-SR-NEXT:    sw s2, 36(a0)
+; RV64IZCMP-SR-NEXT:    sw t6, 32(a0)
+; RV64IZCMP-SR-NEXT:    sw t5, 28(a0)
+; RV64IZCMP-SR-NEXT:    sw t4, 24(a0)
+; RV64IZCMP-SR-NEXT:    sw t3, 20(a0)
+; RV64IZCMP-SR-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    sw a1, 16(a0)
+; RV64IZCMP-SR-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    sw a1, 12(a0)
+; RV64IZCMP-SR-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    sw a1, 8(a0)
+; RV64IZCMP-SR-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    sw a1, 4(a0)
+; RV64IZCMP-SR-NEXT:    ld a1, 40(sp) # 8-byte Folded Reload
+; RV64IZCMP-SR-NEXT:    sw a1, 0(a0)
 ; RV64IZCMP-SR-NEXT:    cm.popret {ra, s0-s11}, 160
 ;
 ; RV32I-LABEL: callee_no_irq:
@@ -2891,84 +2857,82 @@ define void @callee_no_irq() nounwind{
 ; RV32I-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lui a6, %hi(var_test_irq)
-; RV32I-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV32I-NEXT:    lw a0, 16(a5)
-; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, 20(a5)
-; RV32I-NEXT:    sw a0, 4(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw t0, 24(a5)
-; RV32I-NEXT:    lw t1, 28(a5)
-; RV32I-NEXT:    lw t2, 32(a5)
-; RV32I-NEXT:    lw t3, 36(a5)
-; RV32I-NEXT:    lw t4, 40(a5)
-; RV32I-NEXT:    lw t5, 44(a5)
-; RV32I-NEXT:    lw t6, 48(a5)
-; RV32I-NEXT:    lw s0, 52(a5)
-; RV32I-NEXT:    lw s1, 56(a5)
-; RV32I-NEXT:    lw s2, 60(a5)
-; RV32I-NEXT:    lw s3, 64(a5)
-; RV32I-NEXT:    lw s4, 68(a5)
-; RV32I-NEXT:    lw s5, 72(a5)
-; RV32I-NEXT:    lw s6, 76(a5)
-; RV32I-NEXT:    lw s7, 80(a5)
-; RV32I-NEXT:    lw s8, 84(a5)
-; RV32I-NEXT:    lw s9, 88(a5)
-; RV32I-NEXT:    lw s10, 92(a5)
-; RV32I-NEXT:    lw s11, 96(a5)
-; RV32I-NEXT:    lw ra, 100(a5)
-; RV32I-NEXT:    lw a7, 104(a5)
-; RV32I-NEXT:    lw a4, 108(a5)
-; RV32I-NEXT:    lw a0, 124(a5)
-; RV32I-NEXT:    lw a1, 120(a5)
-; RV32I-NEXT:    lw a2, 116(a5)
-; RV32I-NEXT:    lw a3, 112(a5)
-; RV32I-NEXT:    sw a0, 124(a5)
-; RV32I-NEXT:    sw a1, 120(a5)
-; RV32I-NEXT:    sw a2, 116(a5)
-; RV32I-NEXT:    sw a3, 112(a5)
-; RV32I-NEXT:    sw a4, 108(a5)
-; RV32I-NEXT:    sw a7, 104(a5)
-; RV32I-NEXT:    sw ra, 100(a5)
-; RV32I-NEXT:    sw s11, 96(a5)
-; RV32I-NEXT:    sw s10, 92(a5)
-; RV32I-NEXT:    sw s9, 88(a5)
-; RV32I-NEXT:    sw s8, 84(a5)
-; RV32I-NEXT:    sw s7, 80(a5)
-; RV32I-NEXT:    sw s6, 76(a5)
-; RV32I-NEXT:    sw s5, 72(a5)
-; RV32I-NEXT:    sw s4, 68(a5)
-; RV32I-NEXT:    sw s3, 64(a5)
-; RV32I-NEXT:    sw s2, 60(a5)
-; RV32I-NEXT:    sw s1, 56(a5)
-; RV32I-NEXT:    sw s0, 52(a5)
-; RV32I-NEXT:    sw t6, 48(a5)
-; RV32I-NEXT:    sw t5, 44(a5)
-; RV32I-NEXT:    sw t4, 40(a5)
-; RV32I-NEXT:    sw t3, 36(a5)
-; RV32I-NEXT:    sw t2, 32(a5)
-; RV32I-NEXT:    sw t1, 28(a5)
-; RV32I-NEXT:    sw t0, 24(a5)
-; RV32I-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 20(a5)
-; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, 16(a5)
-; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV32I-NEXT:    lui a0, %hi(var_test_irq)
+; RV32I-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV32I-NEXT:    lw a1, 0(a0)
+; RV32I-NEXT:    sw a1, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 4(a0)
+; RV32I-NEXT:    sw a1, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 8(a0)
+; RV32I-NEXT:    sw a1, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 12(a0)
+; RV32I-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 16(a0)
+; RV32I-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a6, 20(a0)
+; RV32I-NEXT:    lw a7, 24(a0)
+; RV32I-NEXT:    lw t0, 28(a0)
+; RV32I-NEXT:    lw t1, 32(a0)
+; RV32I-NEXT:    lw t2, 36(a0)
+; RV32I-NEXT:    lw t3, 40(a0)
+; RV32I-NEXT:    lw t4, 44(a0)
+; RV32I-NEXT:    lw t5, 48(a0)
+; RV32I-NEXT:    lw t6, 52(a0)
+; RV32I-NEXT:    lw s0, 56(a0)
+; RV32I-NEXT:    lw s1, 60(a0)
+; RV32I-NEXT:    lw s2, 64(a0)
+; RV32I-NEXT:    lw s3, 68(a0)
+; RV32I-NEXT:    lw s4, 72(a0)
+; RV32I-NEXT:    lw s5, 76(a0)
+; RV32I-NEXT:    lw s6, 80(a0)
+; RV32I-NEXT:    lw s7, 84(a0)
+; RV32I-NEXT:    lw s8, 88(a0)
+; RV32I-NEXT:    lw s9, 92(a0)
+; RV32I-NEXT:    lw s10, 96(a0)
+; RV32I-NEXT:    lw s11, 100(a0)
+; RV32I-NEXT:    lw ra, 104(a0)
+; RV32I-NEXT:    lw a5, 108(a0)
+; RV32I-NEXT:    lw a1, 124(a0)
+; RV32I-NEXT:    lw a2, 120(a0)
+; RV32I-NEXT:    lw a3, 116(a0)
+; RV32I-NEXT:    lw a4, 112(a0)
+; RV32I-NEXT:    sw a1, 124(a0)
+; RV32I-NEXT:    sw a2, 120(a0)
+; RV32I-NEXT:    sw a3, 116(a0)
+; RV32I-NEXT:    sw a4, 112(a0)
+; RV32I-NEXT:    sw a5, 108(a0)
+; RV32I-NEXT:    sw ra, 104(a0)
+; RV32I-NEXT:    sw s11, 100(a0)
+; RV32I-NEXT:    sw s10, 96(a0)
+; RV32I-NEXT:    sw s9, 92(a0)
+; RV32I-NEXT:    sw s8, 88(a0)
+; RV32I-NEXT:    sw s7, 84(a0)
+; RV32I-NEXT:    sw s6, 80(a0)
+; RV32I-NEXT:    sw s5, 76(a0)
+; RV32I-NEXT:    sw s4, 72(a0)
+; RV32I-NEXT:    sw s3, 68(a0)
+; RV32I-NEXT:    sw s2, 64(a0)
+; RV32I-NEXT:    sw s1, 60(a0)
+; RV32I-NEXT:    sw s0, 56(a0)
+; RV32I-NEXT:    sw t6, 52(a0)
+; RV32I-NEXT:    sw t5, 48(a0)
+; RV32I-NEXT:    sw t4, 44(a0)
+; RV32I-NEXT:    sw t3, 40(a0)
+; RV32I-NEXT:    sw t2, 36(a0)
+; RV32I-NEXT:    sw t1, 32(a0)
+; RV32I-NEXT:    sw t0, 28(a0)
+; RV32I-NEXT:    sw a7, 24(a0)
+; RV32I-NEXT:    sw a6, 20(a0)
+; RV32I-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 16(a0)
+; RV32I-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 12(a0)
+; RV32I-NEXT:    lw a1, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 8(a0)
+; RV32I-NEXT:    lw a1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 4(a0)
+; RV32I-NEXT:    lw a1, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 0(a0)
 ; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -2987,112 +2951,110 @@ define void @callee_no_irq() nounwind{
 ;
 ; RV64I-LABEL: callee_no_irq:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    addi sp, sp, -160
-; RV64I-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s0, 144(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s1, 136(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s2, 128(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s3, 120(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s4, 112(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s5, 104(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s6, 96(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s7, 88(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s8, 80(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s9, 72(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s10, 64(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lui a6, %hi(var_test_irq)
-; RV64I-NEXT:    lw a0, %lo(var_test_irq)(a6)
-; RV64I-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
-; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
-; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
-; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    addi a5, a6, %lo(var_test_irq)
-; RV64I-NEXT:    lw a0, 16(a5)
-; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, 20(a5)
-; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw t0, 24(a5)
-; RV64I-NEXT:    lw t1, 28(a5)
-; RV64I-NEXT:    lw t2, 32(a5)
-; RV64I-NEXT:    lw t3, 36(a5)
-; RV64I-NEXT:    lw t4, 40(a5)
-; RV64I-NEXT:    lw t5, 44(a5)
-; RV64I-NEXT:    lw t6, 48(a5)
-; RV64I-NEXT:    lw s0, 52(a5)
-; RV64I-NEXT:    lw s1, 56(a5)
-; RV64I-NEXT:    lw s2, 60(a5)
-; RV64I-NEXT:    lw s3, 64(a5)
-; RV64I-NEXT:    lw s4, 68(a5)
-; RV64I-NEXT:    lw s5, 72(a5)
-; RV64I-NEXT:    lw s6, 76(a5)
-; RV64I-NEXT:    lw s7, 80(a5)
-; RV64I-NEXT:    lw s8, 84(a5)
-; RV64I-NEXT:    lw s9, 88(a5)
-; RV64I-NEXT:    lw s10, 92(a5)
-; RV64I-NEXT:    lw s11, 96(a5)
-; RV64I-NEXT:    lw ra, 100(a5)
-; RV64I-NEXT:    lw a7, 104(a5)
-; RV64I-NEXT:    lw a4, 108(a5)
-; RV64I-NEXT:    lw a0, 124(a5)
-; RV64I-NEXT:    lw a1, 120(a5)
-; RV64I-NEXT:    lw a2, 116(a5)
-; RV64I-NEXT:    lw a3, 112(a5)
-; RV64I-NEXT:    sw a0, 124(a5)
-; RV64I-NEXT:    sw a1, 120(a5)
-; RV64I-NEXT:    sw a2, 116(a5)
-; RV64I-NEXT:    sw a3, 112(a5)
-; RV64I-NEXT:    sw a4, 108(a5)
-; RV64I-NEXT:    sw a7, 104(a5)
-; RV64I-NEXT:    sw ra, 100(a5)
-; RV64I-NEXT:    sw s11, 96(a5)
-; RV64I-NEXT:    sw s10, 92(a5)
-; RV64I-NEXT:    sw s9, 88(a5)
-; RV64I-NEXT:    sw s8, 84(a5)
-; RV64I-NEXT:    sw s7, 80(a5)
-; RV64I-NEXT:    sw s6, 76(a5)
-; RV64I-NEXT:    sw s5, 72(a5)
-; RV64I-NEXT:    sw s4, 68(a5)
-; RV64I-NEXT:    sw s3, 64(a5)
-; RV64I-NEXT:    sw s2, 60(a5)
-; RV64I-NEXT:    sw s1, 56(a5)
-; RV64I-NEXT:    sw s0, 52(a5)
-; RV64I-NEXT:    sw t6, 48(a5)
-; RV64I-NEXT:    sw t5, 44(a5)
-; RV64I-NEXT:    sw t4, 40(a5)
-; RV64I-NEXT:    sw t3, 36(a5)
-; RV64I-NEXT:    sw t2, 32(a5)
-; RV64I-NEXT:    sw t1, 28(a5)
-; RV64I-NEXT:    sw t0, 24(a5)
-; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 20(a5)
-; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, 16(a5)
-; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
-; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
-; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
-; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq)(a6)
-; RV64I-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s6, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s7, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s8, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s9, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s10, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s11, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    addi sp, sp, 160
+; RV64I-NEXT:    addi sp, sp, -144
+; RV64I-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a0, %hi(var_test_irq)
+; RV64I-NEXT:    addi a0, a0, %lo(var_test_irq)
+; RV64I-NEXT:    lw a1, 0(a0)
+; RV64I-NEXT:    sd a1, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 4(a0)
+; RV64I-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 8(a0)
+; RV64I-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 12(a0)
+; RV64I-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 16(a0)
+; RV64I-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a6, 20(a0)
+; RV64I-NEXT:    lw a7, 24(a0)
+; RV64I-NEXT:    lw t0, 28(a0)
+; RV64I-NEXT:    lw t1, 32(a0)
+; RV64I-NEXT:    lw t2, 36(a0)
+; RV64I-NEXT:    lw t3, 40(a0)
+; RV64I-NEXT:    lw t4, 44(a0)
+; RV64I-NEXT:    lw t5, 48(a0)
+; RV64I-NEXT:    lw t6, 52(a0)
+; RV64I-NEXT:    lw s0, 56(a0)
+; RV64I-NEXT:    lw s1, 60(a0)
+; RV64I-NEXT:    lw s2, 64(a0)
+; RV64I-NEXT:    lw s3, 68(a0)
+; RV64I-NEXT:    lw s4, 72(a0)
+; RV64I-NEXT:    lw s5, 76(a0)
+; RV64I-NEXT:    lw s6, 80(a0)
+; RV64I-NEXT:    lw s7, 84(a0)
+; RV64I-NEXT:    lw s8, 88(a0)
+; RV64I-NEXT:    lw s9, 92(a0)
+; RV64I-NEXT:    lw s10, 96(a0)
+; RV64I-NEXT:    lw s11, 100(a0)
+; RV64I-NEXT:    lw ra, 104(a0)
+; RV64I-NEXT:    lw a5, 108(a0)
+; RV64I-NEXT:    lw a1, 124(a0)
+; RV64I-NEXT:    lw a2, 120(a0)
+; RV64I-NEXT:    lw a3, 116(a0)
+; RV64I-NEXT:    lw a4, 112(a0)
+; RV64I-NEXT:    sw a1, 124(a0)
+; RV64I-NEXT:    sw a2, 120(a0)
+; RV64I-NEXT:    sw a3, 116(a0)
+; RV64I-NEXT:    sw a4, 112(a0)
+; RV64I-NEXT:    sw a5, 108(a0)
+; RV64I-NEXT:    sw ra, 104(a0)
+; RV64I-NEXT:    sw s11, 100(a0)
+; RV64I-NEXT:    sw s10, 96(a0)
+; RV64I-NEXT:    sw s9, 92(a0)
+; RV64I-NEXT:    sw s8, 88(a0)
+; RV64I-NEXT:    sw s7, 84(a0)
+; RV64I-NEXT:    sw s6, 80(a0)
+; RV64I-NEXT:    sw s5, 76(a0)
+; RV64I-NEXT:    sw s4, 72(a0)
+; RV64I-NEXT:    sw s3, 68(a0)
+; RV64I-NEXT:    sw s2, 64(a0)
+; RV64I-NEXT:    sw s1, 60(a0)
+; RV64I-NEXT:    sw s0, 56(a0)
+; RV64I-NEXT:    sw t6, 52(a0)
+; RV64I-NEXT:    sw t5, 48(a0)
+; RV64I-NEXT:    sw t4, 44(a0)
+; RV64I-NEXT:    sw t3, 40(a0)
+; RV64I-NEXT:    sw t2, 36(a0)
+; RV64I-NEXT:    sw t1, 32(a0)
+; RV64I-NEXT:    sw t0, 28(a0)
+; RV64I-NEXT:    sw a7, 24(a0)
+; RV64I-NEXT:    sw a6, 20(a0)
+; RV64I-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 16(a0)
+; RV64I-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 12(a0)
+; RV64I-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 8(a0)
+; RV64I-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 4(a0)
+; RV64I-NEXT:    ld a1, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 0(a0)
+; RV64I-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 144
 ; RV64I-NEXT:    ret
   %val = load [32 x i32], ptr @var_test_irq
   store volatile [32 x i32] %val, ptr @var_test_irq
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
index de4c21f324688..4ad6b3e34c143 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
@@ -175,12 +175,12 @@ define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
 define dso_local i64 @ld_sd_global(i64 %a) nounwind {
 ; RV64I-LABEL: ld_sd_global:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    lui a2, %hi(G)
-; RV64I-NEXT:    ld a1, %lo(G)(a2)
-; RV64I-NEXT:    addi a3, a2, %lo(G)
-; RV64I-NEXT:    sd a0, %lo(G)(a2)
-; RV64I-NEXT:    ld zero, 72(a3)
-; RV64I-NEXT:    sd a0, 72(a3)
+; RV64I-NEXT:    lui a1, %hi(G)
+; RV64I-NEXT:    addi a2, a1, %lo(G)
+; RV64I-NEXT:    ld a1, 0(a2)
+; RV64I-NEXT:    sd a0, 0(a2)
+; RV64I-NEXT:    ld zero, 72(a2)
+; RV64I-NEXT:    sd a0, 72(a2)
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    ret
   %1 = load volatile i64, ptr @G
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll
index 391117c72ece7..d04dbe9bf27ed 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll
@@ -13,13 +13,13 @@ define void @baz() nounwind {
 ; CHECK-LABEL: baz:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    lui a0, %hi(foo)
-; CHECK-NEXT:    addi a1, a0, %lo(foo)
-; CHECK-NEXT:    lw a1, 4(a1)
-; CHECK-NEXT:    lw a0, %lo(foo)(a0)
+; CHECK-NEXT:    addi a0, a0, %lo(foo)
+; CHECK-NEXT:    lw a1, 4(a0)
+; CHECK-NEXT:    lw a0, 0(a0)
 ; CHECK-NEXT:    lui a2, %hi(bar)
-; CHECK-NEXT:    sw a1, %lo(bar)(a2)
-; CHECK-NEXT:    addi a1, a2, %lo(bar)
-; CHECK-NEXT:    sw a0, 4(a1)
+; CHECK-NEXT:    addi a2, a2, %lo(bar)
+; CHECK-NEXT:    sw a1, 0(a2)
+; CHECK-NEXT:    sw a0, 4(a2)
 ; CHECK-NEXT:    ret
 entry:
   %0 = load i32, ptr getelementptr inbounds ([2 x i32], ptr @foo, i64 0, i64 1), align 4
diff --git a/llvm/test/CodeGen/RISCV/saverestore.ll b/llvm/test/CodeGen/RISCV/saverestore.ll
index f3dc2d0ef5078..59f59014810b8 100644
--- a/llvm/test/CodeGen/RISCV/saverestore.ll
+++ b/llvm/test/CodeGen/RISCV/saverestore.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=riscv32 < %s | FileCheck %s -check-prefix=RV32I
 ; RUN: llc -mtriple=riscv64 < %s | FileCheck %s -check-prefix=RV64I
 ; RUN: llc -mtriple=riscv32 -mattr=+save-restore < %s | FileCheck %s -check-prefix=RV32I-SR
@@ -13,28 +14,280 @@
 
 define void @callee_saved0() nounwind {
 ; RV32I-LABEL: callee_saved0:
-; RV32I-NOT:     call t0, __riscv_save
-; RV32I-NOT:     tail __riscv_restore
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a0, %hi(var0)
+; RV32I-NEXT:    addi a0, a0, %lo(var0)
+; RV32I-NEXT:    lw a1, 0(a0)
+; RV32I-NEXT:    lw a2, 4(a0)
+; RV32I-NEXT:    lw a3, 8(a0)
+; RV32I-NEXT:    lw a4, 12(a0)
+; RV32I-NEXT:    lw a5, 16(a0)
+; RV32I-NEXT:    lw a6, 20(a0)
+; RV32I-NEXT:    lw a7, 24(a0)
+; RV32I-NEXT:    lw t0, 28(a0)
+; RV32I-NEXT:    lw t1, 32(a0)
+; RV32I-NEXT:    lw t2, 36(a0)
+; RV32I-NEXT:    lw t3, 40(a0)
+; RV32I-NEXT:    lw t4, 44(a0)
+; RV32I-NEXT:    lw t5, 48(a0)
+; RV32I-NEXT:    lw t6, 52(a0)
+; RV32I-NEXT:    lw s0, 68(a0)
+; RV32I-NEXT:    lw s1, 64(a0)
+; RV32I-NEXT:    lw s2, 60(a0)
+; RV32I-NEXT:    lw s3, 56(a0)
+; RV32I-NEXT:    sw s0, 68(a0)
+; RV32I-NEXT:    sw s1, 64(a0)
+; RV32I-NEXT:    sw s2, 60(a0)
+; RV32I-NEXT:    sw s3, 56(a0)
+; RV32I-NEXT:    sw t6, 52(a0)
+; RV32I-NEXT:    sw t5, 48(a0)
+; RV32I-NEXT:    sw t4, 44(a0)
+; RV32I-NEXT:    sw t3, 40(a0)
+; RV32I-NEXT:    sw t2, 36(a0)
+; RV32I-NEXT:    sw t1, 32(a0)
+; RV32I-NEXT:    sw t0, 28(a0)
+; RV32I-NEXT:    sw a7, 24(a0)
+; RV32I-NEXT:    sw a6, 20(a0)
+; RV32I-NEXT:    sw a5, 16(a0)
+; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    sw a3, 8(a0)
+; RV32I-NEXT:    sw a2, 4(a0)
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: callee_saved0:
-; RV64I-NOT:     call t0, __riscv_save
-; RV64I-NOT:     tail __riscv_restore
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd s0, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a0, %hi(var0)
+; RV64I-NEXT:    addi a0, a0, %lo(var0)
+; RV64I-NEXT:    lw a1, 0(a0)
+; RV64I-NEXT:    lw a2, 4(a0)
+; RV64I-NEXT:    lw a3, 8(a0)
+; RV64I-NEXT:    lw a4, 12(a0)
+; RV64I-NEXT:    lw a5, 16(a0)
+; RV64I-NEXT:    lw a6, 20(a0)
+; RV64I-NEXT:    lw a7, 24(a0)
+; RV64I-NEXT:    lw t0, 28(a0)
+; RV64I-NEXT:    lw t1, 32(a0)
+; RV64I-NEXT:    lw t2, 36(a0)
+; RV64I-NEXT:    lw t3, 40(a0)
+; RV64I-NEXT:    lw t4, 44(a0)
+; RV64I-NEXT:    lw t5, 48(a0)
+; RV64I-NEXT:    lw t6, 52(a0)
+; RV64I-NEXT:    lw s0, 68(a0)
+; RV64I-NEXT:    lw s1, 64(a0)
+; RV64I-NEXT:    lw s2, 60(a0)
+; RV64I-NEXT:    lw s3, 56(a0)
+; RV64I-NEXT:    sw s0, 68(a0)
+; RV64I-NEXT:    sw s1, 64(a0)
+; RV64I-NEXT:    sw s2, 60(a0)
+; RV64I-NEXT:    sw s3, 56(a0)
+; RV64I-NEXT:    sw t6, 52(a0)
+; RV64I-NEXT:    sw t5, 48(a0)
+; RV64I-NEXT:    sw t4, 44(a0)
+; RV64I-NEXT:    sw t3, 40(a0)
+; RV64I-NEXT:    sw t2, 36(a0)
+; RV64I-NEXT:    sw t1, 32(a0)
+; RV64I-NEXT:    sw t0, 28(a0)
+; RV64I-NEXT:    sw a7, 24(a0)
+; RV64I-NEXT:    sw a6, 20(a0)
+; RV64I-NEXT:    sw a5, 16(a0)
+; RV64I-NEXT:    sw a4, 12(a0)
+; RV64I-NEXT:    sw a3, 8(a0)
+; RV64I-NEXT:    sw a2, 4(a0)
+; RV64I-NEXT:    sw a1, 0(a0)
+; RV64I-NEXT:    ld s0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
 ;
 ; RV32I-SR-LABEL: callee_saved0:
-; RV32I-SR:         call t0, __riscv_save_5
-; RV32I-SR:         tail __riscv_restore_5
+; RV32I-SR:       # %bb.0:
+; RV32I-SR-NEXT:    call t0, __riscv_save_4
+; RV32I-SR-NEXT:    lui a0, %hi(var0)
+; RV32I-SR-NEXT:    addi a0, a0, %lo(var0)
+; RV32I-SR-NEXT:    lw a1, 0(a0)
+; RV32I-SR-NEXT:    lw a2, 4(a0)
+; RV32I-SR-NEXT:    lw a3, 8(a0)
+; RV32I-SR-NEXT:    lw a4, 12(a0)
+; RV32I-SR-NEXT:    lw a5, 16(a0)
+; RV32I-SR-NEXT:    lw a6, 20(a0)
+; RV32I-SR-NEXT:    lw a7, 24(a0)
+; RV32I-SR-NEXT:    lw t0, 28(a0)
+; RV32I-SR-NEXT:    lw t1, 32(a0)
+; RV32I-SR-NEXT:    lw t2, 36(a0)
+; RV32I-SR-NEXT:    lw t3, 40(a0)
+; RV32I-SR-NEXT:    lw t4, 44(a0)
+; RV32I-SR-NEXT:    lw t5, 48(a0)
+; RV32I-SR-NEXT:    lw t6, 52(a0)
+; RV32I-SR-NEXT:    lw s0, 68(a0)
+; RV32I-SR-NEXT:    lw s1, 64(a0)
+; RV32I-SR-NEXT:    lw s2, 60(a0)
+; RV32I-SR-NEXT:    lw s3, 56(a0)
+; RV32I-SR-NEXT:    sw s0, 68(a0)
+; RV32I-SR-NEXT:    sw s1, 64(a0)
+; RV32I-SR-NEXT:    sw s2, 60(a0)
+; RV32I-SR-NEXT:    sw s3, 56(a0)
+; RV32I-SR-NEXT:    sw t6, 52(a0)
+; RV32I-SR-NEXT:    sw t5, 48(a0)
+; RV32I-SR-NEXT:    sw t4, 44(a0)
+; RV32I-SR-NEXT:    sw t3, 40(a0)
+; RV32I-SR-NEXT:    sw t2, 36(a0)
+; RV32I-SR-NEXT:    sw t1, 32(a0)
+; RV32I-SR-NEXT:    sw t0, 28(a0)
+; RV32I-SR-NEXT:    sw a7, 24(a0)
+; RV32I-SR-NEXT:    sw a6, 20(a0)
+; RV32I-SR-NEXT:    sw a5, 16(a0)
+; RV32I-SR-NEXT:    sw a4, 12(a0)
+; RV32I-SR-NEXT:    sw a3, 8(a0)
+; RV32I-SR-NEXT:    sw a2, 4(a0)
+; RV32I-SR-NEXT:    sw a1, 0(a0)
+; RV32I-SR-NEXT:    tail __riscv_restore_4
 ;
 ; RV64I-SR-LABEL: callee_saved0:
-; RV64I-SR:         call t0, __riscv_save_5
-; RV64I-SR:         tail __riscv_restore_5
+; RV64I-SR:       # %bb.0:
+; RV64I-SR-NEXT:    call t0, __riscv_save_4
+; RV64I-SR-NEXT:    lui a0, %hi(var0)
+; RV64I-SR-NEXT:    addi a0, a0, %lo(var0)
+; RV64I-SR-NEXT:    lw a1, 0(a0)
+; RV64I-SR-NEXT:    lw a2, 4(a0)
+; RV64I-SR-NEXT:    lw a3, 8(a0)
+; RV64I-SR-NEXT:    lw a4, 12(a0)
+; RV64I-SR-NEXT:    lw a5, 16(a0)
+; RV64I-SR-NEXT:    lw a6, 20(a0)
+; RV64I-SR-NEXT:    lw a7, 24(a0)
+; RV64I-SR-NEXT:    lw t0, 28(a0)
+; RV64I-SR-NEXT:    lw t1, 32(a0)
+; RV64I-SR-NEXT:    lw t2, 36(a0)
+; RV64I-SR-NEXT:    lw t3, 40(a0)
+; RV64I-SR-NEXT:    lw t4, 44(a0)
+; RV64I-SR-NEXT:    lw t5, 48(a0)
+; RV64I-SR-NEXT:    lw t6, 52(a0)
+; RV64I-SR-NEXT:    lw s0, 68(a0)
+; RV64I-SR-NEXT:    lw s1, 64(a0)
+; RV64I-SR-NEXT:    lw s2, 60(a0)
+; RV64I-SR-NEXT:    lw s3, 56(a0)
+; RV64I-SR-NEXT:    sw s0, 68(a0)
+; RV64I-SR-NEXT:    sw s1, 64(a0)
+; RV64I-SR-NEXT:    sw s2, 60(a0)
+; RV64I-SR-NEXT:    sw s3, 56(a0)
+; RV64I-SR-NEXT:    sw t6, 52(a0)
+; RV64I-SR-NEXT:    sw t5, 48(a0)
+; RV64I-SR-NEXT:    sw t4, 44(a0)
+; RV64I-SR-NEXT:    sw t3, 40(a0)
+; RV64I-SR-NEXT:    sw t2, 36(a0)
+; RV64I-SR-NEXT:    sw t1, 32(a0)
+; RV64I-SR-NEXT:    sw t0, 28(a0)
+; RV64I-SR-NEXT:    sw a7, 24(a0)
+; RV64I-SR-NEXT:    sw a6, 20(a0)
+; RV64I-SR-NEXT:    sw a5, 16(a0)
+; RV64I-SR-NEXT:    sw a4, 12(a0)
+; RV64I-SR-NEXT:    sw a3, 8(a0)
+; RV64I-SR-NEXT:    sw a2, 4(a0)
+; RV64I-SR-NEXT:    sw a1, 0(a0)
+; RV64I-SR-NEXT:    tail __riscv_restore_4
 ;
 ; RV32I-FP-SR-LABEL: callee_saved0:
-; RV32I-FP-SR:         call t0, __riscv_save_5
-; RV32I-FP-SR:         tail __riscv_restore_5
+; RV32I-FP-SR:       # %bb.0:
+; RV32I-FP-SR-NEXT:    call t0, __riscv_save_4
+; RV32I-FP-SR-NEXT:    lui a0, %hi(var0)
+; RV32I-FP-SR-NEXT:    addi a0, a0, %lo(var0)
+; RV32I-FP-SR-NEXT:    lw a1, 0(a0)
+; RV32I-FP-SR-NEXT:    lw a2, 4(a0)
+; RV32I-FP-SR-NEXT:    lw a3, 8(a0)
+; RV32I-FP-SR-NEXT:    lw a4, 12(a0)
+; RV32I-FP-SR-NEXT:    lw a5, 16(a0)
+; RV32I-FP-SR-NEXT:    lw a6, 20(a0)
+; RV32I-FP-SR-NEXT:    lw a7, 24(a0)
+; RV32I-FP-SR-NEXT:    lw t0, 28(a0)
+; RV32I-FP-SR-NEXT:    lw t1, 32(a0)
+; RV32I-FP-SR-NEXT:    lw t2, 36(a0)
+; RV32I-FP-SR-NEXT:    lw t3, 40(a0)
+; RV32I-FP-SR-NEXT:    lw t4, 44(a0)
+; RV32I-FP-SR-NEXT:    lw t5, 48(a0)
+; RV32I-FP-SR-NEXT:    lw t6, 52(a0)
+; RV32I-FP-SR-NEXT:    lw s0, 68(a0)
+; RV32I-FP-SR-NEXT:    lw s1, 64(a0)
+; RV32I-FP-SR-NEXT:    lw s2, 60(a0)
+; RV32I-FP-SR-NEXT:    lw s3, 56(a0)
+; RV32I-FP-SR-NEXT:    sw s0, 68(a0)
+; RV32I-FP-SR-NEXT:    sw s1, 64(a0)
+; RV32I-FP-SR-NEXT:    sw s2, 60(a0)
+; RV32I-FP-SR-NEXT:    sw s3, 56(a0)
+; RV32I-FP-SR-NEXT:    sw t6, 52(a0)
+; RV32I-FP-SR-NEXT:    sw t5, 48(a0)
+; RV32I-FP-SR-NEXT:    sw t4, 44(a0)
+; RV32I-FP-SR-NEXT:    sw t3, 40(a0)
+; RV32I-FP-SR-NEXT:    sw t2, 36(a0)
+; RV32I-FP-SR-NEXT:    sw t1, 32(a0)
+; RV32I-FP-SR-NEXT:    sw t0, 28(a0)
+; RV32I-FP-SR-NEXT:    sw a7, 24(a0)
+; RV32I-FP-SR-NEXT:    sw a6, 20(a0)
+; RV32I-FP-SR-NEXT:    sw a5, 16(a0)
+; RV32I-FP-SR-NEXT:    sw a4, 12(a0)
+; RV32I-FP-SR-NEXT:    sw a3, 8(a0)
+; RV32I-FP-SR-NEXT:    sw a2, 4(a0)
+; RV32I-FP-SR-NEXT:    sw a1, 0(a0)
+; RV32I-FP-SR-NEXT:    tail __riscv_restore_4
 ;
 ; RV64I-FP-SR-LABEL: callee_saved0:
-; RV64I-FP-SR:         call t0, __riscv_save_5
-; RV64I-FP-SR:         tail __riscv_restore_5
+; RV64I-FP-SR:       # %bb.0:
+; RV64I-FP-SR-NEXT:    call t0, __riscv_save_4
+; RV64I-FP-SR-NEXT:    lui a0, %hi(var0)
+; RV64I-FP-SR-NEXT:    addi a0, a0, %lo(var0)
+; RV64I-FP-SR-NEXT:    lw a1, 0(a0)
+; RV64I-FP-SR-NEXT:    lw a2, 4(a0)
+; RV64I-FP-SR-NEXT:    lw a3, 8(a0)
+; RV64I-FP-SR-NEXT:    lw a4, 12(a0)
+; RV64I-FP-SR-NEXT:    lw a5, 16(a0)
+; RV64I-FP-SR-NEXT:    lw a6, 20(a0)
+; RV64I-FP-SR-NEXT:    lw a7, 24(a0)
+; RV64I-FP-SR-NEXT:    lw t0, 28(a0)
+; RV64I-FP-SR-NEXT:    lw t1, 32(a0)
+; RV64I-FP-SR-NEXT:    lw t2, 36(a0)
+; RV64I-FP-SR-NEXT:    lw t3, 40(a0)
+; RV64I-FP-SR-NEXT:    lw t4, 44(a0)
+; RV64I-FP-SR-NEXT:    lw t5, 48(a0)
+; RV64I-FP-SR-NEXT:    lw t6, 52(a0)
+; RV64I-FP-SR-NEXT:    lw s0, 68(a0)
+; RV64I-FP-SR-NEXT:    lw s1, 64(a0)
+; RV64I-FP-SR-NEXT:    lw s2, 60(a0)
+; RV64I-FP-SR-NEXT:    lw s3, 56(a0)
+; RV64I-FP-SR-NEXT:    sw s0, 68(a0)
+; RV64I-FP-SR-NEXT:    sw s1, 64(a0)
+; RV64I-FP-SR-NEXT:    sw s2, 60(a0)
+; RV64I-FP-SR-NEXT:    sw s3, 56(a0)
+; RV64I-FP-SR-NEXT:    sw t6, 52(a0)
+; RV64I-FP-SR-NEXT:    sw t5, 48(a0)
+; RV64I-FP-SR-NEXT:    sw t4, 44(a0)
+; RV64I-FP-SR-NEXT:    sw t3, 40(a0)
+; RV64I-FP-SR-NEXT:    sw t2, 36(a0)
+; RV64I-FP-SR-NEXT:    sw t1, 32(a0)
+; RV64I-FP-SR-NEXT:    sw t0, 28(a0)
+; RV64I-FP-SR-NEXT:    sw a7, 24(a0)
+; RV64I-FP-SR-NEXT:    sw a6, 20(a0)
+; RV64I-FP-SR-NEXT:    sw a5, 16(a0)
+; RV64I-FP-SR-NEXT:    sw a4, 12(a0)
+; RV64I-FP-SR-NEXT:    sw a3, 8(a0)
+; RV64I-FP-SR-NEXT:    sw a2, 4(a0)
+; RV64I-FP-SR-NEXT:    sw a1, 0(a0)
+; RV64I-FP-SR-NEXT:    tail __riscv_restore_4
   %val = load [18 x i32], ptr @var0
   store volatile [18 x i32] %val, ptr @var0
   ret void
@@ -42,28 +295,376 @@ define void @callee_saved0() nounwind {
 
 define void @callee_saved1() nounwind {
 ; RV32I-LABEL: callee_saved1:
-; RV32I-NOT:     call t0, __riscv_save
-; RV32I-NOT:     tail __riscv_restore
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    sw s0, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s8, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s9, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a0, %hi(var1)
+; RV32I-NEXT:    addi a0, a0, %lo(var1)
+; RV32I-NEXT:    lw a1, 0(a0)
+; RV32I-NEXT:    lw a2, 4(a0)
+; RV32I-NEXT:    lw a3, 8(a0)
+; RV32I-NEXT:    lw a4, 12(a0)
+; RV32I-NEXT:    lw a5, 16(a0)
+; RV32I-NEXT:    lw a6, 20(a0)
+; RV32I-NEXT:    lw a7, 24(a0)
+; RV32I-NEXT:    lw t0, 28(a0)
+; RV32I-NEXT:    lw t1, 32(a0)
+; RV32I-NEXT:    lw t2, 36(a0)
+; RV32I-NEXT:    lw t3, 40(a0)
+; RV32I-NEXT:    lw t4, 44(a0)
+; RV32I-NEXT:    lw t5, 48(a0)
+; RV32I-NEXT:    lw t6, 52(a0)
+; RV32I-NEXT:    lw s0, 56(a0)
+; RV32I-NEXT:    lw s1, 60(a0)
+; RV32I-NEXT:    lw s2, 64(a0)
+; RV32I-NEXT:    lw s3, 68(a0)
+; RV32I-NEXT:    lw s4, 72(a0)
+; RV32I-NEXT:    lw s5, 76(a0)
+; RV32I-NEXT:    lw s6, 92(a0)
+; RV32I-NEXT:    lw s7, 88(a0)
+; RV32I-NEXT:    lw s8, 84(a0)
+; RV32I-NEXT:    lw s9, 80(a0)
+; RV32I-NEXT:    sw s6, 92(a0)
+; RV32I-NEXT:    sw s7, 88(a0)
+; RV32I-NEXT:    sw s8, 84(a0)
+; RV32I-NEXT:    sw s9, 80(a0)
+; RV32I-NEXT:    sw s5, 76(a0)
+; RV32I-NEXT:    sw s4, 72(a0)
+; RV32I-NEXT:    sw s3, 68(a0)
+; RV32I-NEXT:    sw s2, 64(a0)
+; RV32I-NEXT:    sw s1, 60(a0)
+; RV32I-NEXT:    sw s0, 56(a0)
+; RV32I-NEXT:    sw t6, 52(a0)
+; RV32I-NEXT:    sw t5, 48(a0)
+; RV32I-NEXT:    sw t4, 44(a0)
+; RV32I-NEXT:    sw t3, 40(a0)
+; RV32I-NEXT:    sw t2, 36(a0)
+; RV32I-NEXT:    sw t1, 32(a0)
+; RV32I-NEXT:    sw t0, 28(a0)
+; RV32I-NEXT:    sw a7, 24(a0)
+; RV32I-NEXT:    sw a6, 20(a0)
+; RV32I-NEXT:    sw a5, 16(a0)
+; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    sw a3, 8(a0)
+; RV32I-NEXT:    sw a2, 4(a0)
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    lw s0, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: callee_saved1:
-; RV64I-NOT:     call t0, __riscv_save
-; RV64I-NOT:     tail __riscv_restore
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -80
+; RV64I-NEXT:    sd s0, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s7, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s8, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s9, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a0, %hi(var1)
+; RV64I-NEXT:    addi a0, a0, %lo(var1)
+; RV64I-NEXT:    lw a1, 0(a0)
+; RV64I-NEXT:    lw a2, 4(a0)
+; RV64I-NEXT:    lw a3, 8(a0)
+; RV64I-NEXT:    lw a4, 12(a0)
+; RV64I-NEXT:    lw a5, 16(a0)
+; RV64I-NEXT:    lw a6, 20(a0)
+; RV64I-NEXT:    lw a7, 24(a0)
+; RV64I-NEXT:    lw t0, 28(a0)
+; RV64I-NEXT:    lw t1, 32(a0)
+; RV64I-NEXT:    lw t2, 36(a0)
+; RV64I-NEXT:    lw t3, 40(a0)
+; RV64I-NEXT:    lw t4, 44(a0)
+; RV64I-NEXT:    lw t5, 48(a0)
+; RV64I-NEXT:    lw t6, 52(a0)
+; RV64I-NEXT:    lw s0, 56(a0)
+; RV64I-NEXT:    lw s1, 60(a0)
+; RV64I-NEXT:    lw s2, 64(a0)
+; RV64I-NEXT:    lw s3, 68(a0)
+; RV64I-NEXT:    lw s4, 72(a0)
+; RV64I-NEXT:    lw s5, 76(a0)
+; RV64I-NEXT:    lw s6, 92(a0)
+; RV64I-NEXT:    lw s7, 88(a0)
+; RV64I-NEXT:    lw s8, 84(a0)
+; RV64I-NEXT:    lw s9, 80(a0)
+; RV64I-NEXT:    sw s6, 92(a0)
+; RV64I-NEXT:    sw s7, 88(a0)
+; RV64I-NEXT:    sw s8, 84(a0)
+; RV64I-NEXT:    sw s9, 80(a0)
+; RV64I-NEXT:    sw s5, 76(a0)
+; RV64I-NEXT:    sw s4, 72(a0)
+; RV64I-NEXT:    sw s3, 68(a0)
+; RV64I-NEXT:    sw s2, 64(a0)
+; RV64I-NEXT:    sw s1, 60(a0)
+; RV64I-NEXT:    sw s0, 56(a0)
+; RV64I-NEXT:    sw t6, 52(a0)
+; RV64I-NEXT:    sw t5, 48(a0)
+; RV64I-NEXT:    sw t4, 44(a0)
+; RV64I-NEXT:    sw t3, 40(a0)
+; RV64I-NEXT:    sw t2, 36(a0)
+; RV64I-NEXT:    sw t1, 32(a0)
+; RV64I-NEXT:    sw t0, 28(a0)
+; RV64I-NEXT:    sw a7, 24(a0)
+; RV64I-NEXT:    sw a6, 20(a0)
+; RV64I-NEXT:    sw a5, 16(a0)
+; RV64I-NEXT:    sw a4, 12(a0)
+; RV64I-NEXT:    sw a3, 8(a0)
+; RV64I-NEXT:    sw a2, 4(a0)
+; RV64I-NEXT:    sw a1, 0(a0)
+; RV64I-NEXT:    ld s0, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 80
+; RV64I-NEXT:    ret
 ;
 ; RV32I-SR-LABEL: callee_saved1:
-; RV32I-SR:         call t0, __riscv_save_11
-; RV32I-SR:         tail __riscv_restore_11
+; RV32I-SR:       # %bb.0:
+; RV32I-SR-NEXT:    call t0, __riscv_save_10
+; RV32I-SR-NEXT:    lui a0, %hi(var1)
+; RV32I-SR-NEXT:    addi a0, a0, %lo(var1)
+; RV32I-SR-NEXT:    lw a1, 0(a0)
+; RV32I-SR-NEXT:    lw a2, 4(a0)
+; RV32I-SR-NEXT:    lw a3, 8(a0)
+; RV32I-SR-NEXT:    lw a4, 12(a0)
+; RV32I-SR-NEXT:    lw a5, 16(a0)
+; RV32I-SR-NEXT:    lw a6, 20(a0)
+; RV32I-SR-NEXT:    lw a7, 24(a0)
+; RV32I-SR-NEXT:    lw t0, 28(a0)
+; RV32I-SR-NEXT:    lw t1, 32(a0)
+; RV32I-SR-NEXT:    lw t2, 36(a0)
+; RV32I-SR-NEXT:    lw t3, 40(a0)
+; RV32I-SR-NEXT:    lw t4, 44(a0)
+; RV32I-SR-NEXT:    lw t5, 48(a0)
+; RV32I-SR-NEXT:    lw t6, 52(a0)
+; RV32I-SR-NEXT:    lw s0, 56(a0)
+; RV32I-SR-NEXT:    lw s1, 60(a0)
+; RV32I-SR-NEXT:    lw s2, 64(a0)
+; RV32I-SR-NEXT:    lw s3, 68(a0)
+; RV32I-SR-NEXT:    lw s4, 72(a0)
+; RV32I-SR-NEXT:    lw s5, 76(a0)
+; RV32I-SR-NEXT:    lw s6, 92(a0)
+; RV32I-SR-NEXT:    lw s7, 88(a0)
+; RV32I-SR-NEXT:    lw s8, 84(a0)
+; RV32I-SR-NEXT:    lw s9, 80(a0)
+; RV32I-SR-NEXT:    sw s6, 92(a0)
+; RV32I-SR-NEXT:    sw s7, 88(a0)
+; RV32I-SR-NEXT:    sw s8, 84(a0)
+; RV32I-SR-NEXT:    sw s9, 80(a0)
+; RV32I-SR-NEXT:    sw s5, 76(a0)
+; RV32I-SR-NEXT:    sw s4, 72(a0)
+; RV32I-SR-NEXT:    sw s3, 68(a0)
+; RV32I-SR-NEXT:    sw s2, 64(a0)
+; RV32I-SR-NEXT:    sw s1, 60(a0)
+; RV32I-SR-NEXT:    sw s0, 56(a0)
+; RV32I-SR-NEXT:    sw t6, 52(a0)
+; RV32I-SR-NEXT:    sw t5, 48(a0)
+; RV32I-SR-NEXT:    sw t4, 44(a0)
+; RV32I-SR-NEXT:    sw t3, 40(a0)
+; RV32I-SR-NEXT:    sw t2, 36(a0)
+; RV32I-SR-NEXT:    sw t1, 32(a0)
+; RV32I-SR-NEXT:    sw t0, 28(a0)
+; RV32I-SR-NEXT:    sw a7, 24(a0)
+; RV32I-SR-NEXT:    sw a6, 20(a0)
+; RV32I-SR-NEXT:    sw a5, 16(a0)
+; RV32I-SR-NEXT:    sw a4, 12(a0)
+; RV32I-SR-NEXT:    sw a3, 8(a0)
+; RV32I-SR-NEXT:    sw a2, 4(a0)
+; RV32I-SR-NEXT:    sw a1, 0(a0)
+; RV32I-SR-NEXT:    tail __riscv_restore_10
 ;
 ; RV64I-SR-LABEL: callee_saved1:
-; RV64I-SR:         call t0, __riscv_save_11
-; RV64I-SR:         tail __riscv_restore_11
+; RV64I-SR:       # %bb.0:
+; RV64I-SR-NEXT:    call t0, __riscv_save_10
+; RV64I-SR-NEXT:    lui a0, %hi(var1)
+; RV64I-SR-NEXT:    addi a0, a0, %lo(var1)
+; RV64I-SR-NEXT:    lw a1, 0(a0)
+; RV64I-SR-NEXT:    lw a2, 4(a0)
+; RV64I-SR-NEXT:    lw a3, 8(a0)
+; RV64I-SR-NEXT:    lw a4, 12(a0)
+; RV64I-SR-NEXT:    lw a5, 16(a0)
+; RV64I-SR-NEXT:    lw a6, 20(a0)
+; RV64I-SR-NEXT:    lw a7, 24(a0)
+; RV64I-SR-NEXT:    lw t0, 28(a0)
+; RV64I-SR-NEXT:    lw t1, 32(a0)
+; RV64I-SR-NEXT:    lw t2, 36(a0)
+; RV64I-SR-NEXT:    lw t3, 40(a0)
+; RV64I-SR-NEXT:    lw t4, 44(a0)
+; RV64I-SR-NEXT:    lw t5, 48(a0)
+; RV64I-SR-NEXT:    lw t6, 52(a0)
+; RV64I-SR-NEXT:    lw s0, 56(a0)
+; RV64I-SR-NEXT:    lw s1, 60(a0)
+; RV64I-SR-NEXT:    lw s2, 64(a0)
+; RV64I-SR-NEXT:    lw s3, 68(a0)
+; RV64I-SR-NEXT:    lw s4, 72(a0)
+; RV64I-SR-NEXT:    lw s5, 76(a0)
+; RV64I-SR-NEXT:    lw s6, 92(a0)
+; RV64I-SR-NEXT:    lw s7, 88(a0)
+; RV64I-SR-NEXT:    lw s8, 84(a0)
+; RV64I-SR-NEXT:    lw s9, 80(a0)
+; RV64I-SR-NEXT:    sw s6, 92(a0)
+; RV64I-SR-NEXT:    sw s7, 88(a0)
+; RV64I-SR-NEXT:    sw s8, 84(a0)
+; RV64I-SR-NEXT:    sw s9, 80(a0)
+; RV64I-SR-NEXT:    sw s5, 76(a0)
+; RV64I-SR-NEXT:    sw s4, 72(a0)
+; RV64I-SR-NEXT:    sw s3, 68(a0)
+; RV64I-SR-NEXT:    sw s2, 64(a0)
+; RV64I-SR-NEXT:    sw s1, 60(a0)
+; RV64I-SR-NEXT:    sw s0, 56(a0)
+; RV64I-SR-NEXT:    sw t6, 52(a0)
+; RV64I-SR-NEXT:    sw t5, 48(a0)
+; RV64I-SR-NEXT:    sw t4, 44(a0)
+; RV64I-SR-NEXT:    sw t3, 40(a0)
+; RV64I-SR-NEXT:    sw t2, 36(a0)
+; RV64I-SR-NEXT:    sw t1, 32(a0)
+; RV64I-SR-NEXT:    sw t0, 28(a0)
+; RV64I-SR-NEXT:    sw a7, 24(a0)
+; RV64I-SR-NEXT:    sw a6, 20(a0)
+; RV64I-SR-NEXT:    sw a5, 16(a0)
+; RV64I-SR-NEXT:    sw a4, 12(a0)
+; RV64I-SR-NEXT:    sw a3, 8(a0)
+; RV64I-SR-NEXT:    sw a2, 4(a0)
+; RV64I-SR-NEXT:    sw a1, 0(a0)
+; RV64I-SR-NEXT:    tail __riscv_restore_10
 ;
 ; RV32I-FP-SR-LABEL: callee_saved1:
-; RV32I-FP-SR:         call t0, __riscv_save_11
-; RV32I-FP-SR:         tail __riscv_restore_11
+; RV32I-FP-SR:       # %bb.0:
+; RV32I-FP-SR-NEXT:    call t0, __riscv_save_10
+; RV32I-FP-SR-NEXT:    lui a0, %hi(var1)
+; RV32I-FP-SR-NEXT:    addi a0, a0, %lo(var1)
+; RV32I-FP-SR-NEXT:    lw a1, 0(a0)
+; RV32I-FP-SR-NEXT:    lw a2, 4(a0)
+; RV32I-FP-SR-NEXT:    lw a3, 8(a0)
+; RV32I-FP-SR-NEXT:    lw a4, 12(a0)
+; RV32I-FP-SR-NEXT:    lw a5, 16(a0)
+; RV32I-FP-SR-NEXT:    lw a6, 20(a0)
+; RV32I-FP-SR-NEXT:    lw a7, 24(a0)
+; RV32I-FP-SR-NEXT:    lw t0, 28(a0)
+; RV32I-FP-SR-NEXT:    lw t1, 32(a0)
+; RV32I-FP-SR-NEXT:    lw t2, 36(a0)
+; RV32I-FP-SR-NEXT:    lw t3, 40(a0)
+; RV32I-FP-SR-NEXT:    lw t4, 44(a0)
+; RV32I-FP-SR-NEXT:    lw t5, 48(a0)
+; RV32I-FP-SR-NEXT:    lw t6, 52(a0)
+; RV32I-FP-SR-NEXT:    lw s0, 56(a0)
+; RV32I-FP-SR-NEXT:    lw s1, 60(a0)
+; RV32I-FP-SR-NEXT:    lw s2, 64(a0)
+; RV32I-FP-SR-NEXT:    lw s3, 68(a0)
+; RV32I-FP-SR-NEXT:    lw s4, 72(a0)
+; RV32I-FP-SR-NEXT:    lw s5, 76(a0)
+; RV32I-FP-SR-NEXT:    lw s6, 92(a0)
+; RV32I-FP-SR-NEXT:    lw s7, 88(a0)
+; RV32I-FP-SR-NEXT:    lw s8, 84(a0)
+; RV32I-FP-SR-NEXT:    lw s9, 80(a0)
+; RV32I-FP-SR-NEXT:    sw s6, 92(a0)
+; RV32I-FP-SR-NEXT:    sw s7, 88(a0)
+; RV32I-FP-SR-NEXT:    sw s8, 84(a0)
+; RV32I-FP-SR-NEXT:    sw s9, 80(a0)
+; RV32I-FP-SR-NEXT:    sw s5, 76(a0)
+; RV32I-FP-SR-NEXT:    sw s4, 72(a0)
+; RV32I-FP-SR-NEXT:    sw s3, 68(a0)
+; RV32I-FP-SR-NEXT:    sw s2, 64(a0)
+; RV32I-FP-SR-NEXT:    sw s1, 60(a0)
+; RV32I-FP-SR-NEXT:    sw s0, 56(a0)
+; RV32I-FP-SR-NEXT:    sw t6, 52(a0)
+; RV32I-FP-SR-NEXT:    sw t5, 48(a0)
+; RV32I-FP-SR-NEXT:    sw t4, 44(a0)
+; RV32I-FP-SR-NEXT:    sw t3, 40(a0)
+; RV32I-FP-SR-NEXT:    sw t2, 36(a0)
+; RV32I-FP-SR-NEXT:    sw t1, 32(a0)
+; RV32I-FP-SR-NEXT:    sw t0, 28(a0)
+; RV32I-FP-SR-NEXT:    sw a7, 24(a0)
+; RV32I-FP-SR-NEXT:    sw a6, 20(a0)
+; RV32I-FP-SR-NEXT:    sw a5, 16(a0)
+; RV32I-FP-SR-NEXT:    sw a4, 12(a0)
+; RV32I-FP-SR-NEXT:    sw a3, 8(a0)
+; RV32I-FP-SR-NEXT:    sw a2, 4(a0)
+; RV32I-FP-SR-NEXT:    sw a1, 0(a0)
+; RV32I-FP-SR-NEXT:    tail __riscv_restore_10
 ;
 ; RV64I-FP-SR-LABEL: callee_saved1:
-; RV64I-FP-SR:         call t0, __riscv_save_11
-; RV64I-FP-SR:         tail __riscv_restore_11
+; RV64I-FP-SR:       # %bb.0:
+; RV64I-FP-SR-NEXT:    call t0, __riscv_save_10
+; RV64I-FP-SR-NEXT:    lui a0, %hi(var1)
+; RV64I-FP-SR-NEXT:    addi a0, a0, %lo(var1)
+; RV64I-FP-SR-NEXT:    lw a1, 0(a0)
+; RV64I-FP-SR-NEXT:    lw a2, 4(a0)
+; RV64I-FP-SR-NEXT:    lw a3, 8(a0)
+; RV64I-FP-SR-NEXT:    lw a4, 12(a0)
+; RV64I-FP-SR-NEXT:    lw a5, 16(a0)
+; RV64I-FP-SR-NEXT:    lw a6, 20(a0)
+; RV64I-FP-SR-NEXT:    lw a7, 24(a0)
+; RV64I-FP-SR-NEXT:    lw t0, 28(a0)
+; RV64I-FP-SR-NEXT:    lw t1, 32(a0)
+; RV64I-FP-SR-NEXT:    lw t2, 36(a0)
+; RV64I-FP-SR-NEXT:    lw t3, 40(a0)
+; RV64I-FP-SR-NEXT:    lw t4, 44(a0)
+; RV64I-FP-SR-NEXT:    lw t5, 48(a0)
+; RV64I-FP-SR-NEXT:    lw t6, 52(a0)
+; RV64I-FP-SR-NEXT:    lw s0, 56(a0)
+; RV64I-FP-SR-NEXT:    lw s1, 60(a0)
+; RV64I-FP-SR-NEXT:    lw s2, 64(a0)
+; RV64I-FP-SR-NEXT:    lw s3, 68(a0)
+; RV64I-FP-SR-NEXT:    lw s4, 72(a0)
+; RV64I-FP-SR-NEXT:    lw s5, 76(a0)
+; RV64I-FP-SR-NEXT:    lw s6, 92(a0)
+; RV64I-FP-SR-NEXT:    lw s7, 88(a0)
+; RV64I-FP-SR-NEXT:    lw s8, 84(a0)
+; RV64I-FP-SR-NEXT:    lw s9, 80(a0)
+; RV64I-FP-SR-NEXT:    sw s6, 92(a0)
+; RV64I-FP-SR-NEXT:    sw s7, 88(a0)
+; RV64I-FP-SR-NEXT:    sw s8, 84(a0)
+; RV64I-FP-SR-NEXT:    sw s9, 80(a0)
+; RV64I-FP-SR-NEXT:    sw s5, 76(a0)
+; RV64I-FP-SR-NEXT:    sw s4, 72(a0)
+; RV64I-FP-SR-NEXT:    sw s3, 68(a0)
+; RV64I-FP-SR-NEXT:    sw s2, 64(a0)
+; RV64I-FP-SR-NEXT:    sw s1, 60(a0)
+; RV64I-FP-SR-NEXT:    sw s0, 56(a0)
+; RV64I-FP-SR-NEXT:    sw t6, 52(a0)
+; RV64I-FP-SR-NEXT:    sw t5, 48(a0)
+; RV64I-FP-SR-NEXT:    sw t4, 44(a0)
+; RV64I-FP-SR-NEXT:    sw t3, 40(a0)
+; RV64I-FP-SR-NEXT:    sw t2, 36(a0)
+; RV64I-FP-SR-NEXT:    sw t1, 32(a0)
+; RV64I-FP-SR-NEXT:    sw t0, 28(a0)
+; RV64I-FP-SR-NEXT:    sw a7, 24(a0)
+; RV64I-FP-SR-NEXT:    sw a6, 20(a0)
+; RV64I-FP-SR-NEXT:    sw a5, 16(a0)
+; RV64I-FP-SR-NEXT:    sw a4, 12(a0)
+; RV64I-FP-SR-NEXT:    sw a3, 8(a0)
+; RV64I-FP-SR-NEXT:    sw a2, 4(a0)
+; RV64I-FP-SR-NEXT:    sw a1, 0(a0)
+; RV64I-FP-SR-NEXT:    tail __riscv_restore_10
   %val = load [24 x i32], ptr @var1
   store volatile [24 x i32] %val, ptr @var1
   ret void
@@ -71,28 +672,504 @@ define void @callee_saved1() nounwind {
 
 define void @callee_saved2() nounwind {
 ; RV32I-LABEL: callee_saved2:
-; RV32I-NOT:     call t0, __riscv_save
-; RV32I-NOT:     tail __riscv_restore
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -64
+; RV32I-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s7, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s8, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s9, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s10, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s11, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a0, %hi(var2)
+; RV32I-NEXT:    addi a0, a0, %lo(var2)
+; RV32I-NEXT:    lw a1, 0(a0)
+; RV32I-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 4(a0)
+; RV32I-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a1, 8(a0)
+; RV32I-NEXT:    sw a1, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a4, 12(a0)
+; RV32I-NEXT:    lw a5, 16(a0)
+; RV32I-NEXT:    lw a6, 20(a0)
+; RV32I-NEXT:    lw a7, 24(a0)
+; RV32I-NEXT:    lw t0, 28(a0)
+; RV32I-NEXT:    lw t1, 32(a0)
+; RV32I-NEXT:    lw t2, 36(a0)
+; RV32I-NEXT:    lw t3, 40(a0)
+; RV32I-NEXT:    lw t4, 44(a0)
+; RV32I-NEXT:    lw t5, 48(a0)
+; RV32I-NEXT:    lw t6, 52(a0)
+; RV32I-NEXT:    lw s0, 56(a0)
+; RV32I-NEXT:    lw s1, 60(a0)
+; RV32I-NEXT:    lw s2, 64(a0)
+; RV32I-NEXT:    lw s3, 68(a0)
+; RV32I-NEXT:    lw s4, 72(a0)
+; RV32I-NEXT:    lw s5, 76(a0)
+; RV32I-NEXT:    lw s6, 80(a0)
+; RV32I-NEXT:    lw s7, 84(a0)
+; RV32I-NEXT:    lw s8, 88(a0)
+; RV32I-NEXT:    lw s9, 92(a0)
+; RV32I-NEXT:    lw s10, 96(a0)
+; RV32I-NEXT:    lw s11, 100(a0)
+; RV32I-NEXT:    lw ra, 116(a0)
+; RV32I-NEXT:    lw a1, 112(a0)
+; RV32I-NEXT:    lw a2, 108(a0)
+; RV32I-NEXT:    lw a3, 104(a0)
+; RV32I-NEXT:    sw ra, 116(a0)
+; RV32I-NEXT:    sw a1, 112(a0)
+; RV32I-NEXT:    sw a2, 108(a0)
+; RV32I-NEXT:    sw a3, 104(a0)
+; RV32I-NEXT:    sw s11, 100(a0)
+; RV32I-NEXT:    sw s10, 96(a0)
+; RV32I-NEXT:    sw s9, 92(a0)
+; RV32I-NEXT:    sw s8, 88(a0)
+; RV32I-NEXT:    sw s7, 84(a0)
+; RV32I-NEXT:    sw s6, 80(a0)
+; RV32I-NEXT:    sw s5, 76(a0)
+; RV32I-NEXT:    sw s4, 72(a0)
+; RV32I-NEXT:    sw s3, 68(a0)
+; RV32I-NEXT:    sw s2, 64(a0)
+; RV32I-NEXT:    sw s1, 60(a0)
+; RV32I-NEXT:    sw s0, 56(a0)
+; RV32I-NEXT:    sw t6, 52(a0)
+; RV32I-NEXT:    sw t5, 48(a0)
+; RV32I-NEXT:    sw t4, 44(a0)
+; RV32I-NEXT:    sw t3, 40(a0)
+; RV32I-NEXT:    sw t2, 36(a0)
+; RV32I-NEXT:    sw t1, 32(a0)
+; RV32I-NEXT:    sw t0, 28(a0)
+; RV32I-NEXT:    sw a7, 24(a0)
+; RV32I-NEXT:    sw a6, 20(a0)
+; RV32I-NEXT:    sw a5, 16(a0)
+; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    lw a1, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 8(a0)
+; RV32I-NEXT:    lw a1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 4(a0)
+; RV32I-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 64
+; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: callee_saved2:
-; RV64I-NOT:     call t0, __riscv_save
-; RV64I-NOT:     tail __riscv_restore
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -128
+; RV64I-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s7, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s8, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s9, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s10, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s11, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a0, %hi(var2)
+; RV64I-NEXT:    addi a0, a0, %lo(var2)
+; RV64I-NEXT:    lw a1, 0(a0)
+; RV64I-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 4(a0)
+; RV64I-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a1, 8(a0)
+; RV64I-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lw a4, 12(a0)
+; RV64I-NEXT:    lw a5, 16(a0)
+; RV64I-NEXT:    lw a6, 20(a0)
+; RV64I-NEXT:    lw a7, 24(a0)
+; RV64I-NEXT:    lw t0, 28(a0)
+; RV64I-NEXT:    lw t1, 32(a0)
+; RV64I-NEXT:    lw t2, 36(a0)
+; RV64I-NEXT:    lw t3, 40(a0)
+; RV64I-NEXT:    lw t4, 44(a0)
+; RV64I-NEXT:    lw t5, 48(a0)
+; RV64I-NEXT:    lw t6, 52(a0)
+; RV64I-NEXT:    lw s0, 56(a0)
+; RV64I-NEXT:    lw s1, 60(a0)
+; RV64I-NEXT:    lw s2, 64(a0)
+; RV64I-NEXT:    lw s3, 68(a0)
+; RV64I-NEXT:    lw s4, 72(a0)
+; RV64I-NEXT:    lw s5, 76(a0)
+; RV64I-NEXT:    lw s6, 80(a0)
+; RV64I-NEXT:    lw s7, 84(a0)
+; RV64I-NEXT:    lw s8, 88(a0)
+; RV64I-NEXT:    lw s9, 92(a0)
+; RV64I-NEXT:    lw s10, 96(a0)
+; RV64I-NEXT:    lw s11, 100(a0)
+; RV64I-NEXT:    lw ra, 116(a0)
+; RV64I-NEXT:    lw a1, 112(a0)
+; RV64I-NEXT:    lw a2, 108(a0)
+; RV64I-NEXT:    lw a3, 104(a0)
+; RV64I-NEXT:    sw ra, 116(a0)
+; RV64I-NEXT:    sw a1, 112(a0)
+; RV64I-NEXT:    sw a2, 108(a0)
+; RV64I-NEXT:    sw a3, 104(a0)
+; RV64I-NEXT:    sw s11, 100(a0)
+; RV64I-NEXT:    sw s10, 96(a0)
+; RV64I-NEXT:    sw s9, 92(a0)
+; RV64I-NEXT:    sw s8, 88(a0)
+; RV64I-NEXT:    sw s7, 84(a0)
+; RV64I-NEXT:    sw s6, 80(a0)
+; RV64I-NEXT:    sw s5, 76(a0)
+; RV64I-NEXT:    sw s4, 72(a0)
+; RV64I-NEXT:    sw s3, 68(a0)
+; RV64I-NEXT:    sw s2, 64(a0)
+; RV64I-NEXT:    sw s1, 60(a0)
+; RV64I-NEXT:    sw s0, 56(a0)
+; RV64I-NEXT:    sw t6, 52(a0)
+; RV64I-NEXT:    sw t5, 48(a0)
+; RV64I-NEXT:    sw t4, 44(a0)
+; RV64I-NEXT:    sw t3, 40(a0)
+; RV64I-NEXT:    sw t2, 36(a0)
+; RV64I-NEXT:    sw t1, 32(a0)
+; RV64I-NEXT:    sw t0, 28(a0)
+; RV64I-NEXT:    sw a7, 24(a0)
+; RV64I-NEXT:    sw a6, 20(a0)
+; RV64I-NEXT:    sw a5, 16(a0)
+; RV64I-NEXT:    sw a4, 12(a0)
+; RV64I-NEXT:    ld a1, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 8(a0)
+; RV64I-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 4(a0)
+; RV64I-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    sw a1, 0(a0)
+; RV64I-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s10, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s11, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 128
+; RV64I-NEXT:    ret
 ;
 ; RV32I-SR-LABEL: callee_saved2:
-; RV32I-SR:         call t0, __riscv_save_12
-; RV32I-SR:         tail __riscv_restore_12
+; RV32I-SR:       # %bb.0:
+; RV32I-SR-NEXT:    call t0, __riscv_save_12
+; RV32I-SR-NEXT:    addi sp, sp, -16
+; RV32I-SR-NEXT:    lui a0, %hi(var2)
+; RV32I-SR-NEXT:    addi a0, a0, %lo(var2)
+; RV32I-SR-NEXT:    lw a1, 0(a0)
+; RV32I-SR-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    lw a1, 4(a0)
+; RV32I-SR-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    lw a1, 8(a0)
+; RV32I-SR-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    lw a4, 12(a0)
+; RV32I-SR-NEXT:    lw a5, 16(a0)
+; RV32I-SR-NEXT:    lw a6, 20(a0)
+; RV32I-SR-NEXT:    lw a7, 24(a0)
+; RV32I-SR-NEXT:    lw t0, 28(a0)
+; RV32I-SR-NEXT:    lw t1, 32(a0)
+; RV32I-SR-NEXT:    lw t2, 36(a0)
+; RV32I-SR-NEXT:    lw t3, 40(a0)
+; RV32I-SR-NEXT:    lw t4, 44(a0)
+; RV32I-SR-NEXT:    lw t5, 48(a0)
+; RV32I-SR-NEXT:    lw t6, 52(a0)
+; RV32I-SR-NEXT:    lw s0, 56(a0)
+; RV32I-SR-NEXT:    lw s1, 60(a0)
+; RV32I-SR-NEXT:    lw s2, 64(a0)
+; RV32I-SR-NEXT:    lw s3, 68(a0)
+; RV32I-SR-NEXT:    lw s4, 72(a0)
+; RV32I-SR-NEXT:    lw s5, 76(a0)
+; RV32I-SR-NEXT:    lw s6, 80(a0)
+; RV32I-SR-NEXT:    lw s7, 84(a0)
+; RV32I-SR-NEXT:    lw s8, 88(a0)
+; RV32I-SR-NEXT:    lw s9, 92(a0)
+; RV32I-SR-NEXT:    lw s10, 96(a0)
+; RV32I-SR-NEXT:    lw s11, 100(a0)
+; RV32I-SR-NEXT:    lw ra, 116(a0)
+; RV32I-SR-NEXT:    lw a1, 112(a0)
+; RV32I-SR-NEXT:    lw a2, 108(a0)
+; RV32I-SR-NEXT:    lw a3, 104(a0)
+; RV32I-SR-NEXT:    sw ra, 116(a0)
+; RV32I-SR-NEXT:    sw a1, 112(a0)
+; RV32I-SR-NEXT:    sw a2, 108(a0)
+; RV32I-SR-NEXT:    sw a3, 104(a0)
+; RV32I-SR-NEXT:    sw s11, 100(a0)
+; RV32I-SR-NEXT:    sw s10, 96(a0)
+; RV32I-SR-NEXT:    sw s9, 92(a0)
+; RV32I-SR-NEXT:    sw s8, 88(a0)
+; RV32I-SR-NEXT:    sw s7, 84(a0)
+; RV32I-SR-NEXT:    sw s6, 80(a0)
+; RV32I-SR-NEXT:    sw s5, 76(a0)
+; RV32I-SR-NEXT:    sw s4, 72(a0)
+; RV32I-SR-NEXT:    sw s3, 68(a0)
+; RV32I-SR-NEXT:    sw s2, 64(a0)
+; RV32I-SR-NEXT:    sw s1, 60(a0)
+; RV32I-SR-NEXT:    sw s0, 56(a0)
+; RV32I-SR-NEXT:    sw t6, 52(a0)
+; RV32I-SR-NEXT:    sw t5, 48(a0)
+; RV32I-SR-NEXT:    sw t4, 44(a0)
+; RV32I-SR-NEXT:    sw t3, 40(a0)
+; RV32I-SR-NEXT:    sw t2, 36(a0)
+; RV32I-SR-NEXT:    sw t1, 32(a0)
+; RV32I-SR-NEXT:    sw t0, 28(a0)
+; RV32I-SR-NEXT:    sw a7, 24(a0)
+; RV32I-SR-NEXT:    sw a6, 20(a0)
+; RV32I-SR-NEXT:    sw a5, 16(a0)
+; RV32I-SR-NEXT:    sw a4, 12(a0)
+; RV32I-SR-NEXT:    lw a1, 4(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    sw a1, 8(a0)
+; RV32I-SR-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    sw a1, 4(a0)
+; RV32I-SR-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    sw a1, 0(a0)
+; RV32I-SR-NEXT:    addi sp, sp, 16
+; RV32I-SR-NEXT:    tail __riscv_restore_12
 ;
 ; RV64I-SR-LABEL: callee_saved2:
-; RV64I-SR:         call t0, __riscv_save_12
-; RV64I-SR:         tail __riscv_restore_12
+; RV64I-SR:       # %bb.0:
+; RV64I-SR-NEXT:    call t0, __riscv_save_12
+; RV64I-SR-NEXT:    addi sp, sp, -32
+; RV64I-SR-NEXT:    lui a0, %hi(var2)
+; RV64I-SR-NEXT:    addi a0, a0, %lo(var2)
+; RV64I-SR-NEXT:    lw a1, 0(a0)
+; RV64I-SR-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    lw a1, 4(a0)
+; RV64I-SR-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    lw a1, 8(a0)
+; RV64I-SR-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    lw a4, 12(a0)
+; RV64I-SR-NEXT:    lw a5, 16(a0)
+; RV64I-SR-NEXT:    lw a6, 20(a0)
+; RV64I-SR-NEXT:    lw a7, 24(a0)
+; RV64I-SR-NEXT:    lw t0, 28(a0)
+; RV64I-SR-NEXT:    lw t1, 32(a0)
+; RV64I-SR-NEXT:    lw t2, 36(a0)
+; RV64I-SR-NEXT:    lw t3, 40(a0)
+; RV64I-SR-NEXT:    lw t4, 44(a0)
+; RV64I-SR-NEXT:    lw t5, 48(a0)
+; RV64I-SR-NEXT:    lw t6, 52(a0)
+; RV64I-SR-NEXT:    lw s0, 56(a0)
+; RV64I-SR-NEXT:    lw s1, 60(a0)
+; RV64I-SR-NEXT:    lw s2, 64(a0)
+; RV64I-SR-NEXT:    lw s3, 68(a0)
+; RV64I-SR-NEXT:    lw s4, 72(a0)
+; RV64I-SR-NEXT:    lw s5, 76(a0)
+; RV64I-SR-NEXT:    lw s6, 80(a0)
+; RV64I-SR-NEXT:    lw s7, 84(a0)
+; RV64I-SR-NEXT:    lw s8, 88(a0)
+; RV64I-SR-NEXT:    lw s9, 92(a0)
+; RV64I-SR-NEXT:    lw s10, 96(a0)
+; RV64I-SR-NEXT:    lw s11, 100(a0)
+; RV64I-SR-NEXT:    lw ra, 116(a0)
+; RV64I-SR-NEXT:    lw a1, 112(a0)
+; RV64I-SR-NEXT:    lw a2, 108(a0)
+; RV64I-SR-NEXT:    lw a3, 104(a0)
+; RV64I-SR-NEXT:    sw ra, 116(a0)
+; RV64I-SR-NEXT:    sw a1, 112(a0)
+; RV64I-SR-NEXT:    sw a2, 108(a0)
+; RV64I-SR-NEXT:    sw a3, 104(a0)
+; RV64I-SR-NEXT:    sw s11, 100(a0)
+; RV64I-SR-NEXT:    sw s10, 96(a0)
+; RV64I-SR-NEXT:    sw s9, 92(a0)
+; RV64I-SR-NEXT:    sw s8, 88(a0)
+; RV64I-SR-NEXT:    sw s7, 84(a0)
+; RV64I-SR-NEXT:    sw s6, 80(a0)
+; RV64I-SR-NEXT:    sw s5, 76(a0)
+; RV64I-SR-NEXT:    sw s4, 72(a0)
+; RV64I-SR-NEXT:    sw s3, 68(a0)
+; RV64I-SR-NEXT:    sw s2, 64(a0)
+; RV64I-SR-NEXT:    sw s1, 60(a0)
+; RV64I-SR-NEXT:    sw s0, 56(a0)
+; RV64I-SR-NEXT:    sw t6, 52(a0)
+; RV64I-SR-NEXT:    sw t5, 48(a0)
+; RV64I-SR-NEXT:    sw t4, 44(a0)
+; RV64I-SR-NEXT:    sw t3, 40(a0)
+; RV64I-SR-NEXT:    sw t2, 36(a0)
+; RV64I-SR-NEXT:    sw t1, 32(a0)
+; RV64I-SR-NEXT:    sw t0, 28(a0)
+; RV64I-SR-NEXT:    sw a7, 24(a0)
+; RV64I-SR-NEXT:    sw a6, 20(a0)
+; RV64I-SR-NEXT:    sw a5, 16(a0)
+; RV64I-SR-NEXT:    sw a4, 12(a0)
+; RV64I-SR-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    sw a1, 8(a0)
+; RV64I-SR-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    sw a1, 4(a0)
+; RV64I-SR-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    sw a1, 0(a0)
+; RV64I-SR-NEXT:    addi sp, sp, 32
+; RV64I-SR-NEXT:    tail __riscv_restore_12
 ;
 ; RV32I-FP-SR-LABEL: callee_saved2:
-; RV32I-FP-SR:         call t0, __riscv_save_12
-; RV32I-FP-SR:         tail __riscv_restore_12
+; RV32I-FP-SR:       # %bb.0:
+; RV32I-FP-SR-NEXT:    call t0, __riscv_save_12
+; RV32I-FP-SR-NEXT:    addi sp, sp, -16
+; RV32I-FP-SR-NEXT:    lui a0, %hi(var2)
+; RV32I-FP-SR-NEXT:    addi a0, a0, %lo(var2)
+; RV32I-FP-SR-NEXT:    lw a1, 0(a0)
+; RV32I-FP-SR-NEXT:    sw a1, 12(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    lw a1, 4(a0)
+; RV32I-FP-SR-NEXT:    sw a1, 8(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    lw a1, 8(a0)
+; RV32I-FP-SR-NEXT:    sw a1, 4(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    lw a4, 12(a0)
+; RV32I-FP-SR-NEXT:    lw a5, 16(a0)
+; RV32I-FP-SR-NEXT:    lw a6, 20(a0)
+; RV32I-FP-SR-NEXT:    lw a7, 24(a0)
+; RV32I-FP-SR-NEXT:    lw t0, 28(a0)
+; RV32I-FP-SR-NEXT:    lw t1, 32(a0)
+; RV32I-FP-SR-NEXT:    lw t2, 36(a0)
+; RV32I-FP-SR-NEXT:    lw t3, 40(a0)
+; RV32I-FP-SR-NEXT:    lw t4, 44(a0)
+; RV32I-FP-SR-NEXT:    lw t5, 48(a0)
+; RV32I-FP-SR-NEXT:    lw t6, 52(a0)
+; RV32I-FP-SR-NEXT:    lw s0, 56(a0)
+; RV32I-FP-SR-NEXT:    lw s1, 60(a0)
+; RV32I-FP-SR-NEXT:    lw s2, 64(a0)
+; RV32I-FP-SR-NEXT:    lw s3, 68(a0)
+; RV32I-FP-SR-NEXT:    lw s4, 72(a0)
+; RV32I-FP-SR-NEXT:    lw s5, 76(a0)
+; RV32I-FP-SR-NEXT:    lw s6, 80(a0)
+; RV32I-FP-SR-NEXT:    lw s7, 84(a0)
+; RV32I-FP-SR-NEXT:    lw s8, 88(a0)
+; RV32I-FP-SR-NEXT:    lw s9, 92(a0)
+; RV32I-FP-SR-NEXT:    lw s10, 96(a0)
+; RV32I-FP-SR-NEXT:    lw s11, 100(a0)
+; RV32I-FP-SR-NEXT:    lw ra, 116(a0)
+; RV32I-FP-SR-NEXT:    lw a1, 112(a0)
+; RV32I-FP-SR-NEXT:    lw a2, 108(a0)
+; RV32I-FP-SR-NEXT:    lw a3, 104(a0)
+; RV32I-FP-SR-NEXT:    sw ra, 116(a0)
+; RV32I-FP-SR-NEXT:    sw a1, 112(a0)
+; RV32I-FP-SR-NEXT:    sw a2, 108(a0)
+; RV32I-FP-SR-NEXT:    sw a3, 104(a0)
+; RV32I-FP-SR-NEXT:    sw s11, 100(a0)
+; RV32I-FP-SR-NEXT:    sw s10, 96(a0)
+; RV32I-FP-SR-NEXT:    sw s9, 92(a0)
+; RV32I-FP-SR-NEXT:    sw s8, 88(a0)
+; RV32I-FP-SR-NEXT:    sw s7, 84(a0)
+; RV32I-FP-SR-NEXT:    sw s6, 80(a0)
+; RV32I-FP-SR-NEXT:    sw s5, 76(a0)
+; RV32I-FP-SR-NEXT:    sw s4, 72(a0)
+; RV32I-FP-SR-NEXT:    sw s3, 68(a0)
+; RV32I-FP-SR-NEXT:    sw s2, 64(a0)
+; RV32I-FP-SR-NEXT:    sw s1, 60(a0)
+; RV32I-FP-SR-NEXT:    sw s0, 56(a0)
+; RV32I-FP-SR-NEXT:    sw t6, 52(a0)
+; RV32I-FP-SR-NEXT:    sw t5, 48(a0)
+; RV32I-FP-SR-NEXT:    sw t4, 44(a0)
+; RV32I-FP-SR-NEXT:    sw t3, 40(a0)
+; RV32I-FP-SR-NEXT:    sw t2, 36(a0)
+; RV32I-FP-SR-NEXT:    sw t1, 32(a0)
+; RV32I-FP-SR-NEXT:    sw t0, 28(a0)
+; RV32I-FP-SR-NEXT:    sw a7, 24(a0)
+; RV32I-FP-SR-NEXT:    sw a6, 20(a0)
+; RV32I-FP-SR-NEXT:    sw a5, 16(a0)
+; RV32I-FP-SR-NEXT:    sw a4, 12(a0)
+; RV32I-FP-SR-NEXT:    lw a1, 4(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    sw a1, 8(a0)
+; RV32I-FP-SR-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    sw a1, 4(a0)
+; RV32I-FP-SR-NEXT:    lw a1, 12(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    sw a1, 0(a0)
+; RV32I-FP-SR-NEXT:    addi sp, sp, 16
+; RV32I-FP-SR-NEXT:    tail __riscv_restore_12
 ;
 ; RV64I-FP-SR-LABEL: callee_saved2:
-; RV64I-FP-SR:         call t0, __riscv_save_12
-; RV64I-FP-SR:         tail __riscv_restore_12
+; RV64I-FP-SR:       # %bb.0:
+; RV64I-FP-SR-NEXT:    call t0, __riscv_save_12
+; RV64I-FP-SR-NEXT:    addi sp, sp, -32
+; RV64I-FP-SR-NEXT:    lui a0, %hi(var2)
+; RV64I-FP-SR-NEXT:    addi a0, a0, %lo(var2)
+; RV64I-FP-SR-NEXT:    lw a1, 0(a0)
+; RV64I-FP-SR-NEXT:    sd a1, 24(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    lw a1, 4(a0)
+; RV64I-FP-SR-NEXT:    sd a1, 16(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    lw a1, 8(a0)
+; RV64I-FP-SR-NEXT:    sd a1, 8(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    lw a4, 12(a0)
+; RV64I-FP-SR-NEXT:    lw a5, 16(a0)
+; RV64I-FP-SR-NEXT:    lw a6, 20(a0)
+; RV64I-FP-SR-NEXT:    lw a7, 24(a0)
+; RV64I-FP-SR-NEXT:    lw t0, 28(a0)
+; RV64I-FP-SR-NEXT:    lw t1, 32(a0)
+; RV64I-FP-SR-NEXT:    lw t2, 36(a0)
+; RV64I-FP-SR-NEXT:    lw t3, 40(a0)
+; RV64I-FP-SR-NEXT:    lw t4, 44(a0)
+; RV64I-FP-SR-NEXT:    lw t5, 48(a0)
+; RV64I-FP-SR-NEXT:    lw t6, 52(a0)
+; RV64I-FP-SR-NEXT:    lw s0, 56(a0)
+; RV64I-FP-SR-NEXT:    lw s1, 60(a0)
+; RV64I-FP-SR-NEXT:    lw s2, 64(a0)
+; RV64I-FP-SR-NEXT:    lw s3, 68(a0)
+; RV64I-FP-SR-NEXT:    lw s4, 72(a0)
+; RV64I-FP-SR-NEXT:    lw s5, 76(a0)
+; RV64I-FP-SR-NEXT:    lw s6, 80(a0)
+; RV64I-FP-SR-NEXT:    lw s7, 84(a0)
+; RV64I-FP-SR-NEXT:    lw s8, 88(a0)
+; RV64I-FP-SR-NEXT:    lw s9, 92(a0)
+; RV64I-FP-SR-NEXT:    lw s10, 96(a0)
+; RV64I-FP-SR-NEXT:    lw s11, 100(a0)
+; RV64I-FP-SR-NEXT:    lw ra, 116(a0)
+; RV64I-FP-SR-NEXT:    lw a1, 112(a0)
+; RV64I-FP-SR-NEXT:    lw a2, 108(a0)
+; RV64I-FP-SR-NEXT:    lw a3, 104(a0)
+; RV64I-FP-SR-NEXT:    sw ra, 116(a0)
+; RV64I-FP-SR-NEXT:    sw a1, 112(a0)
+; RV64I-FP-SR-NEXT:    sw a2, 108(a0)
+; RV64I-FP-SR-NEXT:    sw a3, 104(a0)
+; RV64I-FP-SR-NEXT:    sw s11, 100(a0)
+; RV64I-FP-SR-NEXT:    sw s10, 96(a0)
+; RV64I-FP-SR-NEXT:    sw s9, 92(a0)
+; RV64I-FP-SR-NEXT:    sw s8, 88(a0)
+; RV64I-FP-SR-NEXT:    sw s7, 84(a0)
+; RV64I-FP-SR-NEXT:    sw s6, 80(a0)
+; RV64I-FP-SR-NEXT:    sw s5, 76(a0)
+; RV64I-FP-SR-NEXT:    sw s4, 72(a0)
+; RV64I-FP-SR-NEXT:    sw s3, 68(a0)
+; RV64I-FP-SR-NEXT:    sw s2, 64(a0)
+; RV64I-FP-SR-NEXT:    sw s1, 60(a0)
+; RV64I-FP-SR-NEXT:    sw s0, 56(a0)
+; RV64I-FP-SR-NEXT:    sw t6, 52(a0)
+; RV64I-FP-SR-NEXT:    sw t5, 48(a0)
+; RV64I-FP-SR-NEXT:    sw t4, 44(a0)
+; RV64I-FP-SR-NEXT:    sw t3, 40(a0)
+; RV64I-FP-SR-NEXT:    sw t2, 36(a0)
+; RV64I-FP-SR-NEXT:    sw t1, 32(a0)
+; RV64I-FP-SR-NEXT:    sw t0, 28(a0)
+; RV64I-FP-SR-NEXT:    sw a7, 24(a0)
+; RV64I-FP-SR-NEXT:    sw a6, 20(a0)
+; RV64I-FP-SR-NEXT:    sw a5, 16(a0)
+; RV64I-FP-SR-NEXT:    sw a4, 12(a0)
+; RV64I-FP-SR-NEXT:    ld a1, 8(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    sw a1, 8(a0)
+; RV64I-FP-SR-NEXT:    ld a1, 16(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    sw a1, 4(a0)
+; RV64I-FP-SR-NEXT:    ld a1, 24(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    sw a1, 0(a0)
+; RV64I-FP-SR-NEXT:    addi sp, sp, 32
+; RV64I-FP-SR-NEXT:    tail __riscv_restore_12
   %val = load [30 x i32], ptr @var2
   store volatile [30 x i32] %val, ptr @var2
   ret void
@@ -103,34 +1180,78 @@ define void @callee_saved2() nounwind {
 
 define void @callee_saved_fp() nounwind {
 ; RV32I-LABEL: callee_saved_fp:
-; RV32I-NOT:     call t0, __riscv_save
-; RV32I-NOT:     tail __riscv_restore
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw s1, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s5, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s6, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    #APP
+; RV32I-NEXT:    #NO_APP
+; RV32I-NEXT:    lw s1, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: callee_saved_fp:
-; RV64I-NOT:     call t0, __riscv_save
-; RV64I-NOT:     tail __riscv_restore
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd s1, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s5, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s6, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    #APP
+; RV64I-NEXT:    #NO_APP
+; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    ret
 ;
 ; RV32I-SR-LABEL: callee_saved_fp:
-; RV32I-SR:         call t0, __riscv_save_7
-; RV32I-SR:         tail __riscv_restore_7
+; RV32I-SR:       # %bb.0:
+; RV32I-SR-NEXT:    call t0, __riscv_save_7
+; RV32I-SR-NEXT:    #APP
+; RV32I-SR-NEXT:    #NO_APP
+; RV32I-SR-NEXT:    tail __riscv_restore_7
 ;
 ; RV64I-SR-LABEL: callee_saved_fp:
-; RV64I-SR:         call t0, __riscv_save_7
-; RV64I-SR:         tail __riscv_restore_7
+; RV64I-SR:       # %bb.0:
+; RV64I-SR-NEXT:    call t0, __riscv_save_7
+; RV64I-SR-NEXT:    #APP
+; RV64I-SR-NEXT:    #NO_APP
+; RV64I-SR-NEXT:    tail __riscv_restore_7
 ;
 ; RV32I-FP-SR-LABEL: callee_saved_fp:
-; RV32I-FP-SR:         call t0, __riscv_save_7
+; RV32I-FP-SR:       # %bb.0:
+; RV32I-FP-SR-NEXT:    call t0, __riscv_save_7
 ; RV32I-FP-SR-NEXT:    addi sp, sp, -16
-; RV32I-FP-SR-NEXT:    fsw fs0, 12(sp)
-; RV32I-FP-SR:         flw fs0, 12(sp)
+; RV32I-FP-SR-NEXT:    fsw fs0, 12(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    #APP
+; RV32I-FP-SR-NEXT:    #NO_APP
+; RV32I-FP-SR-NEXT:    flw fs0, 12(sp) # 4-byte Folded Reload
 ; RV32I-FP-SR-NEXT:    addi sp, sp, 16
 ; RV32I-FP-SR-NEXT:    tail __riscv_restore_7
 ;
 ; RV64I-FP-SR-LABEL: callee_saved_fp:
-; RV64I-FP-SR:         call t0, __riscv_save_7
+; RV64I-FP-SR:       # %bb.0:
+; RV64I-FP-SR-NEXT:    call t0, __riscv_save_7
 ; RV64I-FP-SR-NEXT:    addi sp, sp, -16
-; RV64I-FP-SR-NEXT:    fsd fs0, 8(sp)
-; RV64I-FP-SR:         fld fs0, 8(sp)
+; RV64I-FP-SR-NEXT:    fsd fs0, 8(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    #APP
+; RV64I-FP-SR-NEXT:    #NO_APP
+; RV64I-FP-SR-NEXT:    fld fs0, 8(sp) # 8-byte Folded Reload
 ; RV64I-FP-SR-NEXT:    addi sp, sp, 16
 ; RV64I-FP-SR-NEXT:    tail __riscv_restore_7
   call void asm sideeffect "", "~{f8},~{x9},~{x18},~{x19},~{x20},~{x21},~{x22}"()
@@ -143,34 +1264,328 @@ declare i32 @tail_callee(i32 %i)
 
 define i32 @tail_call(i32 %i) nounwind {
 ; RV32I-LABEL: tail_call:
-; RV32I-NOT:     call t0, __riscv_save
-; RV32I:         tail tail_callee
-; RV32I-NOT:     tail __riscv_restore
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -32
+; RV32I-NEXT:    sw s0, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s4, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a1, %hi(var0)
+; RV32I-NEXT:    addi a1, a1, %lo(var0)
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    lw a3, 4(a1)
+; RV32I-NEXT:    lw a4, 8(a1)
+; RV32I-NEXT:    lw a5, 12(a1)
+; RV32I-NEXT:    lw a6, 16(a1)
+; RV32I-NEXT:    lw a7, 20(a1)
+; RV32I-NEXT:    lw t0, 24(a1)
+; RV32I-NEXT:    lw t1, 28(a1)
+; RV32I-NEXT:    lw t2, 32(a1)
+; RV32I-NEXT:    lw t3, 36(a1)
+; RV32I-NEXT:    lw t4, 40(a1)
+; RV32I-NEXT:    lw t5, 44(a1)
+; RV32I-NEXT:    lw t6, 48(a1)
+; RV32I-NEXT:    lw s0, 52(a1)
+; RV32I-NEXT:    lw s1, 68(a1)
+; RV32I-NEXT:    lw s2, 64(a1)
+; RV32I-NEXT:    lw s3, 60(a1)
+; RV32I-NEXT:    lw s4, 56(a1)
+; RV32I-NEXT:    sw s1, 68(a1)
+; RV32I-NEXT:    sw s2, 64(a1)
+; RV32I-NEXT:    sw s3, 60(a1)
+; RV32I-NEXT:    sw s4, 56(a1)
+; RV32I-NEXT:    sw s0, 52(a1)
+; RV32I-NEXT:    sw t6, 48(a1)
+; RV32I-NEXT:    sw t5, 44(a1)
+; RV32I-NEXT:    sw t4, 40(a1)
+; RV32I-NEXT:    sw t3, 36(a1)
+; RV32I-NEXT:    sw t2, 32(a1)
+; RV32I-NEXT:    sw t1, 28(a1)
+; RV32I-NEXT:    sw t0, 24(a1)
+; RV32I-NEXT:    sw a7, 20(a1)
+; RV32I-NEXT:    sw a6, 16(a1)
+; RV32I-NEXT:    sw a5, 12(a1)
+; RV32I-NEXT:    sw a4, 8(a1)
+; RV32I-NEXT:    sw a3, 4(a1)
+; RV32I-NEXT:    sw a2, 0(a1)
+; RV32I-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    tail tail_callee
 ;
 ; RV64I-LABEL: tail_call:
-; RV64I-NOT:     call t0, __riscv_save
-; RV64I:         tail tail_callee
-; RV64I-NOT:     tail __riscv_restore
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -48
+; RV64I-NEXT:    sd s0, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a1, %hi(var0)
+; RV64I-NEXT:    addi a1, a1, %lo(var0)
+; RV64I-NEXT:    lw a2, 0(a1)
+; RV64I-NEXT:    lw a3, 4(a1)
+; RV64I-NEXT:    lw a4, 8(a1)
+; RV64I-NEXT:    lw a5, 12(a1)
+; RV64I-NEXT:    lw a6, 16(a1)
+; RV64I-NEXT:    lw a7, 20(a1)
+; RV64I-NEXT:    lw t0, 24(a1)
+; RV64I-NEXT:    lw t1, 28(a1)
+; RV64I-NEXT:    lw t2, 32(a1)
+; RV64I-NEXT:    lw t3, 36(a1)
+; RV64I-NEXT:    lw t4, 40(a1)
+; RV64I-NEXT:    lw t5, 44(a1)
+; RV64I-NEXT:    lw t6, 48(a1)
+; RV64I-NEXT:    lw s0, 52(a1)
+; RV64I-NEXT:    lw s1, 68(a1)
+; RV64I-NEXT:    lw s2, 64(a1)
+; RV64I-NEXT:    lw s3, 60(a1)
+; RV64I-NEXT:    lw s4, 56(a1)
+; RV64I-NEXT:    sw s1, 68(a1)
+; RV64I-NEXT:    sw s2, 64(a1)
+; RV64I-NEXT:    sw s3, 60(a1)
+; RV64I-NEXT:    sw s4, 56(a1)
+; RV64I-NEXT:    sw s0, 52(a1)
+; RV64I-NEXT:    sw t6, 48(a1)
+; RV64I-NEXT:    sw t5, 44(a1)
+; RV64I-NEXT:    sw t4, 40(a1)
+; RV64I-NEXT:    sw t3, 36(a1)
+; RV64I-NEXT:    sw t2, 32(a1)
+; RV64I-NEXT:    sw t1, 28(a1)
+; RV64I-NEXT:    sw t0, 24(a1)
+; RV64I-NEXT:    sw a7, 20(a1)
+; RV64I-NEXT:    sw a6, 16(a1)
+; RV64I-NEXT:    sw a5, 12(a1)
+; RV64I-NEXT:    sw a4, 8(a1)
+; RV64I-NEXT:    sw a3, 4(a1)
+; RV64I-NEXT:    sw a2, 0(a1)
+; RV64I-NEXT:    ld s0, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 48
+; RV64I-NEXT:    tail tail_callee
 ;
 ; RV32I-SR-LABEL: tail_call:
-; RV32I-SR-NOT:     call t0, __riscv_save
-; RV32I-SR:         tail tail_callee
-; RV32I-SR-NOT:     tail __riscv_restore
+; RV32I-SR:       # %bb.0: # %entry
+; RV32I-SR-NEXT:    addi sp, sp, -32
+; RV32I-SR-NEXT:    sw s0, 28(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw s1, 24(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw s4, 12(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    lui a1, %hi(var0)
+; RV32I-SR-NEXT:    addi a1, a1, %lo(var0)
+; RV32I-SR-NEXT:    lw a2, 0(a1)
+; RV32I-SR-NEXT:    lw a3, 4(a1)
+; RV32I-SR-NEXT:    lw a4, 8(a1)
+; RV32I-SR-NEXT:    lw a5, 12(a1)
+; RV32I-SR-NEXT:    lw a6, 16(a1)
+; RV32I-SR-NEXT:    lw a7, 20(a1)
+; RV32I-SR-NEXT:    lw t0, 24(a1)
+; RV32I-SR-NEXT:    lw t1, 28(a1)
+; RV32I-SR-NEXT:    lw t2, 32(a1)
+; RV32I-SR-NEXT:    lw t3, 36(a1)
+; RV32I-SR-NEXT:    lw t4, 40(a1)
+; RV32I-SR-NEXT:    lw t5, 44(a1)
+; RV32I-SR-NEXT:    lw t6, 48(a1)
+; RV32I-SR-NEXT:    lw s0, 52(a1)
+; RV32I-SR-NEXT:    lw s1, 68(a1)
+; RV32I-SR-NEXT:    lw s2, 64(a1)
+; RV32I-SR-NEXT:    lw s3, 60(a1)
+; RV32I-SR-NEXT:    lw s4, 56(a1)
+; RV32I-SR-NEXT:    sw s1, 68(a1)
+; RV32I-SR-NEXT:    sw s2, 64(a1)
+; RV32I-SR-NEXT:    sw s3, 60(a1)
+; RV32I-SR-NEXT:    sw s4, 56(a1)
+; RV32I-SR-NEXT:    sw s0, 52(a1)
+; RV32I-SR-NEXT:    sw t6, 48(a1)
+; RV32I-SR-NEXT:    sw t5, 44(a1)
+; RV32I-SR-NEXT:    sw t4, 40(a1)
+; RV32I-SR-NEXT:    sw t3, 36(a1)
+; RV32I-SR-NEXT:    sw t2, 32(a1)
+; RV32I-SR-NEXT:    sw t1, 28(a1)
+; RV32I-SR-NEXT:    sw t0, 24(a1)
+; RV32I-SR-NEXT:    sw a7, 20(a1)
+; RV32I-SR-NEXT:    sw a6, 16(a1)
+; RV32I-SR-NEXT:    sw a5, 12(a1)
+; RV32I-SR-NEXT:    sw a4, 8(a1)
+; RV32I-SR-NEXT:    sw a3, 4(a1)
+; RV32I-SR-NEXT:    sw a2, 0(a1)
+; RV32I-SR-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw s4, 12(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    addi sp, sp, 32
+; RV32I-SR-NEXT:    tail tail_callee
 ;
 ; RV64I-SR-LABEL: tail_call:
-; RV64I-SR-NOT:     call t0, __riscv_save
-; RV64I-SR:         tail tail_callee
-; RV64I-SR-NOT:     tail __riscv_restore
+; RV64I-SR:       # %bb.0: # %entry
+; RV64I-SR-NEXT:    addi sp, sp, -48
+; RV64I-SR-NEXT:    sd s0, 40(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd s1, 32(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd s2, 24(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd s3, 16(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    lui a1, %hi(var0)
+; RV64I-SR-NEXT:    addi a1, a1, %lo(var0)
+; RV64I-SR-NEXT:    lw a2, 0(a1)
+; RV64I-SR-NEXT:    lw a3, 4(a1)
+; RV64I-SR-NEXT:    lw a4, 8(a1)
+; RV64I-SR-NEXT:    lw a5, 12(a1)
+; RV64I-SR-NEXT:    lw a6, 16(a1)
+; RV64I-SR-NEXT:    lw a7, 20(a1)
+; RV64I-SR-NEXT:    lw t0, 24(a1)
+; RV64I-SR-NEXT:    lw t1, 28(a1)
+; RV64I-SR-NEXT:    lw t2, 32(a1)
+; RV64I-SR-NEXT:    lw t3, 36(a1)
+; RV64I-SR-NEXT:    lw t4, 40(a1)
+; RV64I-SR-NEXT:    lw t5, 44(a1)
+; RV64I-SR-NEXT:    lw t6, 48(a1)
+; RV64I-SR-NEXT:    lw s0, 52(a1)
+; RV64I-SR-NEXT:    lw s1, 68(a1)
+; RV64I-SR-NEXT:    lw s2, 64(a1)
+; RV64I-SR-NEXT:    lw s3, 60(a1)
+; RV64I-SR-NEXT:    lw s4, 56(a1)
+; RV64I-SR-NEXT:    sw s1, 68(a1)
+; RV64I-SR-NEXT:    sw s2, 64(a1)
+; RV64I-SR-NEXT:    sw s3, 60(a1)
+; RV64I-SR-NEXT:    sw s4, 56(a1)
+; RV64I-SR-NEXT:    sw s0, 52(a1)
+; RV64I-SR-NEXT:    sw t6, 48(a1)
+; RV64I-SR-NEXT:    sw t5, 44(a1)
+; RV64I-SR-NEXT:    sw t4, 40(a1)
+; RV64I-SR-NEXT:    sw t3, 36(a1)
+; RV64I-SR-NEXT:    sw t2, 32(a1)
+; RV64I-SR-NEXT:    sw t1, 28(a1)
+; RV64I-SR-NEXT:    sw t0, 24(a1)
+; RV64I-SR-NEXT:    sw a7, 20(a1)
+; RV64I-SR-NEXT:    sw a6, 16(a1)
+; RV64I-SR-NEXT:    sw a5, 12(a1)
+; RV64I-SR-NEXT:    sw a4, 8(a1)
+; RV64I-SR-NEXT:    sw a3, 4(a1)
+; RV64I-SR-NEXT:    sw a2, 0(a1)
+; RV64I-SR-NEXT:    ld s0, 40(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld s1, 32(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld s2, 24(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld s3, 16(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld s4, 8(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    addi sp, sp, 48
+; RV64I-SR-NEXT:    tail tail_callee
 ;
 ; RV32I-FP-SR-LABEL: tail_call:
-; RV32I-FP-SR-NOT:     call t0, __riscv_save
-; RV32I-FP-SR:         tail tail_callee
-; RV32I-FP-SR-NOT:     tail __riscv_restore
+; RV32I-FP-SR:       # %bb.0: # %entry
+; RV32I-FP-SR-NEXT:    addi sp, sp, -32
+; RV32I-FP-SR-NEXT:    sw s0, 28(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw s1, 24(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw s4, 12(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    lui a1, %hi(var0)
+; RV32I-FP-SR-NEXT:    addi a1, a1, %lo(var0)
+; RV32I-FP-SR-NEXT:    lw a2, 0(a1)
+; RV32I-FP-SR-NEXT:    lw a3, 4(a1)
+; RV32I-FP-SR-NEXT:    lw a4, 8(a1)
+; RV32I-FP-SR-NEXT:    lw a5, 12(a1)
+; RV32I-FP-SR-NEXT:    lw a6, 16(a1)
+; RV32I-FP-SR-NEXT:    lw a7, 20(a1)
+; RV32I-FP-SR-NEXT:    lw t0, 24(a1)
+; RV32I-FP-SR-NEXT:    lw t1, 28(a1)
+; RV32I-FP-SR-NEXT:    lw t2, 32(a1)
+; RV32I-FP-SR-NEXT:    lw t3, 36(a1)
+; RV32I-FP-SR-NEXT:    lw t4, 40(a1)
+; RV32I-FP-SR-NEXT:    lw t5, 44(a1)
+; RV32I-FP-SR-NEXT:    lw t6, 48(a1)
+; RV32I-FP-SR-NEXT:    lw s0, 52(a1)
+; RV32I-FP-SR-NEXT:    lw s1, 68(a1)
+; RV32I-FP-SR-NEXT:    lw s2, 64(a1)
+; RV32I-FP-SR-NEXT:    lw s3, 60(a1)
+; RV32I-FP-SR-NEXT:    lw s4, 56(a1)
+; RV32I-FP-SR-NEXT:    sw s1, 68(a1)
+; RV32I-FP-SR-NEXT:    sw s2, 64(a1)
+; RV32I-FP-SR-NEXT:    sw s3, 60(a1)
+; RV32I-FP-SR-NEXT:    sw s4, 56(a1)
+; RV32I-FP-SR-NEXT:    sw s0, 52(a1)
+; RV32I-FP-SR-NEXT:    sw t6, 48(a1)
+; RV32I-FP-SR-NEXT:    sw t5, 44(a1)
+; RV32I-FP-SR-NEXT:    sw t4, 40(a1)
+; RV32I-FP-SR-NEXT:    sw t3, 36(a1)
+; RV32I-FP-SR-NEXT:    sw t2, 32(a1)
+; RV32I-FP-SR-NEXT:    sw t1, 28(a1)
+; RV32I-FP-SR-NEXT:    sw t0, 24(a1)
+; RV32I-FP-SR-NEXT:    sw a7, 20(a1)
+; RV32I-FP-SR-NEXT:    sw a6, 16(a1)
+; RV32I-FP-SR-NEXT:    sw a5, 12(a1)
+; RV32I-FP-SR-NEXT:    sw a4, 8(a1)
+; RV32I-FP-SR-NEXT:    sw a3, 4(a1)
+; RV32I-FP-SR-NEXT:    sw a2, 0(a1)
+; RV32I-FP-SR-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw s4, 12(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    addi sp, sp, 32
+; RV32I-FP-SR-NEXT:    tail tail_callee
 ;
 ; RV64I-FP-SR-LABEL: tail_call:
-; RV64I-FP-SR-NOT:     call t0, __riscv_save
-; RV64I-FP-SR:         tail tail_callee
-; RV64I-FP-SR-NOT:     tail __riscv_restore
+; RV64I-FP-SR:       # %bb.0: # %entry
+; RV64I-FP-SR-NEXT:    addi sp, sp, -48
+; RV64I-FP-SR-NEXT:    sd s0, 40(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd s1, 32(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd s2, 24(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd s3, 16(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd s4, 8(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    lui a1, %hi(var0)
+; RV64I-FP-SR-NEXT:    addi a1, a1, %lo(var0)
+; RV64I-FP-SR-NEXT:    lw a2, 0(a1)
+; RV64I-FP-SR-NEXT:    lw a3, 4(a1)
+; RV64I-FP-SR-NEXT:    lw a4, 8(a1)
+; RV64I-FP-SR-NEXT:    lw a5, 12(a1)
+; RV64I-FP-SR-NEXT:    lw a6, 16(a1)
+; RV64I-FP-SR-NEXT:    lw a7, 20(a1)
+; RV64I-FP-SR-NEXT:    lw t0, 24(a1)
+; RV64I-FP-SR-NEXT:    lw t1, 28(a1)
+; RV64I-FP-SR-NEXT:    lw t2, 32(a1)
+; RV64I-FP-SR-NEXT:    lw t3, 36(a1)
+; RV64I-FP-SR-NEXT:    lw t4, 40(a1)
+; RV64I-FP-SR-NEXT:    lw t5, 44(a1)
+; RV64I-FP-SR-NEXT:    lw t6, 48(a1)
+; RV64I-FP-SR-NEXT:    lw s0, 52(a1)
+; RV64I-FP-SR-NEXT:    lw s1, 68(a1)
+; RV64I-FP-SR-NEXT:    lw s2, 64(a1)
+; RV64I-FP-SR-NEXT:    lw s3, 60(a1)
+; RV64I-FP-SR-NEXT:    lw s4, 56(a1)
+; RV64I-FP-SR-NEXT:    sw s1, 68(a1)
+; RV64I-FP-SR-NEXT:    sw s2, 64(a1)
+; RV64I-FP-SR-NEXT:    sw s3, 60(a1)
+; RV64I-FP-SR-NEXT:    sw s4, 56(a1)
+; RV64I-FP-SR-NEXT:    sw s0, 52(a1)
+; RV64I-FP-SR-NEXT:    sw t6, 48(a1)
+; RV64I-FP-SR-NEXT:    sw t5, 44(a1)
+; RV64I-FP-SR-NEXT:    sw t4, 40(a1)
+; RV64I-FP-SR-NEXT:    sw t3, 36(a1)
+; RV64I-FP-SR-NEXT:    sw t2, 32(a1)
+; RV64I-FP-SR-NEXT:    sw t1, 28(a1)
+; RV64I-FP-SR-NEXT:    sw t0, 24(a1)
+; RV64I-FP-SR-NEXT:    sw a7, 20(a1)
+; RV64I-FP-SR-NEXT:    sw a6, 16(a1)
+; RV64I-FP-SR-NEXT:    sw a5, 12(a1)
+; RV64I-FP-SR-NEXT:    sw a4, 8(a1)
+; RV64I-FP-SR-NEXT:    sw a3, 4(a1)
+; RV64I-FP-SR-NEXT:    sw a2, 0(a1)
+; RV64I-FP-SR-NEXT:    ld s0, 40(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld s1, 32(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld s2, 24(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld s3, 16(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld s4, 8(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    addi sp, sp, 48
+; RV64I-FP-SR-NEXT:    tail tail_callee
 entry:
   %val = load [18 x i32], ptr @var0
   store volatile [18 x i32] %val, ptr @var0
@@ -185,28 +1600,124 @@ declare void @llvm.va_end(ptr)
 
 define i32 @varargs(ptr %fmt, ...) nounwind {
 ; RV32I-LABEL: varargs:
-; RV32I-NOT:     call t0, __riscv_save
-; RV32I-NOT:     tail __riscv_restore
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -48
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    sw a7, 44(sp)
+; RV32I-NEXT:    sw a6, 40(sp)
+; RV32I-NEXT:    sw a5, 36(sp)
+; RV32I-NEXT:    sw a4, 32(sp)
+; RV32I-NEXT:    sw a3, 28(sp)
+; RV32I-NEXT:    sw a2, 24(sp)
+; RV32I-NEXT:    sw a1, 20(sp)
+; RV32I-NEXT:    addi a1, sp, 24
+; RV32I-NEXT:    sw a1, 12(sp)
+; RV32I-NEXT:    addi sp, sp, 48
+; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: varargs:
-; RV64I-NOT:     call t0, __riscv_save
-; RV64I-NOT:     tail __riscv_restore
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -80
+; RV64I-NEXT:    sd a7, 72(sp)
+; RV64I-NEXT:    sd a6, 64(sp)
+; RV64I-NEXT:    sd a5, 56(sp)
+; RV64I-NEXT:    sd a4, 48(sp)
+; RV64I-NEXT:    sd a3, 40(sp)
+; RV64I-NEXT:    addi a0, sp, 24
+; RV64I-NEXT:    sd a0, 8(sp)
+; RV64I-NEXT:    lwu a0, 12(sp)
+; RV64I-NEXT:    lwu a3, 8(sp)
+; RV64I-NEXT:    sd a2, 32(sp)
+; RV64I-NEXT:    sd a1, 24(sp)
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    or a0, a0, a3
+; RV64I-NEXT:    addi a1, a0, 4
+; RV64I-NEXT:    sw a1, 8(sp)
+; RV64I-NEXT:    srli a1, a1, 32
+; RV64I-NEXT:    sw a1, 12(sp)
+; RV64I-NEXT:    lw a0, 0(a0)
+; RV64I-NEXT:    addi sp, sp, 80
+; RV64I-NEXT:    ret
 ;
 ; RV32I-SR-LABEL: varargs:
-; RV32I-SR-NOT:     call t0, __riscv_save
-; RV32I-SR-NOT:     tail __riscv_restore
+; RV32I-SR:       # %bb.0:
+; RV32I-SR-NEXT:    addi sp, sp, -48
+; RV32I-SR-NEXT:    mv a0, a1
+; RV32I-SR-NEXT:    sw a7, 44(sp)
+; RV32I-SR-NEXT:    sw a6, 40(sp)
+; RV32I-SR-NEXT:    sw a5, 36(sp)
+; RV32I-SR-NEXT:    sw a4, 32(sp)
+; RV32I-SR-NEXT:    sw a3, 28(sp)
+; RV32I-SR-NEXT:    sw a2, 24(sp)
+; RV32I-SR-NEXT:    sw a1, 20(sp)
+; RV32I-SR-NEXT:    addi a1, sp, 24
+; RV32I-SR-NEXT:    sw a1, 12(sp)
+; RV32I-SR-NEXT:    addi sp, sp, 48
+; RV32I-SR-NEXT:    ret
 ;
 ; RV64I-SR-LABEL: varargs:
-; RV64I-SR-NOT:     call t0, __riscv_save
-; RV64I-SR-NOT:     tail __riscv_restore
+; RV64I-SR:       # %bb.0:
+; RV64I-SR-NEXT:    addi sp, sp, -80
+; RV64I-SR-NEXT:    sd a7, 72(sp)
+; RV64I-SR-NEXT:    sd a6, 64(sp)
+; RV64I-SR-NEXT:    sd a5, 56(sp)
+; RV64I-SR-NEXT:    sd a4, 48(sp)
+; RV64I-SR-NEXT:    sd a3, 40(sp)
+; RV64I-SR-NEXT:    addi a0, sp, 24
+; RV64I-SR-NEXT:    sd a0, 8(sp)
+; RV64I-SR-NEXT:    lwu a0, 12(sp)
+; RV64I-SR-NEXT:    lwu a3, 8(sp)
+; RV64I-SR-NEXT:    sd a2, 32(sp)
+; RV64I-SR-NEXT:    sd a1, 24(sp)
+; RV64I-SR-NEXT:    slli a0, a0, 32
+; RV64I-SR-NEXT:    or a0, a0, a3
+; RV64I-SR-NEXT:    addi a1, a0, 4
+; RV64I-SR-NEXT:    sw a1, 8(sp)
+; RV64I-SR-NEXT:    srli a1, a1, 32
+; RV64I-SR-NEXT:    sw a1, 12(sp)
+; RV64I-SR-NEXT:    lw a0, 0(a0)
+; RV64I-SR-NEXT:    addi sp, sp, 80
+; RV64I-SR-NEXT:    ret
 ;
 ; RV32I-FP-SR-LABEL: varargs:
-; RV32I-FP-SR-NOT:     call t0, __riscv_save
-; RV32I-FP-SR-NOT:     tail __riscv_restore
+; RV32I-FP-SR:       # %bb.0:
+; RV32I-FP-SR-NEXT:    addi sp, sp, -48
+; RV32I-FP-SR-NEXT:    mv a0, a1
+; RV32I-FP-SR-NEXT:    sw a7, 44(sp)
+; RV32I-FP-SR-NEXT:    sw a6, 40(sp)
+; RV32I-FP-SR-NEXT:    sw a5, 36(sp)
+; RV32I-FP-SR-NEXT:    sw a4, 32(sp)
+; RV32I-FP-SR-NEXT:    sw a3, 28(sp)
+; RV32I-FP-SR-NEXT:    sw a2, 24(sp)
+; RV32I-FP-SR-NEXT:    sw a1, 20(sp)
+; RV32I-FP-SR-NEXT:    addi a1, sp, 24
+; RV32I-FP-SR-NEXT:    sw a1, 12(sp)
+; RV32I-FP-SR-NEXT:    addi sp, sp, 48
+; RV32I-FP-SR-NEXT:    ret
 ;
 ; RV64I-FP-SR-LABEL: varargs:
-; RV64I-FP-SR-NOT:     call t0, __riscv_save
-; RV64I-FP-SR-NOT:     tail __riscv_restore
+; RV64I-FP-SR:       # %bb.0:
+; RV64I-FP-SR-NEXT:    addi sp, sp, -80
+; RV64I-FP-SR-NEXT:    sd a7, 72(sp)
+; RV64I-FP-SR-NEXT:    sd a6, 64(sp)
+; RV64I-FP-SR-NEXT:    sd a5, 56(sp)
+; RV64I-FP-SR-NEXT:    sd a4, 48(sp)
+; RV64I-FP-SR-NEXT:    sd a3, 40(sp)
+; RV64I-FP-SR-NEXT:    addi a0, sp, 24
+; RV64I-FP-SR-NEXT:    sd a0, 8(sp)
+; RV64I-FP-SR-NEXT:    lwu a0, 12(sp)
+; RV64I-FP-SR-NEXT:    lwu a3, 8(sp)
+; RV64I-FP-SR-NEXT:    sd a2, 32(sp)
+; RV64I-FP-SR-NEXT:    sd a1, 24(sp)
+; RV64I-FP-SR-NEXT:    slli a0, a0, 32
+; RV64I-FP-SR-NEXT:    or a0, a0, a3
+; RV64I-FP-SR-NEXT:    addi a1, a0, 4
+; RV64I-FP-SR-NEXT:    sw a1, 8(sp)
+; RV64I-FP-SR-NEXT:    srli a1, a1, 32
+; RV64I-FP-SR-NEXT:    sw a1, 12(sp)
+; RV64I-FP-SR-NEXT:    lw a0, 0(a0)
+; RV64I-FP-SR-NEXT:    addi sp, sp, 80
+; RV64I-FP-SR-NEXT:    ret
   %va = alloca ptr, align 4
   call void @llvm.va_start(ptr %va)
   %argp.cur = load ptr, ptr %va, align 4
@@ -219,28 +1730,280 @@ define i32 @varargs(ptr %fmt, ...) nounwind {
 
 define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
 ; RV32I-LABEL: many_args:
-; RV32I-NOT:     call t0, __riscv_save
-; RV32I-NOT:     tail __riscv_restore
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s2, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s3, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lui a0, %hi(var0)
+; RV32I-NEXT:    addi a0, a0, %lo(var0)
+; RV32I-NEXT:    lw a1, 0(a0)
+; RV32I-NEXT:    lw a2, 4(a0)
+; RV32I-NEXT:    lw a3, 8(a0)
+; RV32I-NEXT:    lw a4, 12(a0)
+; RV32I-NEXT:    lw a5, 16(a0)
+; RV32I-NEXT:    lw a6, 20(a0)
+; RV32I-NEXT:    lw a7, 24(a0)
+; RV32I-NEXT:    lw t0, 28(a0)
+; RV32I-NEXT:    lw t1, 32(a0)
+; RV32I-NEXT:    lw t2, 36(a0)
+; RV32I-NEXT:    lw t3, 40(a0)
+; RV32I-NEXT:    lw t4, 44(a0)
+; RV32I-NEXT:    lw t5, 48(a0)
+; RV32I-NEXT:    lw t6, 52(a0)
+; RV32I-NEXT:    lw s0, 68(a0)
+; RV32I-NEXT:    lw s1, 64(a0)
+; RV32I-NEXT:    lw s2, 60(a0)
+; RV32I-NEXT:    lw s3, 56(a0)
+; RV32I-NEXT:    sw s0, 68(a0)
+; RV32I-NEXT:    sw s1, 64(a0)
+; RV32I-NEXT:    sw s2, 60(a0)
+; RV32I-NEXT:    sw s3, 56(a0)
+; RV32I-NEXT:    sw t6, 52(a0)
+; RV32I-NEXT:    sw t5, 48(a0)
+; RV32I-NEXT:    sw t4, 44(a0)
+; RV32I-NEXT:    sw t3, 40(a0)
+; RV32I-NEXT:    sw t2, 36(a0)
+; RV32I-NEXT:    sw t1, 32(a0)
+; RV32I-NEXT:    sw t0, 28(a0)
+; RV32I-NEXT:    sw a7, 24(a0)
+; RV32I-NEXT:    sw a6, 20(a0)
+; RV32I-NEXT:    sw a5, 16(a0)
+; RV32I-NEXT:    sw a4, 12(a0)
+; RV32I-NEXT:    sw a3, 8(a0)
+; RV32I-NEXT:    sw a2, 4(a0)
+; RV32I-NEXT:    sw a1, 0(a0)
+; RV32I-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: many_args:
-; RV64I-NOT:     call t0, __riscv_save
-; RV64I-NOT:     tail __riscv_restore
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd s0, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s2, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s3, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    lui a0, %hi(var0)
+; RV64I-NEXT:    addi a0, a0, %lo(var0)
+; RV64I-NEXT:    lw a1, 0(a0)
+; RV64I-NEXT:    lw a2, 4(a0)
+; RV64I-NEXT:    lw a3, 8(a0)
+; RV64I-NEXT:    lw a4, 12(a0)
+; RV64I-NEXT:    lw a5, 16(a0)
+; RV64I-NEXT:    lw a6, 20(a0)
+; RV64I-NEXT:    lw a7, 24(a0)
+; RV64I-NEXT:    lw t0, 28(a0)
+; RV64I-NEXT:    lw t1, 32(a0)
+; RV64I-NEXT:    lw t2, 36(a0)
+; RV64I-NEXT:    lw t3, 40(a0)
+; RV64I-NEXT:    lw t4, 44(a0)
+; RV64I-NEXT:    lw t5, 48(a0)
+; RV64I-NEXT:    lw t6, 52(a0)
+; RV64I-NEXT:    lw s0, 68(a0)
+; RV64I-NEXT:    lw s1, 64(a0)
+; RV64I-NEXT:    lw s2, 60(a0)
+; RV64I-NEXT:    lw s3, 56(a0)
+; RV64I-NEXT:    sw s0, 68(a0)
+; RV64I-NEXT:    sw s1, 64(a0)
+; RV64I-NEXT:    sw s2, 60(a0)
+; RV64I-NEXT:    sw s3, 56(a0)
+; RV64I-NEXT:    sw t6, 52(a0)
+; RV64I-NEXT:    sw t5, 48(a0)
+; RV64I-NEXT:    sw t4, 44(a0)
+; RV64I-NEXT:    sw t3, 40(a0)
+; RV64I-NEXT:    sw t2, 36(a0)
+; RV64I-NEXT:    sw t1, 32(a0)
+; RV64I-NEXT:    sw t0, 28(a0)
+; RV64I-NEXT:    sw a7, 24(a0)
+; RV64I-NEXT:    sw a6, 20(a0)
+; RV64I-NEXT:    sw a5, 16(a0)
+; RV64I-NEXT:    sw a4, 12(a0)
+; RV64I-NEXT:    sw a3, 8(a0)
+; RV64I-NEXT:    sw a2, 4(a0)
+; RV64I-NEXT:    sw a1, 0(a0)
+; RV64I-NEXT:    ld s0, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
 ;
 ; RV32I-SR-LABEL: many_args:
-; RV32I-SR:         call t0, __riscv_save_5
-; RV32I-SR:         tail __riscv_restore_5
+; RV32I-SR:       # %bb.0: # %entry
+; RV32I-SR-NEXT:    call t0, __riscv_save_4
+; RV32I-SR-NEXT:    lui a0, %hi(var0)
+; RV32I-SR-NEXT:    addi a0, a0, %lo(var0)
+; RV32I-SR-NEXT:    lw a1, 0(a0)
+; RV32I-SR-NEXT:    lw a2, 4(a0)
+; RV32I-SR-NEXT:    lw a3, 8(a0)
+; RV32I-SR-NEXT:    lw a4, 12(a0)
+; RV32I-SR-NEXT:    lw a5, 16(a0)
+; RV32I-SR-NEXT:    lw a6, 20(a0)
+; RV32I-SR-NEXT:    lw a7, 24(a0)
+; RV32I-SR-NEXT:    lw t0, 28(a0)
+; RV32I-SR-NEXT:    lw t1, 32(a0)
+; RV32I-SR-NEXT:    lw t2, 36(a0)
+; RV32I-SR-NEXT:    lw t3, 40(a0)
+; RV32I-SR-NEXT:    lw t4, 44(a0)
+; RV32I-SR-NEXT:    lw t5, 48(a0)
+; RV32I-SR-NEXT:    lw t6, 52(a0)
+; RV32I-SR-NEXT:    lw s0, 68(a0)
+; RV32I-SR-NEXT:    lw s1, 64(a0)
+; RV32I-SR-NEXT:    lw s2, 60(a0)
+; RV32I-SR-NEXT:    lw s3, 56(a0)
+; RV32I-SR-NEXT:    sw s0, 68(a0)
+; RV32I-SR-NEXT:    sw s1, 64(a0)
+; RV32I-SR-NEXT:    sw s2, 60(a0)
+; RV32I-SR-NEXT:    sw s3, 56(a0)
+; RV32I-SR-NEXT:    sw t6, 52(a0)
+; RV32I-SR-NEXT:    sw t5, 48(a0)
+; RV32I-SR-NEXT:    sw t4, 44(a0)
+; RV32I-SR-NEXT:    sw t3, 40(a0)
+; RV32I-SR-NEXT:    sw t2, 36(a0)
+; RV32I-SR-NEXT:    sw t1, 32(a0)
+; RV32I-SR-NEXT:    sw t0, 28(a0)
+; RV32I-SR-NEXT:    sw a7, 24(a0)
+; RV32I-SR-NEXT:    sw a6, 20(a0)
+; RV32I-SR-NEXT:    sw a5, 16(a0)
+; RV32I-SR-NEXT:    sw a4, 12(a0)
+; RV32I-SR-NEXT:    sw a3, 8(a0)
+; RV32I-SR-NEXT:    sw a2, 4(a0)
+; RV32I-SR-NEXT:    sw a1, 0(a0)
+; RV32I-SR-NEXT:    tail __riscv_restore_4
 ;
 ; RV64I-SR-LABEL: many_args:
-; RV64I-SR:         call t0, __riscv_save_5
-; RV64I-SR:         tail __riscv_restore_5
+; RV64I-SR:       # %bb.0: # %entry
+; RV64I-SR-NEXT:    call t0, __riscv_save_4
+; RV64I-SR-NEXT:    lui a0, %hi(var0)
+; RV64I-SR-NEXT:    addi a0, a0, %lo(var0)
+; RV64I-SR-NEXT:    lw a1, 0(a0)
+; RV64I-SR-NEXT:    lw a2, 4(a0)
+; RV64I-SR-NEXT:    lw a3, 8(a0)
+; RV64I-SR-NEXT:    lw a4, 12(a0)
+; RV64I-SR-NEXT:    lw a5, 16(a0)
+; RV64I-SR-NEXT:    lw a6, 20(a0)
+; RV64I-SR-NEXT:    lw a7, 24(a0)
+; RV64I-SR-NEXT:    lw t0, 28(a0)
+; RV64I-SR-NEXT:    lw t1, 32(a0)
+; RV64I-SR-NEXT:    lw t2, 36(a0)
+; RV64I-SR-NEXT:    lw t3, 40(a0)
+; RV64I-SR-NEXT:    lw t4, 44(a0)
+; RV64I-SR-NEXT:    lw t5, 48(a0)
+; RV64I-SR-NEXT:    lw t6, 52(a0)
+; RV64I-SR-NEXT:    lw s0, 68(a0)
+; RV64I-SR-NEXT:    lw s1, 64(a0)
+; RV64I-SR-NEXT:    lw s2, 60(a0)
+; RV64I-SR-NEXT:    lw s3, 56(a0)
+; RV64I-SR-NEXT:    sw s0, 68(a0)
+; RV64I-SR-NEXT:    sw s1, 64(a0)
+; RV64I-SR-NEXT:    sw s2, 60(a0)
+; RV64I-SR-NEXT:    sw s3, 56(a0)
+; RV64I-SR-NEXT:    sw t6, 52(a0)
+; RV64I-SR-NEXT:    sw t5, 48(a0)
+; RV64I-SR-NEXT:    sw t4, 44(a0)
+; RV64I-SR-NEXT:    sw t3, 40(a0)
+; RV64I-SR-NEXT:    sw t2, 36(a0)
+; RV64I-SR-NEXT:    sw t1, 32(a0)
+; RV64I-SR-NEXT:    sw t0, 28(a0)
+; RV64I-SR-NEXT:    sw a7, 24(a0)
+; RV64I-SR-NEXT:    sw a6, 20(a0)
+; RV64I-SR-NEXT:    sw a5, 16(a0)
+; RV64I-SR-NEXT:    sw a4, 12(a0)
+; RV64I-SR-NEXT:    sw a3, 8(a0)
+; RV64I-SR-NEXT:    sw a2, 4(a0)
+; RV64I-SR-NEXT:    sw a1, 0(a0)
+; RV64I-SR-NEXT:    tail __riscv_restore_4
 ;
 ; RV32I-FP-SR-LABEL: many_args:
-; RV32I-FP-SR:         call t0, __riscv_save_5
-; RV32I-FP-SR:         tail __riscv_restore_5
+; RV32I-FP-SR:       # %bb.0: # %entry
+; RV32I-FP-SR-NEXT:    call t0, __riscv_save_4
+; RV32I-FP-SR-NEXT:    lui a0, %hi(var0)
+; RV32I-FP-SR-NEXT:    addi a0, a0, %lo(var0)
+; RV32I-FP-SR-NEXT:    lw a1, 0(a0)
+; RV32I-FP-SR-NEXT:    lw a2, 4(a0)
+; RV32I-FP-SR-NEXT:    lw a3, 8(a0)
+; RV32I-FP-SR-NEXT:    lw a4, 12(a0)
+; RV32I-FP-SR-NEXT:    lw a5, 16(a0)
+; RV32I-FP-SR-NEXT:    lw a6, 20(a0)
+; RV32I-FP-SR-NEXT:    lw a7, 24(a0)
+; RV32I-FP-SR-NEXT:    lw t0, 28(a0)
+; RV32I-FP-SR-NEXT:    lw t1, 32(a0)
+; RV32I-FP-SR-NEXT:    lw t2, 36(a0)
+; RV32I-FP-SR-NEXT:    lw t3, 40(a0)
+; RV32I-FP-SR-NEXT:    lw t4, 44(a0)
+; RV32I-FP-SR-NEXT:    lw t5, 48(a0)
+; RV32I-FP-SR-NEXT:    lw t6, 52(a0)
+; RV32I-FP-SR-NEXT:    lw s0, 68(a0)
+; RV32I-FP-SR-NEXT:    lw s1, 64(a0)
+; RV32I-FP-SR-NEXT:    lw s2, 60(a0)
+; RV32I-FP-SR-NEXT:    lw s3, 56(a0)
+; RV32I-FP-SR-NEXT:    sw s0, 68(a0)
+; RV32I-FP-SR-NEXT:    sw s1, 64(a0)
+; RV32I-FP-SR-NEXT:    sw s2, 60(a0)
+; RV32I-FP-SR-NEXT:    sw s3, 56(a0)
+; RV32I-FP-SR-NEXT:    sw t6, 52(a0)
+; RV32I-FP-SR-NEXT:    sw t5, 48(a0)
+; RV32I-FP-SR-NEXT:    sw t4, 44(a0)
+; RV32I-FP-SR-NEXT:    sw t3, 40(a0)
+; RV32I-FP-SR-NEXT:    sw t2, 36(a0)
+; RV32I-FP-SR-NEXT:    sw t1, 32(a0)
+; RV32I-FP-SR-NEXT:    sw t0, 28(a0)
+; RV32I-FP-SR-NEXT:    sw a7, 24(a0)
+; RV32I-FP-SR-NEXT:    sw a6, 20(a0)
+; RV32I-FP-SR-NEXT:    sw a5, 16(a0)
+; RV32I-FP-SR-NEXT:    sw a4, 12(a0)
+; RV32I-FP-SR-NEXT:    sw a3, 8(a0)
+; RV32I-FP-SR-NEXT:    sw a2, 4(a0)
+; RV32I-FP-SR-NEXT:    sw a1, 0(a0)
+; RV32I-FP-SR-NEXT:    tail __riscv_restore_4
 ;
 ; RV64I-FP-SR-LABEL: many_args:
-; RV64I-FP-SR:         call t0, __riscv_save_5
-; RV64I-FP-SR:         tail __riscv_restore_5
+; RV64I-FP-SR:       # %bb.0: # %entry
+; RV64I-FP-SR-NEXT:    call t0, __riscv_save_4
+; RV64I-FP-SR-NEXT:    lui a0, %hi(var0)
+; RV64I-FP-SR-NEXT:    addi a0, a0, %lo(var0)
+; RV64I-FP-SR-NEXT:    lw a1, 0(a0)
+; RV64I-FP-SR-NEXT:    lw a2, 4(a0)
+; RV64I-FP-SR-NEXT:    lw a3, 8(a0)
+; RV64I-FP-SR-NEXT:    lw a4, 12(a0)
+; RV64I-FP-SR-NEXT:    lw a5, 16(a0)
+; RV64I-FP-SR-NEXT:    lw a6, 20(a0)
+; RV64I-FP-SR-NEXT:    lw a7, 24(a0)
+; RV64I-FP-SR-NEXT:    lw t0, 28(a0)
+; RV64I-FP-SR-NEXT:    lw t1, 32(a0)
+; RV64I-FP-SR-NEXT:    lw t2, 36(a0)
+; RV64I-FP-SR-NEXT:    lw t3, 40(a0)
+; RV64I-FP-SR-NEXT:    lw t4, 44(a0)
+; RV64I-FP-SR-NEXT:    lw t5, 48(a0)
+; RV64I-FP-SR-NEXT:    lw t6, 52(a0)
+; RV64I-FP-SR-NEXT:    lw s0, 68(a0)
+; RV64I-FP-SR-NEXT:    lw s1, 64(a0)
+; RV64I-FP-SR-NEXT:    lw s2, 60(a0)
+; RV64I-FP-SR-NEXT:    lw s3, 56(a0)
+; RV64I-FP-SR-NEXT:    sw s0, 68(a0)
+; RV64I-FP-SR-NEXT:    sw s1, 64(a0)
+; RV64I-FP-SR-NEXT:    sw s2, 60(a0)
+; RV64I-FP-SR-NEXT:    sw s3, 56(a0)
+; RV64I-FP-SR-NEXT:    sw t6, 52(a0)
+; RV64I-FP-SR-NEXT:    sw t5, 48(a0)
+; RV64I-FP-SR-NEXT:    sw t4, 44(a0)
+; RV64I-FP-SR-NEXT:    sw t3, 40(a0)
+; RV64I-FP-SR-NEXT:    sw t2, 36(a0)
+; RV64I-FP-SR-NEXT:    sw t1, 32(a0)
+; RV64I-FP-SR-NEXT:    sw t0, 28(a0)
+; RV64I-FP-SR-NEXT:    sw a7, 24(a0)
+; RV64I-FP-SR-NEXT:    sw a6, 20(a0)
+; RV64I-FP-SR-NEXT:    sw a5, 16(a0)
+; RV64I-FP-SR-NEXT:    sw a4, 12(a0)
+; RV64I-FP-SR-NEXT:    sw a3, 8(a0)
+; RV64I-FP-SR-NEXT:    sw a2, 4(a0)
+; RV64I-FP-SR-NEXT:    sw a1, 0(a0)
+; RV64I-FP-SR-NEXT:    tail __riscv_restore_4
 entry:
   %val = load [18 x i32], ptr @var0
   store volatile [18 x i32] %val, ptr @var0
@@ -255,40 +2018,108 @@ declare void @notdead(ptr)
 
 define void @alloca(i32 %n) nounwind {
 ; RV32I-LABEL: alloca:
-; RV32I-NOT:     call t0, __riscv_save
-; RV32I:         addi s0, sp, 16
-; RV32I:         addi sp, s0, -16
-; RV32I-NOT:     tail __riscv_restore
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    mv s1, sp
+; RV32I-NEXT:    addi a0, a0, 15
+; RV32I-NEXT:    andi a0, a0, -16
+; RV32I-NEXT:    sub a0, sp, a0
+; RV32I-NEXT:    mv sp, a0
+; RV32I-NEXT:    call notdead
+; RV32I-NEXT:    mv sp, s1
+; RV32I-NEXT:    addi sp, s0, -16
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: alloca:
-; RV64I-NOT:     call t0, __riscv_save
-; RV64I:         addi s0, sp, 32
-; RV64I:         addi sp, s0, -32
-; RV64I-NOT:     tail __riscv_restore
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    addi s0, sp, 32
+; RV64I-NEXT:    mv s1, sp
+; RV64I-NEXT:    slli a0, a0, 32
+; RV64I-NEXT:    srli a0, a0, 32
+; RV64I-NEXT:    addi a0, a0, 15
+; RV64I-NEXT:    andi a0, a0, -16
+; RV64I-NEXT:    sub a0, sp, a0
+; RV64I-NEXT:    mv sp, a0
+; RV64I-NEXT:    call notdead
+; RV64I-NEXT:    mv sp, s1
+; RV64I-NEXT:    addi sp, s0, -32
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
 ;
 ; RV32I-SR-LABEL: alloca:
-; RV32I-SR:         call t0, __riscv_save_2
-; RV32I-SR:         addi s0, sp, 16
-; RV32I-SR:         addi sp, s0, -16
-; RV32I-SR:         tail __riscv_restore_2
+; RV32I-SR:       # %bb.0:
+; RV32I-SR-NEXT:    call t0, __riscv_save_2
+; RV32I-SR-NEXT:    addi s0, sp, 16
+; RV32I-SR-NEXT:    mv s1, sp
+; RV32I-SR-NEXT:    addi a0, a0, 15
+; RV32I-SR-NEXT:    andi a0, a0, -16
+; RV32I-SR-NEXT:    sub a0, sp, a0
+; RV32I-SR-NEXT:    mv sp, a0
+; RV32I-SR-NEXT:    call notdead
+; RV32I-SR-NEXT:    mv sp, s1
+; RV32I-SR-NEXT:    addi sp, s0, -16
+; RV32I-SR-NEXT:    tail __riscv_restore_2
 ;
 ; RV64I-SR-LABEL: alloca:
-; RV64I-SR:         call t0, __riscv_save_2
-; RV64I-SR:         addi s0, sp, 32
-; RV64I-SR:         addi sp, s0, -32
-; RV64I-SR:         tail __riscv_restore_2
+; RV64I-SR:       # %bb.0:
+; RV64I-SR-NEXT:    call t0, __riscv_save_2
+; RV64I-SR-NEXT:    addi s0, sp, 32
+; RV64I-SR-NEXT:    mv s1, sp
+; RV64I-SR-NEXT:    slli a0, a0, 32
+; RV64I-SR-NEXT:    srli a0, a0, 32
+; RV64I-SR-NEXT:    addi a0, a0, 15
+; RV64I-SR-NEXT:    andi a0, a0, -16
+; RV64I-SR-NEXT:    sub a0, sp, a0
+; RV64I-SR-NEXT:    mv sp, a0
+; RV64I-SR-NEXT:    call notdead
+; RV64I-SR-NEXT:    mv sp, s1
+; RV64I-SR-NEXT:    addi sp, s0, -32
+; RV64I-SR-NEXT:    tail __riscv_restore_2
 ;
 ; RV32I-FP-SR-LABEL: alloca:
-; RV32I-FP-SR:         call t0, __riscv_save_2
-; RV32I-FP-SR:         addi s0, sp, 16
-; RV32I-FP-SR:         addi sp, s0, -16
-; RV32I-FP-SR:         tail __riscv_restore_2
+; RV32I-FP-SR:       # %bb.0:
+; RV32I-FP-SR-NEXT:    call t0, __riscv_save_2
+; RV32I-FP-SR-NEXT:    addi s0, sp, 16
+; RV32I-FP-SR-NEXT:    mv s1, sp
+; RV32I-FP-SR-NEXT:    addi a0, a0, 15
+; RV32I-FP-SR-NEXT:    andi a0, a0, -16
+; RV32I-FP-SR-NEXT:    sub a0, sp, a0
+; RV32I-FP-SR-NEXT:    mv sp, a0
+; RV32I-FP-SR-NEXT:    call notdead
+; RV32I-FP-SR-NEXT:    mv sp, s1
+; RV32I-FP-SR-NEXT:    addi sp, s0, -16
+; RV32I-FP-SR-NEXT:    tail __riscv_restore_2
 ;
 ; RV64I-FP-SR-LABEL: alloca:
-; RV64I-FP-SR:         call t0, __riscv_save_2
-; RV64I-FP-SR:         addi s0, sp, 32
-; RV64I-FP-SR:         addi sp, s0, -32
-; RV64I-FP-SR:         tail __riscv_restore_2
+; RV64I-FP-SR:       # %bb.0:
+; RV64I-FP-SR-NEXT:    call t0, __riscv_save_2
+; RV64I-FP-SR-NEXT:    addi s0, sp, 32
+; RV64I-FP-SR-NEXT:    mv s1, sp
+; RV64I-FP-SR-NEXT:    slli a0, a0, 32
+; RV64I-FP-SR-NEXT:    srli a0, a0, 32
+; RV64I-FP-SR-NEXT:    addi a0, a0, 15
+; RV64I-FP-SR-NEXT:    andi a0, a0, -16
+; RV64I-FP-SR-NEXT:    sub a0, sp, a0
+; RV64I-FP-SR-NEXT:    mv sp, a0
+; RV64I-FP-SR-NEXT:    call notdead
+; RV64I-FP-SR-NEXT:    mv sp, s1
+; RV64I-FP-SR-NEXT:    addi sp, s0, -32
+; RV64I-FP-SR-NEXT:    tail __riscv_restore_2
   %sp = call ptr @llvm.stacksave()
   %addr = alloca i8, i32 %n
   call void @notdead(ptr %addr)
@@ -301,28 +2132,318 @@ define void @alloca(i32 %n) nounwind {
 declare i32 @foo(...)
 define void @interrupt() nounwind "interrupt"="supervisor" {
 ; RV32I-LABEL: interrupt:
-; RV32I-NOT:     call t0, __riscv_save
-; RV32I-NOT:     tail __riscv_restore
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -64
+; RV32I-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t1, 52(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t2, 48(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a5, 24(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a6, 20(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a7, 16(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t5, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw t6, 0(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call foo
+; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 64
+; RV32I-NEXT:    sret
 ;
 ; RV64I-LABEL: interrupt:
-; RV64I-NOT:     call t0, __riscv_save
-; RV64I-NOT:     tail __riscv_restore
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    addi sp, sp, -128
+; RV64I-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t1, 104(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t2, 96(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a1, 80(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a2, 72(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a3, 64(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a4, 56(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a5, 48(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a6, 40(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd a7, 32(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t3, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t4, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t5, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd t6, 0(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    call foo
+; RV64I-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t0, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t1, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t2, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a3, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a4, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a5, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a6, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld a7, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t3, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t4, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t5, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld t6, 0(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 128
+; RV64I-NEXT:    sret
 ;
 ; RV32I-SR-LABEL: interrupt:
-; RV32I-SR-NOT:     call t0, __riscv_save
-; RV32I-SR-NOT:     tail __riscv_restore
+; RV32I-SR:       # %bb.0:
+; RV32I-SR-NEXT:    addi sp, sp, -64
+; RV32I-SR-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw t1, 52(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw t2, 48(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw a5, 24(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw a6, 20(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw a7, 16(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw t5, 4(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    sw t6, 0(sp) # 4-byte Folded Spill
+; RV32I-SR-NEXT:    call foo
+; RV32I-SR-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
+; RV32I-SR-NEXT:    addi sp, sp, 64
+; RV32I-SR-NEXT:    sret
 ;
 ; RV64I-SR-LABEL: interrupt:
-; RV64I-SR-NOT:     call t0, __riscv_save
-; RV64I-SR-NOT:     tail __riscv_restore
+; RV64I-SR:       # %bb.0:
+; RV64I-SR-NEXT:    addi sp, sp, -128
+; RV64I-SR-NEXT:    sd ra, 120(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd t1, 104(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd t2, 96(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd a0, 88(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd a1, 80(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd a2, 72(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd a3, 64(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd a4, 56(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd a5, 48(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd a6, 40(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd a7, 32(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd t3, 24(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd t4, 16(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd t5, 8(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    sd t6, 0(sp) # 8-byte Folded Spill
+; RV64I-SR-NEXT:    call foo
+; RV64I-SR-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld t0, 112(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld t1, 104(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld t2, 96(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld a3, 64(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld a4, 56(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld a5, 48(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld a6, 40(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld a7, 32(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld t3, 24(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld t4, 16(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld t5, 8(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    ld t6, 0(sp) # 8-byte Folded Reload
+; RV64I-SR-NEXT:    addi sp, sp, 128
+; RV64I-SR-NEXT:    sret
 ;
 ; RV32I-FP-SR-LABEL: interrupt:
-; RV32I-FP-SR-NOT:     call t0, __riscv_save
-; RV32I-FP-SR-NOT:     tail __riscv_restore
+; RV32I-FP-SR:       # %bb.0:
+; RV32I-FP-SR-NEXT:    addi sp, sp, -144
+; RV32I-FP-SR-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw t0, 136(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw t1, 132(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw t2, 128(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw a0, 124(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw a1, 120(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw a2, 116(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw a3, 112(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw a4, 108(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw a5, 104(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw a6, 100(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw a7, 96(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw t3, 92(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw t4, 88(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw t5, 84(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    sw t6, 80(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft0, 76(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft1, 72(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft2, 68(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft3, 64(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft4, 60(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft5, 56(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft6, 52(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft7, 48(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw fa0, 44(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw fa1, 40(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw fa2, 36(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw fa3, 32(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw fa4, 28(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw fa5, 24(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw fa6, 20(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw fa7, 16(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft8, 12(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft9, 8(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft10, 4(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    fsw ft11, 0(sp) # 4-byte Folded Spill
+; RV32I-FP-SR-NEXT:    call foo
+; RV32I-FP-SR-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw t0, 136(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw t1, 132(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw t2, 128(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw a0, 124(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw a1, 120(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw a2, 116(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw a3, 112(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw a4, 108(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw a5, 104(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw a6, 100(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw a7, 96(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw t3, 92(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw t4, 88(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw t5, 84(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    lw t6, 80(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft0, 76(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft1, 72(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft2, 68(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft3, 64(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft4, 60(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft5, 56(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft6, 52(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft7, 48(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw fa0, 44(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw fa1, 40(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw fa2, 36(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw fa3, 32(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw fa4, 28(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw fa5, 24(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw fa6, 20(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw fa7, 16(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft8, 12(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft9, 8(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft10, 4(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    flw ft11, 0(sp) # 4-byte Folded Reload
+; RV32I-FP-SR-NEXT:    addi sp, sp, 144
+; RV32I-FP-SR-NEXT:    sret
 ;
 ; RV64I-FP-SR-LABEL: interrupt:
-; RV64I-FP-SR-NOT:     call t0, __riscv_save
-; RV64I-FP-SR-NOT:     tail __riscv_restore
+; RV64I-FP-SR:       # %bb.0:
+; RV64I-FP-SR-NEXT:    addi sp, sp, -288
+; RV64I-FP-SR-NEXT:    sd ra, 280(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd t0, 272(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd t1, 264(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd t2, 256(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd a0, 248(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd a1, 240(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd a3, 224(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd a4, 216(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd a5, 208(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd a6, 200(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd a7, 192(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd t3, 184(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd t4, 176(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd t5, 168(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    sd t6, 160(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft0, 152(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft1, 144(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft2, 136(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft3, 128(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft4, 120(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft5, 112(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft6, 104(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft7, 96(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd fa0, 88(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd fa1, 80(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd fa2, 72(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd fa3, 64(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd fa4, 56(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd fa5, 48(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd fa6, 40(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd fa7, 32(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft8, 24(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft9, 16(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft10, 8(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    fsd ft11, 0(sp) # 8-byte Folded Spill
+; RV64I-FP-SR-NEXT:    call foo
+; RV64I-FP-SR-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld t0, 272(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld t1, 264(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld t2, 256(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld a1, 240(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld a2, 232(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld a3, 224(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld a4, 216(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld a5, 208(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld a6, 200(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld a7, 192(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld t3, 184(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld t4, 176(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld t5, 168(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    ld t6, 160(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft0, 152(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft1, 144(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft2, 136(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft3, 128(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft4, 120(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft5, 112(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft6, 104(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft7, 96(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld fa0, 88(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld fa1, 80(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld fa2, 72(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld fa3, 64(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld fa4, 56(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld fa5, 48(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld fa6, 40(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld fa7, 32(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft8, 24(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft9, 16(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft10, 8(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    fld ft11, 0(sp) # 8-byte Folded Reload
+; RV64I-FP-SR-NEXT:    addi sp, sp, 288
+; RV64I-FP-SR-NEXT:    sret
   %call = call i32 @foo()
   ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
index 771a72f8d55f0..11ff368e44601 100644
--- a/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
+++ b/llvm/test/CodeGen/RISCV/zext-with-load-is-free.ll
@@ -10,11 +10,11 @@ define dso_local i32 @test_zext_i8() nounwind {
 ; RV32I-LABEL: test_zext_i8:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %hi(bytes)
-; RV32I-NEXT:    lbu a1, %lo(bytes)(a0)
+; RV32I-NEXT:    addi a0, a0, %lo(bytes)
+; RV32I-NEXT:    lbu a1, 0(a0)
 ; RV32I-NEXT:    li a2, 136
 ; RV32I-NEXT:    bne a1, a2, .LBB0_3
 ; RV32I-NEXT:  # %bb.1: # %entry
-; RV32I-NEXT:    addi a0, a0, %lo(bytes)
 ; RV32I-NEXT:    lbu a0, 1(a0)
 ; RV32I-NEXT:    li a1, 7
 ; RV32I-NEXT:    bne a0, a1, .LBB0_3
@@ -45,12 +45,12 @@ define dso_local i32 @test_zext_i16() nounwind {
 ; RV32I-LABEL: test_zext_i16:
 ; RV32I:       # %bb.0: # %entry
 ; RV32I-NEXT:    lui a0, %hi(shorts)
-; RV32I-NEXT:    lhu a1, %lo(shorts)(a0)
+; RV32I-NEXT:    addi a0, a0, %lo(shorts)
+; RV32I-NEXT:    lhu a1, 0(a0)
 ; RV32I-NEXT:    lui a2, 16
 ; RV32I-NEXT:    addi a2, a2, -120
 ; RV32I-NEXT:    bne a1, a2, .LBB1_3
 ; RV32I-NEXT:  # %bb.1: # %entry
-; RV32I-NEXT:    addi a0, a0, %lo(shorts)
 ; RV32I-NEXT:    lhu a0, 2(a0)
 ; RV32I-NEXT:    li a1, 7
 ; RV32I-NEXT:    bne a0, a1, .LBB1_3

>From 1a3b3a99a703c973cefe4b8f6a18e646c2e294f0 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 22 May 2024 23:28:48 -0700
Subject: [PATCH 2/2] [RISCV] Add a rematerializable pseudo instruction for
 LUI+ADDI for global addresses.

This allows register allocation to rematerialize these instead of
spilling and reloading. We need to make it a single instruction due
to limitations in rematerialization.

This pseudo is expanded to an LUI+ADDI pair between regalloc and
post RA scheduling.

Co-authored-by: Jesse Huang <jesse.huang at sifive.com>
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       |  16 +
 .../lib/Target/RISCV/RISCVMergeBaseOffset.cpp |  35 ++-
 .../RISCV/RISCVPostRAExpandPseudoInsts.cpp    |  23 ++
 .../test/CodeGen/RISCV/callee-saved-fpr32s.ll |  32 +-
 .../test/CodeGen/RISCV/callee-saved-fpr64s.ll |  20 +-
 llvm/test/CodeGen/RISCV/callee-saved-gprs.ll  |  40 +--
 llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll    |  22 +-
 .../CodeGen/RISCV/ctz_zero_return_test.ll     |   8 +-
 .../early-clobber-tied-def-subreg-liveness.ll |  14 +-
 .../test/CodeGen/RISCV/fold-addi-loadstore.ll |  16 +-
 llvm/test/CodeGen/RISCV/mem.ll                |   4 +-
 llvm/test/CodeGen/RISCV/mem64.ll              |   4 +-
 llvm/test/CodeGen/RISCV/memcpy.ll             |  70 ++---
 llvm/test/CodeGen/RISCV/push-pop-popret.ll    |  24 +-
 llvm/test/CodeGen/RISCV/rv32xtheadbb.ll       |   4 +-
 llvm/test/CodeGen/RISCV/rv32zbb.ll            |   4 +-
 .../CodeGen/RISCV/rv64-legal-i32/mem64.ll     |   4 +-
 .../CodeGen/RISCV/rvv/active_lane_mask.ll     |  40 +--
 .../CodeGen/RISCV/rvv/fixed-vectors-int.ll    |   4 +-
 .../rvv/fixed-vectors-interleaved-access.ll   | 275 +++++++++---------
 .../RISCV/rvv/fixed-vectors-mask-buildvec.ll  |  20 +-
 .../RISCV/rvv/fixed-vectors-masked-gather.ll  |  16 +-
 .../rvv/fixed-vectors-shuffle-reverse.ll      |  80 ++---
 .../RISCV/rvv/fixed-vectors-stepvector.ll     |  10 +-
 .../test/CodeGen/RISCV/rvv/shuffle-reverse.ll |  50 ++--
 llvm/test/CodeGen/RISCV/tail-calls.ll         |   8 +-
 llvm/test/CodeGen/RISCV/unroll-loop-cse.ll    |  32 +-
 27 files changed, 459 insertions(+), 416 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 9d574edb4e6d1..8903ddc1903af 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1311,6 +1311,22 @@ def : Pat<(FrameAddrRegImm (iPTR GPR:$rs1), simm12:$imm12),
 
 /// HI and ADD_LO address nodes.
 
+let Size = 8, isReMaterializable = 1 in
+def PseudoLIaddr : Pseudo<(outs GPR:$dst), (ins uimm20_lui:$hi, simm12:$lo), []>,
+                     Sched<[WriteIALU]>;
+
+def LUIADDI : PatFrag<(ops node:$hi, node:$lo),
+                      (riscv_add_lo (riscv_hi node:$hi), node:$lo)>;
+
+def : Pat<(LUIADDI tglobaladdr:$hi, tglobaladdr:$lo),
+          (PseudoLIaddr tglobaladdr:$hi, tglobaladdr:$lo)>;
+def : Pat<(LUIADDI tblockaddress:$hi, tblockaddress:$lo),
+          (PseudoLIaddr tblockaddress:$hi, tblockaddress:$lo)>;
+def : Pat<(LUIADDI tjumptable:$hi, tjumptable:$lo),
+          (PseudoLIaddr tjumptable:$hi, tjumptable:$lo)>;
+def : Pat<(LUIADDI tconstpool:$hi, tconstpool:$lo),
+          (PseudoLIaddr tconstpool:$hi, tconstpool:$lo)>;
+
 def : Pat<(riscv_hi tglobaladdr:$in), (LUI tglobaladdr:$in)>;
 def : Pat<(riscv_hi tblockaddress:$in), (LUI tblockaddress:$in)>;
 def : Pat<(riscv_hi tjumptable:$in), (LUI tjumptable:$in)>;
diff --git a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
index 410989177a8b9..1b8ad38682b55 100644
--- a/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp
@@ -84,7 +84,8 @@ INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
 //    3) The offset value in the Global Address or Constant Pool is 0.
 bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
                                              MachineInstr *&Lo) {
-  if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
+  if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC &&
+      Hi.getOpcode() != RISCV::PseudoLIaddr)
     return false;
 
   const MachineOperand &HiOp1 = Hi.getOperand(1);
@@ -97,16 +98,22 @@ bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
       HiOp1.getOffset() != 0)
     return false;
 
-  Register HiDestReg = Hi.getOperand(0).getReg();
-  if (!MRI->hasOneUse(HiDestReg))
-    return false;
+  if (Hi.getOpcode() == RISCV::PseudoLIaddr) {
+    // Most of the code handles this correctly without modification by
+    // setting both Lo and Hi to point to the PseudoLIaddr.
+    Lo = &Hi;
+  } else {
+    Register HiDestReg = Hi.getOperand(0).getReg();
+    if (!MRI->hasOneUse(HiDestReg))
+      return false;
 
-  Lo = &*MRI->use_instr_begin(HiDestReg);
-  if (Lo->getOpcode() != RISCV::ADDI)
-    return false;
+    Lo = &*MRI->use_instr_begin(HiDestReg);
+    if (Lo->getOpcode() != RISCV::ADDI)
+      return false;
+  }
 
   const MachineOperand &LoOp2 = Lo->getOperand(2);
-  if (Hi.getOpcode() == RISCV::LUI) {
+  if (Hi.getOpcode() == RISCV::LUI || Hi.getOpcode() == RISCV::PseudoLIaddr) {
     if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
         !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
         LoOp2.getOffset() != 0)
@@ -466,6 +473,13 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
 
   Hi.getOperand(1).setOffset(NewOffset);
   MachineOperand &ImmOp = Lo.getOperand(2);
+  // Rewrite the PseudoLIaddr in place into an LUI by dropping its low operand.
+  if (Hi.getOpcode() == RISCV::PseudoLIaddr) {
+    auto *TII = ST->getInstrInfo();
+    Hi.setDesc(TII->get(RISCV::LUI));
+    Hi.removeOperand(2);
+  }
+
   if (Hi.getOpcode() != RISCV::AUIPC)
     ImmOp.setOffset(NewOffset);
 
@@ -501,6 +515,11 @@ bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
     }
   }
 
+  // Prevent Lo (originally a PseudoLIaddr, which Hi also points to) from
+  // being erased below.
+  if (&Lo == &Hi)
+    return true;
+
   MRI->replaceRegWith(Lo.getOperand(0).getReg(), Hi.getOperand(0).getReg());
   Lo.eraseFromParent();
   return true;
diff --git a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
index 52f2ce27164d6..ce82fbea10063 100644
--- a/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVPostRAExpandPseudoInsts.cpp
@@ -44,6 +44,7 @@ class RISCVPostRAExpandPseudo : public MachineFunctionPass {
   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                 MachineBasicBlock::iterator &NextMBBI);
   bool expandMovImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+  bool expandLIaddr(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
 };
 
 char RISCVPostRAExpandPseudo::ID = 0;
@@ -75,6 +76,8 @@ bool RISCVPostRAExpandPseudo::expandMI(MachineBasicBlock &MBB,
   switch (MBBI->getOpcode()) {
   case RISCV::PseudoMovImm:
     return expandMovImm(MBB, MBBI);
+  case RISCV::PseudoLIaddr:
+    return expandLIaddr(MBB, MBBI);
   default:
     return false;
   }
@@ -101,6 +104,26 @@ bool RISCVPostRAExpandPseudo::expandMovImm(MachineBasicBlock &MBB,
   return true;
 }
 
+bool RISCVPostRAExpandPseudo::expandLIaddr(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MBBI) {
+  DebugLoc DL = MBBI->getDebugLoc();
+
+  Register DstReg = MBBI->getOperand(0).getReg();
+  bool DstIsDead = MBBI->getOperand(0).isDead();
+  bool Renamable = MBBI->getOperand(0).isRenamable();
+
+  BuildMI(MBB, MBBI, DL, TII->get(RISCV::LUI))
+      .addReg(DstReg, RegState::Define | getRenamableRegState(Renamable))
+      .add(MBBI->getOperand(1));
+  BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADDI))
+      .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead) |
+                          getRenamableRegState(Renamable))
+      .addReg(DstReg, RegState::Kill | getRenamableRegState(Renamable))
+      .add(MBBI->getOperand(2));
+  MBBI->eraseFromParent();
+  return true;
+}
+
 } // end of anonymous namespace
 
 INITIALIZE_PASS(RISCVPostRAExpandPseudo, "riscv-expand-pseudolisimm32",
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
index b525f9aa59c09..036daf587eda0 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
@@ -706,8 +706,8 @@ define void @caller() nounwind {
 ; ILP32-NEXT:    addi sp, sp, -144
 ; ILP32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
 ; ILP32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    lui a0, %hi(var)
-; ILP32-NEXT:    addi s0, a0, %lo(var)
+; ILP32-NEXT:    lui s0, %hi(var)
+; ILP32-NEXT:    addi s0, s0, %lo(var)
 ; ILP32-NEXT:    flw fa5, 0(s0)
 ; ILP32-NEXT:    fsw fa5, 132(sp) # 4-byte Folded Spill
 ; ILP32-NEXT:    flw fa5, 4(s0)
@@ -847,8 +847,8 @@ define void @caller() nounwind {
 ; ILP32E-NEXT:    addi sp, sp, -136
 ; ILP32E-NEXT:    sw ra, 132(sp) # 4-byte Folded Spill
 ; ILP32E-NEXT:    sw s0, 128(sp) # 4-byte Folded Spill
-; ILP32E-NEXT:    lui a0, %hi(var)
-; ILP32E-NEXT:    addi s0, a0, %lo(var)
+; ILP32E-NEXT:    lui s0, %hi(var)
+; ILP32E-NEXT:    addi s0, s0, %lo(var)
 ; ILP32E-NEXT:    flw fa5, 0(s0)
 ; ILP32E-NEXT:    fsw fa5, 124(sp) # 4-byte Folded Spill
 ; ILP32E-NEXT:    flw fa5, 4(s0)
@@ -988,8 +988,8 @@ define void @caller() nounwind {
 ; LP64-NEXT:    addi sp, sp, -144
 ; LP64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
 ; LP64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
-; LP64-NEXT:    lui a0, %hi(var)
-; LP64-NEXT:    addi s0, a0, %lo(var)
+; LP64-NEXT:    lui s0, %hi(var)
+; LP64-NEXT:    addi s0, s0, %lo(var)
 ; LP64-NEXT:    flw fa5, 0(s0)
 ; LP64-NEXT:    fsw fa5, 124(sp) # 4-byte Folded Spill
 ; LP64-NEXT:    flw fa5, 4(s0)
@@ -1129,8 +1129,8 @@ define void @caller() nounwind {
 ; LP64E-NEXT:    addi sp, sp, -144
 ; LP64E-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
 ; LP64E-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    lui a0, %hi(var)
-; LP64E-NEXT:    addi s0, a0, %lo(var)
+; LP64E-NEXT:    lui s0, %hi(var)
+; LP64E-NEXT:    addi s0, s0, %lo(var)
 ; LP64E-NEXT:    flw fa5, 0(s0)
 ; LP64E-NEXT:    fsw fa5, 124(sp) # 4-byte Folded Spill
 ; LP64E-NEXT:    flw fa5, 4(s0)
@@ -1282,8 +1282,8 @@ define void @caller() nounwind {
 ; ILP32F-NEXT:    fsw fs9, 96(sp) # 4-byte Folded Spill
 ; ILP32F-NEXT:    fsw fs10, 92(sp) # 4-byte Folded Spill
 ; ILP32F-NEXT:    fsw fs11, 88(sp) # 4-byte Folded Spill
-; ILP32F-NEXT:    lui a0, %hi(var)
-; ILP32F-NEXT:    addi s0, a0, %lo(var)
+; ILP32F-NEXT:    lui s0, %hi(var)
+; ILP32F-NEXT:    addi s0, s0, %lo(var)
 ; ILP32F-NEXT:    flw fa5, 0(s0)
 ; ILP32F-NEXT:    fsw fa5, 84(sp) # 4-byte Folded Spill
 ; ILP32F-NEXT:    flw fa5, 4(s0)
@@ -1423,8 +1423,8 @@ define void @caller() nounwind {
 ; LP64F-NEXT:    fsw fs9, 88(sp) # 4-byte Folded Spill
 ; LP64F-NEXT:    fsw fs10, 84(sp) # 4-byte Folded Spill
 ; LP64F-NEXT:    fsw fs11, 80(sp) # 4-byte Folded Spill
-; LP64F-NEXT:    lui a0, %hi(var)
-; LP64F-NEXT:    addi s0, a0, %lo(var)
+; LP64F-NEXT:    lui s0, %hi(var)
+; LP64F-NEXT:    addi s0, s0, %lo(var)
 ; LP64F-NEXT:    flw fa5, 0(s0)
 ; LP64F-NEXT:    fsw fa5, 76(sp) # 4-byte Folded Spill
 ; LP64F-NEXT:    flw fa5, 4(s0)
@@ -1564,8 +1564,8 @@ define void @caller() nounwind {
 ; ILP32D-NEXT:    fsd fs9, 104(sp) # 8-byte Folded Spill
 ; ILP32D-NEXT:    fsd fs10, 96(sp) # 8-byte Folded Spill
 ; ILP32D-NEXT:    fsd fs11, 88(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    lui a0, %hi(var)
-; ILP32D-NEXT:    addi s0, a0, %lo(var)
+; ILP32D-NEXT:    lui s0, %hi(var)
+; ILP32D-NEXT:    addi s0, s0, %lo(var)
 ; ILP32D-NEXT:    flw fa5, 0(s0)
 ; ILP32D-NEXT:    fsw fa5, 84(sp) # 4-byte Folded Spill
 ; ILP32D-NEXT:    flw fa5, 4(s0)
@@ -1705,8 +1705,8 @@ define void @caller() nounwind {
 ; LP64D-NEXT:    fsd fs9, 96(sp) # 8-byte Folded Spill
 ; LP64D-NEXT:    fsd fs10, 88(sp) # 8-byte Folded Spill
 ; LP64D-NEXT:    fsd fs11, 80(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    lui a0, %hi(var)
-; LP64D-NEXT:    addi s0, a0, %lo(var)
+; LP64D-NEXT:    lui s0, %hi(var)
+; LP64D-NEXT:    addi s0, s0, %lo(var)
 ; LP64D-NEXT:    flw fa5, 0(s0)
 ; LP64D-NEXT:    fsw fa5, 76(sp) # 4-byte Folded Spill
 ; LP64D-NEXT:    flw fa5, 4(s0)
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
index 0b761e354d442..98696008c1ee7 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
@@ -438,8 +438,8 @@ define void @caller() nounwind {
 ; ILP32-NEXT:    addi sp, sp, -272
 ; ILP32-NEXT:    sw ra, 268(sp) # 4-byte Folded Spill
 ; ILP32-NEXT:    sw s0, 264(sp) # 4-byte Folded Spill
-; ILP32-NEXT:    lui a0, %hi(var)
-; ILP32-NEXT:    addi s0, a0, %lo(var)
+; ILP32-NEXT:    lui s0, %hi(var)
+; ILP32-NEXT:    addi s0, s0, %lo(var)
 ; ILP32-NEXT:    fld fa5, 0(s0)
 ; ILP32-NEXT:    fsd fa5, 256(sp) # 8-byte Folded Spill
 ; ILP32-NEXT:    fld fa5, 8(s0)
@@ -579,8 +579,8 @@ define void @caller() nounwind {
 ; LP64-NEXT:    addi sp, sp, -272
 ; LP64-NEXT:    sd ra, 264(sp) # 8-byte Folded Spill
 ; LP64-NEXT:    sd s0, 256(sp) # 8-byte Folded Spill
-; LP64-NEXT:    lui a0, %hi(var)
-; LP64-NEXT:    addi s0, a0, %lo(var)
+; LP64-NEXT:    lui s0, %hi(var)
+; LP64-NEXT:    addi s0, s0, %lo(var)
 ; LP64-NEXT:    fld fa5, 0(s0)
 ; LP64-NEXT:    fsd fa5, 248(sp) # 8-byte Folded Spill
 ; LP64-NEXT:    fld fa5, 8(s0)
@@ -720,8 +720,8 @@ define void @caller() nounwind {
 ; LP64E-NEXT:    addi sp, sp, -272
 ; LP64E-NEXT:    sd ra, 264(sp) # 8-byte Folded Spill
 ; LP64E-NEXT:    sd s0, 256(sp) # 8-byte Folded Spill
-; LP64E-NEXT:    lui a0, %hi(var)
-; LP64E-NEXT:    addi s0, a0, %lo(var)
+; LP64E-NEXT:    lui s0, %hi(var)
+; LP64E-NEXT:    addi s0, s0, %lo(var)
 ; LP64E-NEXT:    fld fa5, 0(s0)
 ; LP64E-NEXT:    fsd fa5, 248(sp) # 8-byte Folded Spill
 ; LP64E-NEXT:    fld fa5, 8(s0)
@@ -873,8 +873,8 @@ define void @caller() nounwind {
 ; ILP32D-NEXT:    fsd fs9, 184(sp) # 8-byte Folded Spill
 ; ILP32D-NEXT:    fsd fs10, 176(sp) # 8-byte Folded Spill
 ; ILP32D-NEXT:    fsd fs11, 168(sp) # 8-byte Folded Spill
-; ILP32D-NEXT:    lui a0, %hi(var)
-; ILP32D-NEXT:    addi s0, a0, %lo(var)
+; ILP32D-NEXT:    lui s0, %hi(var)
+; ILP32D-NEXT:    addi s0, s0, %lo(var)
 ; ILP32D-NEXT:    fld fa5, 0(s0)
 ; ILP32D-NEXT:    fsd fa5, 160(sp) # 8-byte Folded Spill
 ; ILP32D-NEXT:    fld fa5, 8(s0)
@@ -1014,8 +1014,8 @@ define void @caller() nounwind {
 ; LP64D-NEXT:    fsd fs9, 176(sp) # 8-byte Folded Spill
 ; LP64D-NEXT:    fsd fs10, 168(sp) # 8-byte Folded Spill
 ; LP64D-NEXT:    fsd fs11, 160(sp) # 8-byte Folded Spill
-; LP64D-NEXT:    lui a0, %hi(var)
-; LP64D-NEXT:    addi s0, a0, %lo(var)
+; LP64D-NEXT:    lui s0, %hi(var)
+; LP64D-NEXT:    addi s0, s0, %lo(var)
 ; LP64D-NEXT:    fld fa5, 0(s0)
 ; LP64D-NEXT:    fsd fa5, 152(sp) # 8-byte Folded Spill
 ; LP64D-NEXT:    fld fa5, 8(s0)
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
index 7a6f9136a242b..74ae30eb66b99 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
@@ -1060,8 +1060,8 @@ define void @caller() nounwind {
 ; RV32I-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lui a0, %hi(var)
-; RV32I-NEXT:    addi s0, a0, %lo(var)
+; RV32I-NEXT:    lui s0, %hi(var)
+; RV32I-NEXT:    addi s0, s0, %lo(var)
 ; RV32I-NEXT:    lw a0, 0(s0)
 ; RV32I-NEXT:    sw a0, 88(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lw a0, 4(s0)
@@ -1191,8 +1191,8 @@ define void @caller() nounwind {
 ; RV32I-ILP32E-NEXT:    sw ra, 132(sp) # 4-byte Folded Spill
 ; RV32I-ILP32E-NEXT:    sw s0, 128(sp) # 4-byte Folded Spill
 ; RV32I-ILP32E-NEXT:    sw s1, 124(sp) # 4-byte Folded Spill
-; RV32I-ILP32E-NEXT:    lui a0, %hi(var)
-; RV32I-ILP32E-NEXT:    addi s0, a0, %lo(var)
+; RV32I-ILP32E-NEXT:    lui s0, %hi(var)
+; RV32I-ILP32E-NEXT:    addi s0, s0, %lo(var)
 ; RV32I-ILP32E-NEXT:    lw a0, 0(s0)
 ; RV32I-ILP32E-NEXT:    sw a0, 120(sp) # 4-byte Folded Spill
 ; RV32I-ILP32E-NEXT:    lw a0, 4(s0)
@@ -1343,8 +1343,8 @@ define void @caller() nounwind {
 ; RV32I-WITH-FP-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    addi s0, sp, 144
-; RV32I-WITH-FP-NEXT:    lui a0, %hi(var)
-; RV32I-WITH-FP-NEXT:    addi s1, a0, %lo(var)
+; RV32I-WITH-FP-NEXT:    lui s1, %hi(var)
+; RV32I-WITH-FP-NEXT:    addi s1, s1, %lo(var)
 ; RV32I-WITH-FP-NEXT:    lw a0, 0(s1)
 ; RV32I-WITH-FP-NEXT:    sw a0, -56(s0) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    lw a0, 4(s1)
@@ -1474,8 +1474,8 @@ define void @caller() nounwind {
 ; RV32IZCMP:       # %bb.0:
 ; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -112
 ; RV32IZCMP-NEXT:    addi sp, sp, -48
-; RV32IZCMP-NEXT:    lui a0, %hi(var)
-; RV32IZCMP-NEXT:    addi s0, a0, %lo(var)
+; RV32IZCMP-NEXT:    lui s0, %hi(var)
+; RV32IZCMP-NEXT:    addi s0, s0, %lo(var)
 ; RV32IZCMP-NEXT:    lw a0, 0(s0)
 ; RV32IZCMP-NEXT:    sw a0, 92(sp) # 4-byte Folded Spill
 ; RV32IZCMP-NEXT:    lw a0, 4(s0)
@@ -1603,8 +1603,8 @@ define void @caller() nounwind {
 ; RV32IZCMP-WITH-FP-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    addi s0, sp, 144
-; RV32IZCMP-WITH-FP-NEXT:    lui a0, %hi(var)
-; RV32IZCMP-WITH-FP-NEXT:    addi s1, a0, %lo(var)
+; RV32IZCMP-WITH-FP-NEXT:    lui s1, %hi(var)
+; RV32IZCMP-WITH-FP-NEXT:    addi s1, s1, %lo(var)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, 0(s1)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -56(s0) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, 4(s1)
@@ -1746,8 +1746,8 @@ define void @caller() nounwind {
 ; RV64I-NEXT:    sd s9, 184(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s10, 176(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s11, 168(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lui a0, %hi(var)
-; RV64I-NEXT:    addi s0, a0, %lo(var)
+; RV64I-NEXT:    lui s0, %hi(var)
+; RV64I-NEXT:    addi s0, s0, %lo(var)
 ; RV64I-NEXT:    lw a0, 0(s0)
 ; RV64I-NEXT:    sd a0, 160(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    lw a0, 4(s0)
@@ -1877,8 +1877,8 @@ define void @caller() nounwind {
 ; RV64I-LP64E-NEXT:    sd ra, 264(sp) # 8-byte Folded Spill
 ; RV64I-LP64E-NEXT:    sd s0, 256(sp) # 8-byte Folded Spill
 ; RV64I-LP64E-NEXT:    sd s1, 248(sp) # 8-byte Folded Spill
-; RV64I-LP64E-NEXT:    lui a0, %hi(var)
-; RV64I-LP64E-NEXT:    addi s0, a0, %lo(var)
+; RV64I-LP64E-NEXT:    lui s0, %hi(var)
+; RV64I-LP64E-NEXT:    addi s0, s0, %lo(var)
 ; RV64I-LP64E-NEXT:    lw a0, 0(s0)
 ; RV64I-LP64E-NEXT:    sd a0, 240(sp) # 8-byte Folded Spill
 ; RV64I-LP64E-NEXT:    lw a0, 4(s0)
@@ -2029,8 +2029,8 @@ define void @caller() nounwind {
 ; RV64I-WITH-FP-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    addi s0, sp, 288
-; RV64I-WITH-FP-NEXT:    lui a0, %hi(var)
-; RV64I-WITH-FP-NEXT:    addi s1, a0, %lo(var)
+; RV64I-WITH-FP-NEXT:    lui s1, %hi(var)
+; RV64I-WITH-FP-NEXT:    addi s1, s1, %lo(var)
 ; RV64I-WITH-FP-NEXT:    lw a0, 0(s1)
 ; RV64I-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    lw a0, 4(s1)
@@ -2160,8 +2160,8 @@ define void @caller() nounwind {
 ; RV64IZCMP:       # %bb.0:
 ; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
 ; RV64IZCMP-NEXT:    addi sp, sp, -128
-; RV64IZCMP-NEXT:    lui a0, %hi(var)
-; RV64IZCMP-NEXT:    addi s0, a0, %lo(var)
+; RV64IZCMP-NEXT:    lui s0, %hi(var)
+; RV64IZCMP-NEXT:    addi s0, s0, %lo(var)
 ; RV64IZCMP-NEXT:    lw a0, 0(s0)
 ; RV64IZCMP-NEXT:    sd a0, 168(sp) # 8-byte Folded Spill
 ; RV64IZCMP-NEXT:    lw a0, 4(s0)
@@ -2289,8 +2289,8 @@ define void @caller() nounwind {
 ; RV64IZCMP-WITH-FP-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    addi s0, sp, 288
-; RV64IZCMP-WITH-FP-NEXT:    lui a0, %hi(var)
-; RV64IZCMP-WITH-FP-NEXT:    addi s1, a0, %lo(var)
+; RV64IZCMP-WITH-FP-NEXT:    lui s1, %hi(var)
+; RV64IZCMP-WITH-FP-NEXT:    addi s1, s1, %lo(var)
 ; RV64IZCMP-WITH-FP-NEXT:    lw a0, 0(s1)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    lw a0, 4(s1)
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 549d531e829ea..a90c244437a03 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -383,8 +383,8 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, a0, %lo(.LCPI3_0)
+; RV32I-NEXT:    lui s4, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s4, s4, %lo(.LCPI3_0)
 ; RV32I-NEXT:    neg a0, s2
 ; RV32I-NEXT:    and a0, s2, a0
 ; RV32I-NEXT:    mv a1, s3
@@ -442,9 +442,9 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32M-LABEL: test_cttz_i64:
 ; RV32M:       # %bb.0:
 ; RV32M-NEXT:    lui a2, 30667
-; RV32M-NEXT:    addi a2, a2, 1329
-; RV32M-NEXT:    lui a3, %hi(.LCPI3_0)
-; RV32M-NEXT:    addi a3, a3, %lo(.LCPI3_0)
+; RV32M-NEXT:    addi a3, a2, 1329
+; RV32M-NEXT:    lui a2, %hi(.LCPI3_0)
+; RV32M-NEXT:    addi a2, a2, %lo(.LCPI3_0)
 ; RV32M-NEXT:    bnez a1, .LBB3_3
 ; RV32M-NEXT:  # %bb.1:
 ; RV32M-NEXT:    li a1, 32
@@ -452,18 +452,18 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32M-NEXT:  .LBB3_2:
 ; RV32M-NEXT:    neg a1, a0
 ; RV32M-NEXT:    and a0, a0, a1
-; RV32M-NEXT:    mul a0, a0, a2
+; RV32M-NEXT:    mul a0, a0, a3
 ; RV32M-NEXT:    srli a0, a0, 27
-; RV32M-NEXT:    add a0, a3, a0
+; RV32M-NEXT:    add a0, a2, a0
 ; RV32M-NEXT:    lbu a0, 0(a0)
 ; RV32M-NEXT:    li a1, 0
 ; RV32M-NEXT:    ret
 ; RV32M-NEXT:  .LBB3_3:
 ; RV32M-NEXT:    neg a4, a1
 ; RV32M-NEXT:    and a1, a1, a4
-; RV32M-NEXT:    mul a1, a1, a2
+; RV32M-NEXT:    mul a1, a1, a3
 ; RV32M-NEXT:    srli a1, a1, 27
-; RV32M-NEXT:    add a1, a3, a1
+; RV32M-NEXT:    add a1, a2, a1
 ; RV32M-NEXT:    lbu a1, 0(a1)
 ; RV32M-NEXT:    bnez a0, .LBB3_2
 ; RV32M-NEXT:  .LBB3_4:
@@ -814,8 +814,8 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI7_0)
-; RV32I-NEXT:    addi s4, a0, %lo(.LCPI7_0)
+; RV32I-NEXT:    lui s4, %hi(.LCPI7_0)
+; RV32I-NEXT:    addi s4, s4, %lo(.LCPI7_0)
 ; RV32I-NEXT:    neg a0, s1
 ; RV32I-NEXT:    and a0, s1, a0
 ; RV32I-NEXT:    mv a1, s3
diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
index 9ae30e646fdbf..fe6e20d852d59 100644
--- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
+++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
@@ -48,8 +48,8 @@ define signext i32 @ctz_dereferencing_pointer(ptr %b) nounwind {
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI0_0)
-; RV32I-NEXT:    addi s3, a0, %lo(.LCPI0_0)
+; RV32I-NEXT:    lui s3, %hi(.LCPI0_0)
+; RV32I-NEXT:    addi s3, s3, %lo(.LCPI0_0)
 ; RV32I-NEXT:    neg a0, s4
 ; RV32I-NEXT:    and a0, s4, a0
 ; RV32I-NEXT:    mv a1, s1
@@ -511,8 +511,8 @@ define signext i32 @ctz4(i64 %b) nounwind {
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV32I-NEXT:    addi s4, a0, %lo(.LCPI6_0)
+; RV32I-NEXT:    lui s4, %hi(.LCPI6_0)
+; RV32I-NEXT:    addi s4, s4, %lo(.LCPI6_0)
 ; RV32I-NEXT:    neg a0, s2
 ; RV32I-NEXT:    and a0, s2, a0
 ; RV32I-NEXT:    mv a1, s3
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
index eb6ac985287a1..478d2eae9dca2 100644
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -24,31 +24,31 @@ define void @_Z3foov() {
 ; CHECK-NEXT:    lui a0, %hi(.L__const._Z3foov.var_49)
 ; CHECK-NEXT:    addi a0, a0, %lo(.L__const._Z3foov.var_49)
 ; CHECK-NEXT:    vsetivli zero, 2, e16, m2, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
+; CHECK-NEXT:    vle16.v v8, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.L__const._Z3foov.var_48)
 ; CHECK-NEXT:    addi a0, a0, %lo(.L__const._Z3foov.var_48)
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v10, (a0)
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add a0, sp, a0
 ; CHECK-NEXT:    addi a0, a0, 16
-; CHECK-NEXT:    vs1r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    vs1r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    lui a0, %hi(.L__const._Z3foov.var_46)
 ; CHECK-NEXT:    addi a0, a0, %lo(.L__const._Z3foov.var_46)
-; CHECK-NEXT:    vle16.v v12, (a0)
+; CHECK-NEXT:    vle16.v v10, (a0)
 ; CHECK-NEXT:    lui a0, %hi(.L__const._Z3foov.var_45)
 ; CHECK-NEXT:    addi a0, a0, %lo(.L__const._Z3foov.var_45)
-; CHECK-NEXT:    vle16.v v14, (a0)
+; CHECK-NEXT:    vle16.v v12, (a0)
 ; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    vs2r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    vs2r.v v10, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    vs2r.v v12, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    vs2r.v v14, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    vs2r.v v16, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    lui a0, %hi(.L__const._Z3foov.var_40)
diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
index 2a00604dd07a0..9331f2f0e1361 100644
--- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
+++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll
@@ -59,8 +59,8 @@ entry:
 define dso_local i64 @load_g_1() nounwind {
 ; RV32I-LABEL: load_g_1:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    lui a0, %hi(g_1)
-; RV32I-NEXT:    addi a1, a0, %lo(g_1)
+; RV32I-NEXT:    lui a1, %hi(g_1)
+; RV32I-NEXT:    addi a1, a1, %lo(g_1)
 ; RV32I-NEXT:    lw a0, 0(a1)
 ; RV32I-NEXT:    lw a1, 4(a1)
 ; RV32I-NEXT:    ret
@@ -94,8 +94,8 @@ entry:
 define dso_local i64 @load_g_2() nounwind {
 ; RV32I-LABEL: load_g_2:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    lui a0, %hi(g_2)
-; RV32I-NEXT:    addi a1, a0, %lo(g_2)
+; RV32I-NEXT:    lui a1, %hi(g_2)
+; RV32I-NEXT:    addi a1, a1, %lo(g_2)
 ; RV32I-NEXT:    lw a0, 0(a1)
 ; RV32I-NEXT:    lw a1, 4(a1)
 ; RV32I-NEXT:    ret
@@ -129,8 +129,8 @@ entry:
 define dso_local i64 @load_g_4() nounwind {
 ; RV32I-LABEL: load_g_4:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    lui a0, %hi(g_4)
-; RV32I-NEXT:    addi a1, a0, %lo(g_4)
+; RV32I-NEXT:    lui a1, %hi(g_4)
+; RV32I-NEXT:    addi a1, a1, %lo(g_4)
 ; RV32I-NEXT:    lw a0, 0(a1)
 ; RV32I-NEXT:    lw a1, 4(a1)
 ; RV32I-NEXT:    ret
@@ -389,8 +389,8 @@ define dso_local i32 @load_ga() local_unnamed_addr #0 {
 define dso_local i64 @load_ga_8() nounwind {
 ; RV32I-LABEL: load_ga_8:
 ; RV32I:       # %bb.0: # %entry
-; RV32I-NEXT:    lui a0, %hi(ga_8)
-; RV32I-NEXT:    addi a1, a0, %lo(ga_8)
+; RV32I-NEXT:    lui a1, %hi(ga_8)
+; RV32I-NEXT:    addi a1, a1, %lo(ga_8)
 ; RV32I-NEXT:    lw a0, 8(a1)
 ; RV32I-NEXT:    lw a1, 12(a1)
 ; RV32I-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll
index 5d55595b3158b..d1c5cef6ac00b 100644
--- a/llvm/test/CodeGen/RISCV/mem.ll
+++ b/llvm/test/CodeGen/RISCV/mem.ll
@@ -168,8 +168,8 @@ define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
 define dso_local i32 @lw_sw_global(i32 %a) nounwind {
 ; RV32I-LABEL: lw_sw_global:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lui a1, %hi(G)
-; RV32I-NEXT:    addi a2, a1, %lo(G)
+; RV32I-NEXT:    lui a2, %hi(G)
+; RV32I-NEXT:    addi a2, a2, %lo(G)
 ; RV32I-NEXT:    lw a1, 0(a2)
 ; RV32I-NEXT:    sw a0, 0(a2)
 ; RV32I-NEXT:    lw zero, 36(a2)
diff --git a/llvm/test/CodeGen/RISCV/mem64.ll b/llvm/test/CodeGen/RISCV/mem64.ll
index 379b3483a1b91..df3834e254fd2 100644
--- a/llvm/test/CodeGen/RISCV/mem64.ll
+++ b/llvm/test/CodeGen/RISCV/mem64.ll
@@ -213,8 +213,8 @@ define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
 define dso_local i64 @ld_sd_global(i64 %a) nounwind {
 ; RV64I-LABEL: ld_sd_global:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    lui a1, %hi(G)
-; RV64I-NEXT:    addi a2, a1, %lo(G)
+; RV64I-NEXT:    lui a2, %hi(G)
+; RV64I-NEXT:    addi a2, a2, %lo(G)
 ; RV64I-NEXT:    ld a1, 0(a2)
 ; RV64I-NEXT:    sd a0, 0(a2)
 ; RV64I-NEXT:    ld zero, 72(a2)
diff --git a/llvm/test/CodeGen/RISCV/memcpy.ll b/llvm/test/CodeGen/RISCV/memcpy.ll
index 6cbd27bc21256..1c5a6442e1324 100644
--- a/llvm/test/CodeGen/RISCV/memcpy.ll
+++ b/llvm/test/CodeGen/RISCV/memcpy.ll
@@ -23,48 +23,48 @@
 define i32 @t0() {
 ; RV32-LABEL: t0:
 ; RV32:       # %bb.0: # %entry
-; RV32-NEXT:    lui a0, %hi(dst)
-; RV32-NEXT:    addi a0, a0, %lo(dst)
-; RV32-NEXT:    lui a1, %hi(src)
-; RV32-NEXT:    addi a1, a1, %lo(src)
-; RV32-NEXT:    lbu a2, 10(a1)
-; RV32-NEXT:    lh a3, 8(a1)
-; RV32-NEXT:    lw a4, 4(a1)
-; RV32-NEXT:    lw a1, 0(a1)
-; RV32-NEXT:    sb a2, 10(a0)
-; RV32-NEXT:    sh a3, 8(a0)
-; RV32-NEXT:    sw a4, 4(a0)
-; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    lui a0, %hi(src)
+; RV32-NEXT:    addi a0, a0, %lo(src)
+; RV32-NEXT:    lbu a1, 10(a0)
+; RV32-NEXT:    lui a2, %hi(dst)
+; RV32-NEXT:    addi a2, a2, %lo(dst)
+; RV32-NEXT:    lh a3, 8(a0)
+; RV32-NEXT:    lw a4, 4(a0)
+; RV32-NEXT:    lw a0, 0(a0)
+; RV32-NEXT:    sb a1, 10(a2)
+; RV32-NEXT:    sh a3, 8(a2)
+; RV32-NEXT:    sw a4, 4(a2)
+; RV32-NEXT:    sw a0, 0(a2)
 ; RV32-NEXT:    li a0, 0
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: t0:
 ; RV64:       # %bb.0: # %entry
-; RV64-NEXT:    lui a0, %hi(dst)
-; RV64-NEXT:    lui a1, %hi(src)
-; RV64-NEXT:    addi a1, a1, %lo(src)
-; RV64-NEXT:    lbu a2, 10(a1)
-; RV64-NEXT:    lh a3, 8(a1)
-; RV64-NEXT:    ld a1, 0(a1)
-; RV64-NEXT:    addi a0, a0, %lo(dst)
-; RV64-NEXT:    sb a2, 10(a0)
-; RV64-NEXT:    sh a3, 8(a0)
-; RV64-NEXT:    sd a1, 0(a0)
+; RV64-NEXT:    lui a0, %hi(src)
+; RV64-NEXT:    addi a0, a0, %lo(src)
+; RV64-NEXT:    lbu a1, 10(a0)
+; RV64-NEXT:    lh a2, 8(a0)
+; RV64-NEXT:    ld a0, 0(a0)
+; RV64-NEXT:    lui a3, %hi(dst)
+; RV64-NEXT:    addi a3, a3, %lo(dst)
+; RV64-NEXT:    sb a1, 10(a3)
+; RV64-NEXT:    sh a2, 8(a3)
+; RV64-NEXT:    sd a0, 0(a3)
 ; RV64-NEXT:    li a0, 0
 ; RV64-NEXT:    ret
 ;
 ; RV32-FAST-LABEL: t0:
 ; RV32-FAST:       # %bb.0: # %entry
-; RV32-FAST-NEXT:    lui a0, %hi(dst)
-; RV32-FAST-NEXT:    lui a1, %hi(src)
-; RV32-FAST-NEXT:    addi a1, a1, %lo(src)
-; RV32-FAST-NEXT:    lw a2, 7(a1)
-; RV32-FAST-NEXT:    lw a3, 4(a1)
-; RV32-FAST-NEXT:    lw a1, 0(a1)
-; RV32-FAST-NEXT:    addi a0, a0, %lo(dst)
-; RV32-FAST-NEXT:    sw a2, 7(a0)
-; RV32-FAST-NEXT:    sw a3, 4(a0)
-; RV32-FAST-NEXT:    sw a1, 0(a0)
+; RV32-FAST-NEXT:    lui a0, %hi(src)
+; RV32-FAST-NEXT:    addi a0, a0, %lo(src)
+; RV32-FAST-NEXT:    lw a1, 7(a0)
+; RV32-FAST-NEXT:    lw a2, 4(a0)
+; RV32-FAST-NEXT:    lw a0, 0(a0)
+; RV32-FAST-NEXT:    lui a3, %hi(dst)
+; RV32-FAST-NEXT:    addi a3, a3, %lo(dst)
+; RV32-FAST-NEXT:    sw a1, 7(a3)
+; RV32-FAST-NEXT:    sw a2, 4(a3)
+; RV32-FAST-NEXT:    sw a0, 0(a3)
 ; RV32-FAST-NEXT:    li a0, 0
 ; RV32-FAST-NEXT:    ret
 ;
@@ -163,9 +163,6 @@ define void @t2(ptr nocapture %C) nounwind {
 ;
 ; RV64-FAST-LABEL: t2:
 ; RV64-FAST:       # %bb.0: # %entry
-; RV64-FAST-NEXT:    lui a1, 1156
-; RV64-FAST-NEXT:    addi a1, a1, 332
-; RV64-FAST-NEXT:    sw a1, 32(a0)
 ; RV64-FAST-NEXT:    lui a1, %hi(.L.str2)
 ; RV64-FAST-NEXT:    addi a1, a1, %lo(.L.str2)
 ; RV64-FAST-NEXT:    ld a2, 24(a1)
@@ -176,6 +173,9 @@ define void @t2(ptr nocapture %C) nounwind {
 ; RV64-FAST-NEXT:    sd a3, 16(a0)
 ; RV64-FAST-NEXT:    sd a4, 8(a0)
 ; RV64-FAST-NEXT:    sd a1, 0(a0)
+; RV64-FAST-NEXT:    lui a1, 1156
+; RV64-FAST-NEXT:    addi a1, a1, 332
+; RV64-FAST-NEXT:    sw a1, 32(a0)
 ; RV64-FAST-NEXT:    ret
 entry:
   tail call void @llvm.memcpy.p0.p0.i64(ptr %C, ptr @.str2, i64 36, i1 false)
diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
index 89c50914f61db..4bb1c97d4e216 100644
--- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll
+++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
@@ -721,8 +721,8 @@ define i32 @nocompress(i32 signext %size) {
 ; RV32IZCMP-NEXT:    andi a0, a0, -16
 ; RV32IZCMP-NEXT:    sub s2, sp, a0
 ; RV32IZCMP-NEXT:    mv sp, s2
-; RV32IZCMP-NEXT:    lui a0, %hi(var)
-; RV32IZCMP-NEXT:    addi s1, a0, %lo(var)
+; RV32IZCMP-NEXT:    lui s1, %hi(var)
+; RV32IZCMP-NEXT:    addi s1, s1, %lo(var)
 ; RV32IZCMP-NEXT:    lw s3, 0(s1)
 ; RV32IZCMP-NEXT:    lw s4, 4(s1)
 ; RV32IZCMP-NEXT:    lw s5, 8(s1)
@@ -761,8 +761,8 @@ define i32 @nocompress(i32 signext %size) {
 ; RV64IZCMP-NEXT:    andi a0, a0, -16
 ; RV64IZCMP-NEXT:    sub s2, sp, a0
 ; RV64IZCMP-NEXT:    mv sp, s2
-; RV64IZCMP-NEXT:    lui a0, %hi(var)
-; RV64IZCMP-NEXT:    addi s1, a0, %lo(var)
+; RV64IZCMP-NEXT:    lui s1, %hi(var)
+; RV64IZCMP-NEXT:    addi s1, s1, %lo(var)
 ; RV64IZCMP-NEXT:    lw s3, 0(s1)
 ; RV64IZCMP-NEXT:    lw s4, 4(s1)
 ; RV64IZCMP-NEXT:    lw s5, 8(s1)
@@ -799,8 +799,8 @@ define i32 @nocompress(i32 signext %size) {
 ; RV32IZCMP-SR-NEXT:    andi a0, a0, -16
 ; RV32IZCMP-SR-NEXT:    sub s2, sp, a0
 ; RV32IZCMP-SR-NEXT:    mv sp, s2
-; RV32IZCMP-SR-NEXT:    lui a0, %hi(var)
-; RV32IZCMP-SR-NEXT:    addi s1, a0, %lo(var)
+; RV32IZCMP-SR-NEXT:    lui s1, %hi(var)
+; RV32IZCMP-SR-NEXT:    addi s1, s1, %lo(var)
 ; RV32IZCMP-SR-NEXT:    lw s3, 0(s1)
 ; RV32IZCMP-SR-NEXT:    lw s4, 4(s1)
 ; RV32IZCMP-SR-NEXT:    lw s5, 8(s1)
@@ -839,8 +839,8 @@ define i32 @nocompress(i32 signext %size) {
 ; RV64IZCMP-SR-NEXT:    andi a0, a0, -16
 ; RV64IZCMP-SR-NEXT:    sub s2, sp, a0
 ; RV64IZCMP-SR-NEXT:    mv sp, s2
-; RV64IZCMP-SR-NEXT:    lui a0, %hi(var)
-; RV64IZCMP-SR-NEXT:    addi s1, a0, %lo(var)
+; RV64IZCMP-SR-NEXT:    lui s1, %hi(var)
+; RV64IZCMP-SR-NEXT:    addi s1, s1, %lo(var)
 ; RV64IZCMP-SR-NEXT:    lw s3, 0(s1)
 ; RV64IZCMP-SR-NEXT:    lw s4, 4(s1)
 ; RV64IZCMP-SR-NEXT:    lw s5, 8(s1)
@@ -886,8 +886,8 @@ define i32 @nocompress(i32 signext %size) {
 ; RV32I-NEXT:    andi a0, a0, -16
 ; RV32I-NEXT:    sub s1, sp, a0
 ; RV32I-NEXT:    mv sp, s1
-; RV32I-NEXT:    lui a0, %hi(var)
-; RV32I-NEXT:    addi s2, a0, %lo(var)
+; RV32I-NEXT:    lui s2, %hi(var)
+; RV32I-NEXT:    addi s2, s2, %lo(var)
 ; RV32I-NEXT:    lw s3, 0(s2)
 ; RV32I-NEXT:    lw s4, 4(s2)
 ; RV32I-NEXT:    lw s5, 8(s2)
@@ -944,8 +944,8 @@ define i32 @nocompress(i32 signext %size) {
 ; RV64I-NEXT:    andi a0, a0, -16
 ; RV64I-NEXT:    sub s1, sp, a0
 ; RV64I-NEXT:    mv sp, s1
-; RV64I-NEXT:    lui a0, %hi(var)
-; RV64I-NEXT:    addi s2, a0, %lo(var)
+; RV64I-NEXT:    lui s2, %hi(var)
+; RV64I-NEXT:    addi s2, s2, %lo(var)
 ; RV64I-NEXT:    lw s3, 0(s2)
 ; RV64I-NEXT:    lw s4, 4(s2)
 ; RV64I-NEXT:    lw s5, 8(s2)
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
index b45ab135fa1c7..197366e7e05fe 100644
--- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -209,8 +209,8 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, a0, %lo(.LCPI3_0)
+; RV32I-NEXT:    lui s4, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s4, s4, %lo(.LCPI3_0)
 ; RV32I-NEXT:    neg a0, s2
 ; RV32I-NEXT:    and a0, s2, a0
 ; RV32I-NEXT:    mv a1, s3
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 7e6c3f9c87d27..f25aa0de89da8 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -199,8 +199,8 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __mulsi3
 ; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, a0, %lo(.LCPI3_0)
+; RV32I-NEXT:    lui s4, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s4, s4, %lo(.LCPI3_0)
 ; RV32I-NEXT:    neg a0, s2
 ; RV32I-NEXT:    and a0, s2, a0
 ; RV32I-NEXT:    mv a1, s3
diff --git a/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
index 4ad6b3e34c143..c78dd794271cd 100644
--- a/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
+++ b/llvm/test/CodeGen/RISCV/rv64-legal-i32/mem64.ll
@@ -175,8 +175,8 @@ define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind {
 define dso_local i64 @ld_sd_global(i64 %a) nounwind {
 ; RV64I-LABEL: ld_sd_global:
 ; RV64I:       # %bb.0:
-; RV64I-NEXT:    lui a1, %hi(G)
-; RV64I-NEXT:    addi a2, a1, %lo(G)
+; RV64I-NEXT:    lui a2, %hi(G)
+; RV64I-NEXT:    addi a2, a2, %lo(G)
 ; RV64I-NEXT:    ld a1, 0(a2)
 ; RV64I-NEXT:    sd a0, 0(a2)
 ; RV64I-NEXT:    ld zero, 72(a2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
index 9cb3991f31f94..08b310213d16e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll
@@ -126,28 +126,28 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vsaddu.vx v8, v8, a1
-; CHECK-NEXT:    vmsltu.vx v0, v8, a2
 ; CHECK-NEXT:    lui a0, %hi(.LCPI9_0)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_0)
-; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vle8.v v16, (a0)
+; CHECK-NEXT:    vmsltu.vx v0, v8, a2
 ; CHECK-NEXT:    lui a0, %hi(.LCPI9_1)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_1)
-; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    vsext.vf8 v24, v16
+; CHECK-NEXT:    vsaddu.vx v16, v24, a1
+; CHECK-NEXT:    vmsltu.vx v9, v16, a2
 ; CHECK-NEXT:    vsext.vf8 v16, v8
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v8, v16, a2
-; CHECK-NEXT:    vsext.vf8 v16, v9
-; CHECK-NEXT:    vsaddu.vx v16, v16, a1
 ; CHECK-NEXT:    lui a0, %hi(.LCPI9_2)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI9_2)
-; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vmsltu.vx v10, v16, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v0, v8, 2
+; CHECK-NEXT:    vslideup.vi v0, v9, 2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v0, v10, 4
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vsext.vf8 v16, v8
 ; CHECK-NEXT:    vsaddu.vx v8, v16, a1
 ; CHECK-NEXT:    vmsltu.vx v16, v8, a2
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
@@ -169,13 +169,13 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    vsext.vf8 v16, v8
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v10, v16, a2
+; CHECK-NEXT:    vmsltu.vx v8, v16, a2
 ; CHECK-NEXT:    vsext.vf8 v16, v9
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v8, v16, a2
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_2)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_2)
 ; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vmsltu.vx v10, v16, a2
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_3)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_3)
 ; CHECK-NEXT:    vle8.v v11, (a0)
@@ -187,10 +187,10 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vmsltu.vx v11, v16, a2
 ; CHECK-NEXT:    vid.v v16
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v0, v16, a2
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_4)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_4)
 ; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    vmsltu.vx v0, v16, a2
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_5)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_5)
 ; CHECK-NEXT:    vle8.v v13, (a0)
@@ -201,27 +201,27 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) {
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
 ; CHECK-NEXT:    vmsltu.vx v13, v16, a2
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v8, v10, 2
+; CHECK-NEXT:    vslideup.vi v10, v8, 2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v8, v9, 4
+; CHECK-NEXT:    vslideup.vi v10, v9, 4
 ; CHECK-NEXT:    lui a0, %hi(.LCPI10_6)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI10_6)
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vle8.v v9, (a0)
+; CHECK-NEXT:    vle8.v v8, (a0)
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v8, v11, 6
+; CHECK-NEXT:    vslideup.vi v10, v11, 6
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v0, v12, 2
 ; CHECK-NEXT:    vsetivli zero, 6, e8, mf2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v0, v13, 4
 ; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; CHECK-NEXT:    vsext.vf8 v16, v9
+; CHECK-NEXT:    vsext.vf8 v16, v8
 ; CHECK-NEXT:    vsaddu.vx v16, v16, a1
-; CHECK-NEXT:    vmsltu.vx v9, v16, a2
+; CHECK-NEXT:    vmsltu.vx v8, v16, a2
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v9, 6
+; CHECK-NEXT:    vslideup.vi v0, v8, 6
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vslideup.vi v0, v8, 8
+; CHECK-NEXT:    vslideup.vi v0, v10, 8
 ; CHECK-NEXT:    ret
   %mask = call <128 x i1> @llvm.get.active.lane.mask.v128i1.i64(i64 %index, i64 %tc)
   ret <128 x i1> %mask
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 79c36a629465d..f4d7074c7f6b2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -3459,6 +3459,8 @@ define void @mulhu_v4i64(ptr %x) {
 ; RV64-NEXT:    lui a1, %hi(.LCPI184_0)
 ; RV64-NEXT:    addi a1, a1, %lo(.LCPI184_0)
 ; RV64-NEXT:    vle64.v v10, (a1)
+; RV64-NEXT:    vmulhu.vv v10, v8, v10
+; RV64-NEXT:    vsub.vv v8, v8, v10
 ; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    slli a1, a1, 63
 ; RV64-NEXT:    vmv.s.x v12, a1
@@ -3466,8 +3468,6 @@ define void @mulhu_v4i64(ptr %x) {
 ; RV64-NEXT:    vsetivli zero, 3, e64, m2, tu, ma
 ; RV64-NEXT:    vslideup.vi v14, v12, 2
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vmulhu.vv v10, v8, v10
-; RV64-NEXT:    vsub.vv v8, v8, v10
 ; RV64-NEXT:    vmulhu.vv v8, v8, v14
 ; RV64-NEXT:    vadd.vv v8, v8, v10
 ; RV64-NEXT:    lui a1, 12320
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index 178a920169ad9..bc3e135a588a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -159,17 +159,16 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 82
+; RV32-NEXT:    li a3, 80
 ; RV32-NEXT:    mul a2, a2, a3
 ; RV32-NEXT:    sub sp, sp, a2
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 82 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 80 * vlenb
 ; RV32-NEXT:    addi a3, a1, 256
 ; RV32-NEXT:    li a2, 32
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT:    vle32.v v16, (a3)
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 57
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 6
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
@@ -177,26 +176,26 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vslideup.vi v8, v16, 4
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 41
+; RV32-NEXT:    li a5, 40
 ; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs4r.v v8, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a4, 12
-; RV32-NEXT:    vmv.s.x v1, a4
+; RV32-NEXT:    vmv.s.x v0, a4
 ; RV32-NEXT:    vsetivli zero, 16, e32, m8, ta, ma
 ; RV32-NEXT:    vslidedown.vi v16, v16, 16
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a5, a4, 6
-; RV32-NEXT:    add a4, a5, a4
+; RV32-NEXT:    li a5, 56
+; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vmv1r.v v3, v0
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; RV32-NEXT:    vslideup.vi v8, v16, 10, v0.t
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 45
+; RV32-NEXT:    li a5, 44
 ; RV32-NEXT:    mul a4, a4, a5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
@@ -206,8 +205,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
 ; RV32-NEXT:    vle16.v v8, (a4)
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a5, a4, 5
-; RV32-NEXT:    add a4, a5, a4
+; RV32-NEXT:    slli a4, a4, 5
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs4r.v v8, (a4) # Unknown-size Folded Spill
@@ -216,21 +214,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    lui a5, 1
 ; RV32-NEXT:    vle16.v v8, (a4)
 ; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a6, 25
+; RV32-NEXT:    li a6, 24
 ; RV32-NEXT:    mul a4, a4, a6
 ; RV32-NEXT:    add a4, sp, a4
 ; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs4r.v v8, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    vle32.v v8, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a4, 73
+; RV32-NEXT:    li a4, 72
 ; RV32-NEXT:    mul a1, a1, a4
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vle32.v v24, (a3)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 49
+; RV32-NEXT:    li a3, 48
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -238,27 +236,26 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    addi a1, a5, -64
 ; RV32-NEXT:    vmv.s.x v0, a1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 37
+; RV32-NEXT:    li a3, 36
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs1r.v v0, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 5
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    slli a1, a1, 5
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v4, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vrgatherei16.vv v16, v8, v4
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 25
+; RV32-NEXT:    li a3, 24
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vrgatherei16.vv v16, v24, v8, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 45
+; RV32-NEXT:    li a3, 44
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -266,259 +263,257 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetivli zero, 12, e32, m4, tu, ma
 ; RV32-NEXT:    vmv.v.v v8, v16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 45
+; RV32-NEXT:    li a3, 44
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 57
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; RV32-NEXT:    vslideup.vi v12, v8, 2
+; RV32-NEXT:    vmv1r.v v8, v3
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 21
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 4
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs1r.v v1, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vmv1r.v v0, v3
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 6
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    li a3, 56
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vslideup.vi v12, v16, 8, v0.t
-; RV32-NEXT:    vmv.v.v v20, v12
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_2)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_2)
 ; RV32-NEXT:    lui a3, %hi(.LCPI6_3)
 ; RV32-NEXT:    addi a3, a3, %lo(.LCPI6_3)
-; RV32-NEXT:    lui a4, %hi(.LCPI6_4)
 ; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; RV32-NEXT:    vle16.v v4, (a1)
-; RV32-NEXT:    vle16.v v16, (a3)
-; RV32-NEXT:    addi a1, a4, %lo(.LCPI6_4)
+; RV32-NEXT:    vle16.v v0, (a1)
+; RV32-NEXT:    vle16.v v4, (a3)
+; RV32-NEXT:    lui a1, %hi(.LCPI6_4)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_4)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
-; RV32-NEXT:    vle16.v v2, (a1)
+; RV32-NEXT:    vle16.v v10, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 73
+; RV32-NEXT:    li a3, 72
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT:    vrgatherei16.vv v24, v8, v4
+; RV32-NEXT:    vrgatherei16.vv v24, v16, v0
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 37
+; RV32-NEXT:    li a3, 36
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 49
+; RV32-NEXT:    li a3, 48
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v24, v16, v4, v0.t
 ; RV32-NEXT:    vsetivli zero, 12, e32, m4, tu, ma
-; RV32-NEXT:    vmv.v.v v20, v24
+; RV32-NEXT:    vmv.v.v v12, v24
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 37
+; RV32-NEXT:    li a3, 36
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v20, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 57
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT:    vrgatherei16.vv v16, v24, v2
-; RV32-NEXT:    vmv1r.v v0, v1
+; RV32-NEXT:    vrgatherei16.vv v12, v24, v10
+; RV32-NEXT:    vmv1r.v v0, v8
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 6
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    li a3, 56
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vslideup.vi v16, v8, 6, v0.t
+; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vslideup.vi v12, v24, 6, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 5
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    slli a1, a1, 5
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_5)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_5)
 ; RV32-NEXT:    lui a3, %hi(.LCPI6_6)
 ; RV32-NEXT:    addi a3, a3, %lo(.LCPI6_6)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT:    vle16.v v16, (a1)
-; RV32-NEXT:    vle16.v v4, (a3)
-; RV32-NEXT:    li a1, 960
-; RV32-NEXT:    vmv.s.x v0, a1
+; RV32-NEXT:    vle16.v v12, (a1)
+; RV32-NEXT:    vle16.v v8, (a3)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 13
+; RV32-NEXT:    li a3, 12
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs1r.v v0, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    li a1, 960
+; RV32-NEXT:    vmv.s.x v8, a1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 73
+; RV32-NEXT:    li a3, 72
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v8, v24, v16
+; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v24, v0, v12
+; RV32-NEXT:    vmv1r.v v3, v8
+; RV32-NEXT:    vmv1r.v v0, v8
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 49
+; RV32-NEXT:    li a3, 12
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v8, v16, v4, v0.t
+; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vrgatherei16.vv v24, v16, v8, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 25
+; RV32-NEXT:    li a3, 24
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_7)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_7)
 ; RV32-NEXT:    lui a3, %hi(.LCPI6_8)
 ; RV32-NEXT:    addi a3, a3, %lo(.LCPI6_8)
-; RV32-NEXT:    lui a4, %hi(.LCPI6_9)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    addi a1, a4, %lo(.LCPI6_9)
+; RV32-NEXT:    lui a1, %hi(.LCPI6_9)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_9)
 ; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
-; RV32-NEXT:    vle16.v v24, (a3)
-; RV32-NEXT:    vle16.v v28, (a1)
+; RV32-NEXT:    vle16.v v4, (a3)
+; RV32-NEXT:    vle16.v v12, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 57
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 6
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT:    vrgatherei16.vv v4, v0, v8
+; RV32-NEXT:    vrgatherei16.vv v12, v24, v8
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 21
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 4
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 6
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    li a3, 56
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vslideup.vi v4, v8, 4, v0.t
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vmv4r.v v24, v16
+; RV32-NEXT:    vslideup.vi v12, v16, 4, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 21
+; RV32-NEXT:    li a3, 12
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 73
+; RV32-NEXT:    li a3, 72
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT:    vrgatherei16.vv v8, v0, v24
+; RV32-NEXT:    vrgatherei16.vv v8, v16, v4
+; RV32-NEXT:    vmv1r.v v0, v3
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 13
+; RV32-NEXT:    li a3, 48
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl4r.v v28, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vrgatherei16.vv v8, v16, v28, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 13
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 4
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_10)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_10)
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
-; RV32-NEXT:    vle16.v v8, (a1)
+; RV32-NEXT:    vle16.v v12, (a1)
 ; RV32-NEXT:    lui a1, 15
 ; RV32-NEXT:    vmv.s.x v3, a1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 57
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vslideup.vi v12, v16, 6
+; RV32-NEXT:    vslideup.vi v8, v16, 6
 ; RV32-NEXT:    vmv1r.v v0, v3
+; RV32-NEXT:    vrgatherei16.vv v8, v24, v12, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 6
-; RV32-NEXT:    add a1, a3, a1
-; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v12, v16, v8, v0.t
-; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 57
-; RV32-NEXT:    mul a1, a1, a3
+; RV32-NEXT:    slli a1, a1, 2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_11)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_11)
 ; RV32-NEXT:    lui a3, %hi(.LCPI6_12)
 ; RV32-NEXT:    addi a3, a3, %lo(.LCPI6_12)
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
-; RV32-NEXT:    vle16.v v8, (a1)
-; RV32-NEXT:    vle16.v v12, (a3)
+; RV32-NEXT:    vle16.v v24, (a1)
+; RV32-NEXT:    vle16.v v4, (a3)
 ; RV32-NEXT:    li a1, 1008
 ; RV32-NEXT:    vmv.s.x v0, a1
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs1r.v v0, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 73
+; RV32-NEXT:    li a3, 72
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v24, v16, v8
+; RV32-NEXT:    vrgatherei16.vv v8, v16, v24
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 49
+; RV32-NEXT:    li a3, 48
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
-; RV32-NEXT:    vrgatherei16.vv v24, v16, v12, v0.t
+; RV32-NEXT:    vrgatherei16.vv v8, v16, v4, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 2
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, %hi(.LCPI6_13)
 ; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_13)
 ; RV32-NEXT:    lui a3, %hi(.LCPI6_14)
 ; RV32-NEXT:    addi a3, a3, %lo(.LCPI6_14)
-; RV32-NEXT:    lui a4, %hi(.LCPI6_15)
 ; RV32-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
 ; RV32-NEXT:    vle16.v v20, (a1)
-; RV32-NEXT:    addi a1, a4, %lo(.LCPI6_15)
+; RV32-NEXT:    lui a1, %hi(.LCPI6_15)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI6_15)
 ; RV32-NEXT:    vsetvli zero, a2, e16, m4, ta, ma
 ; RV32-NEXT:    vle16.v v24, (a3)
 ; RV32-NEXT:    vle16.v v8, (a1)
@@ -526,27 +521,26 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vs4r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vmv1r.v v0, v3
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 41
+; RV32-NEXT:    li a3, 40
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 6
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    li a3, 56
+; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, mu
 ; RV32-NEXT:    vrgatherei16.vv v16, v8, v20, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a3, a1, 5
-; RV32-NEXT:    add a1, a3, a1
+; RV32-NEXT:    slli a1, a1, 5
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v20, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 25
+; RV32-NEXT:    li a3, 24
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -554,7 +548,7 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
 ; RV32-NEXT:    vmv.v.v v20, v8
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a3, 73
+; RV32-NEXT:    li a3, 72
 ; RV32-NEXT:    mul a1, a1, a3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -562,12 +556,12 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vsetvli zero, a2, e32, m8, ta, mu
 ; RV32-NEXT:    vrgatherei16.vv v8, v0, v24
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    slli a1, a1, 3
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl1r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 49
+; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
@@ -576,31 +570,28 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vl4r.v v4, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vrgatherei16.vv v8, v24, v4, v0.t
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 21
-; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    slli a1, a1, 4
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 13
+; RV32-NEXT:    li a2, 12
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl4r.v v24, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsetivli zero, 10, e32, m4, tu, ma
 ; RV32-NEXT:    vmv.v.v v24, v0
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 57
-; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    slli a1, a1, 6
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl4r.v v28, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a2, a1, 2
-; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    slli a1, a1, 2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
-; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vl4r.v v28, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmv.v.v v28, v0
 ; RV32-NEXT:    vmv.v.v v16, v8
 ; RV32-NEXT:    addi a1, a0, 320
@@ -614,21 +605,21 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_
 ; RV32-NEXT:    vse32.v v20, (a1)
 ; RV32-NEXT:    addi a1, a0, 64
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 37
+; RV32-NEXT:    li a3, 36
 ; RV32-NEXT:    mul a2, a2, a3
 ; RV32-NEXT:    add a2, sp, a2
 ; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vl4r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vse32.v v8, (a1)
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 45
+; RV32-NEXT:    li a2, 44
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vl4r.v v8, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT:    vse32.v v8, (a0)
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 82
+; RV32-NEXT:    li a1, 80
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
 ; RV32-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
index 1748315186936..7608349ef7aef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll
@@ -549,20 +549,20 @@ define <128 x i1> @buildvec_mask_v128i1() {
 define <128 x i1> @buildvec_mask_optsize_v128i1() optsize {
 ; CHECK-LABEL: buildvec_mask_optsize_v128i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI21_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI21_0)
-; CHECK-NEXT:    li a1, 128
-; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; CHECK-NEXT:    vlm.v v0, (a0)
+; CHECK-NEXT:    li a0, 128
+; CHECK-NEXT:    lui a1, %hi(.LCPI21_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI21_0)
+; CHECK-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; CHECK-NEXT:    vlm.v v0, (a1)
 ; CHECK-NEXT:    ret
 ;
 ; ZVE32F-LABEL: buildvec_mask_optsize_v128i1:
 ; ZVE32F:       # %bb.0:
-; ZVE32F-NEXT:    lui a0, %hi(.LCPI21_0)
-; ZVE32F-NEXT:    addi a0, a0, %lo(.LCPI21_0)
-; ZVE32F-NEXT:    li a1, 128
-; ZVE32F-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
-; ZVE32F-NEXT:    vlm.v v0, (a0)
+; ZVE32F-NEXT:    li a0, 128
+; ZVE32F-NEXT:    lui a1, %hi(.LCPI21_0)
+; ZVE32F-NEXT:    addi a1, a1, %lo(.LCPI21_0)
+; ZVE32F-NEXT:    vsetvli zero, a0, e8, m8, ta, ma
+; ZVE32F-NEXT:    vlm.v v0, (a1)
 ; ZVE32F-NEXT:    ret
   ret <128 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1>
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index db0969c85a8e2..69341981288b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -13327,22 +13327,22 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) {
 define <8 x i16> @mgather_shuffle_vrgather(ptr %base) {
 ; RV32-LABEL: mgather_shuffle_vrgather:
 ; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, %hi(.LCPI119_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI119_0)
 ; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT:    vle16.v v9, (a0)
-; RV32-NEXT:    lui a0, %hi(.LCPI119_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI119_0)
+; RV32-NEXT:    vle16.v v9, (a1)
 ; RV32-NEXT:    vle16.v v10, (a0)
-; RV32-NEXT:    vrgather.vv v8, v9, v10
+; RV32-NEXT:    vrgather.vv v8, v10, v9
 ; RV32-NEXT:    ret
 ;
 ; RV64V-LABEL: mgather_shuffle_vrgather:
 ; RV64V:       # %bb.0:
+; RV64V-NEXT:    lui a1, %hi(.LCPI119_0)
+; RV64V-NEXT:    addi a1, a1, %lo(.LCPI119_0)
 ; RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; RV64V-NEXT:    vle16.v v9, (a0)
-; RV64V-NEXT:    lui a0, %hi(.LCPI119_0)
-; RV64V-NEXT:    addi a0, a0, %lo(.LCPI119_0)
+; RV64V-NEXT:    vle16.v v9, (a1)
 ; RV64V-NEXT:    vle16.v v10, (a0)
-; RV64V-NEXT:    vrgather.vv v8, v9, v10
+; RV64V-NEXT:    vrgather.vv v8, v10, v9
 ; RV64V-NEXT:    ret
 ;
 ; RV64ZVE32F-LABEL: mgather_shuffle_vrgather:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
index d70ed2fb0e266..4b1f0beb48700 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll
@@ -228,11 +228,11 @@ define <16 x i8> @reverse_v16i8(<16 x i8> %a) {
 define <32 x i8> @reverse_v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: reverse_v32i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI12_0)
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    lui a1, %hi(.LCPI12_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI12_0)
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v12, (a1)
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
@@ -243,11 +243,11 @@ define <32 x i8> @reverse_v32i8(<32 x i8> %a) {
 define <64 x i8> @reverse_v64i8(<64 x i8> %a) {
 ; CHECK-LABEL: reverse_v64i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI13_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI13_0)
-; CHECK-NEXT:    li a1, 64
-; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vle8.v v16, (a0)
+; CHECK-NEXT:    li a0, 64
+; CHECK-NEXT:    lui a1, %hi(.LCPI13_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI13_0)
+; CHECK-NEXT:    vsetvli zero, a0, e8, m4, ta, ma
+; CHECK-NEXT:    vle8.v v16, (a1)
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
@@ -323,11 +323,11 @@ define <16 x i16> @reverse_v16i16(<16 x i16> %a) {
 define <32 x i16> @reverse_v32i16(<32 x i16> %a) {
 ; CHECK-LABEL: reverse_v32i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI19_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI19_0)
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    lui a1, %hi(.LCPI19_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI19_0)
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vle8.v v12, (a1)
 ; CHECK-NEXT:    vsext.vf2 v16, v12
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16
 ; CHECK-NEXT:    vmv.v.v v8, v12
@@ -520,11 +520,11 @@ define <16 x half> @reverse_v16f16(<16 x half> %a) {
 define <32 x half> @reverse_v32f16(<32 x half> %a) {
 ; CHECK-LABEL: reverse_v32f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI34_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI34_0)
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    lui a1, %hi(.LCPI34_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI34_0)
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vle8.v v12, (a1)
 ; CHECK-NEXT:    vsext.vf2 v16, v12
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16
 ; CHECK-NEXT:    vmv.v.v v8, v12
@@ -820,33 +820,33 @@ define <6 x i64> @reverse_v6i64(<6 x i64> %a) {
 define <12 x i64> @reverse_v12i64(<12 x i64> %a) {
 ; RV32-BITS-UNKNOWN-LABEL: reverse_v12i64:
 ; RV32-BITS-UNKNOWN:       # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT:    lui a0, %hi(.LCPI46_0)
-; RV32-BITS-UNKNOWN-NEXT:    addi a0, a0, %lo(.LCPI46_0)
-; RV32-BITS-UNKNOWN-NEXT:    li a1, 32
-; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; RV32-BITS-UNKNOWN-NEXT:    vle16.v v24, (a0)
+; RV32-BITS-UNKNOWN-NEXT:    li a0, 32
+; RV32-BITS-UNKNOWN-NEXT:    lui a1, %hi(.LCPI46_0)
+; RV32-BITS-UNKNOWN-NEXT:    addi a1, a1, %lo(.LCPI46_0)
+; RV32-BITS-UNKNOWN-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-BITS-UNKNOWN-NEXT:    vle16.v v24, (a1)
 ; RV32-BITS-UNKNOWN-NEXT:    vrgatherei16.vv v16, v8, v24
 ; RV32-BITS-UNKNOWN-NEXT:    vmv.v.v v8, v16
 ; RV32-BITS-UNKNOWN-NEXT:    ret
 ;
 ; RV32-BITS-256-LABEL: reverse_v12i64:
 ; RV32-BITS-256:       # %bb.0:
-; RV32-BITS-256-NEXT:    lui a0, %hi(.LCPI46_0)
-; RV32-BITS-256-NEXT:    addi a0, a0, %lo(.LCPI46_0)
-; RV32-BITS-256-NEXT:    li a1, 32
-; RV32-BITS-256-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; RV32-BITS-256-NEXT:    vle16.v v24, (a0)
+; RV32-BITS-256-NEXT:    li a0, 32
+; RV32-BITS-256-NEXT:    lui a1, %hi(.LCPI46_0)
+; RV32-BITS-256-NEXT:    addi a1, a1, %lo(.LCPI46_0)
+; RV32-BITS-256-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-BITS-256-NEXT:    vle16.v v24, (a1)
 ; RV32-BITS-256-NEXT:    vrgatherei16.vv v16, v8, v24
 ; RV32-BITS-256-NEXT:    vmv.v.v v8, v16
 ; RV32-BITS-256-NEXT:    ret
 ;
 ; RV32-BITS-512-LABEL: reverse_v12i64:
 ; RV32-BITS-512:       # %bb.0:
-; RV32-BITS-512-NEXT:    lui a0, %hi(.LCPI46_0)
-; RV32-BITS-512-NEXT:    addi a0, a0, %lo(.LCPI46_0)
-; RV32-BITS-512-NEXT:    li a1, 32
-; RV32-BITS-512-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; RV32-BITS-512-NEXT:    vle16.v v24, (a0)
+; RV32-BITS-512-NEXT:    li a0, 32
+; RV32-BITS-512-NEXT:    lui a1, %hi(.LCPI46_0)
+; RV32-BITS-512-NEXT:    addi a1, a1, %lo(.LCPI46_0)
+; RV32-BITS-512-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-BITS-512-NEXT:    vle16.v v24, (a1)
 ; RV32-BITS-512-NEXT:    vrgatherei16.vv v16, v8, v24
 ; RV32-BITS-512-NEXT:    vmv.v.v v8, v16
 ; RV32-BITS-512-NEXT:    ret
@@ -883,11 +883,11 @@ define <12 x i64> @reverse_v12i64(<12 x i64> %a) {
 ;
 ; RV32-ZVBB-LABEL: reverse_v12i64:
 ; RV32-ZVBB:       # %bb.0:
-; RV32-ZVBB-NEXT:    lui a0, %hi(.LCPI46_0)
-; RV32-ZVBB-NEXT:    addi a0, a0, %lo(.LCPI46_0)
-; RV32-ZVBB-NEXT:    li a1, 32
-; RV32-ZVBB-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; RV32-ZVBB-NEXT:    vle16.v v24, (a0)
+; RV32-ZVBB-NEXT:    li a0, 32
+; RV32-ZVBB-NEXT:    lui a1, %hi(.LCPI46_0)
+; RV32-ZVBB-NEXT:    addi a1, a1, %lo(.LCPI46_0)
+; RV32-ZVBB-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-ZVBB-NEXT:    vle16.v v24, (a1)
 ; RV32-ZVBB-NEXT:    vrgatherei16.vv v16, v8, v24
 ; RV32-ZVBB-NEXT:    vmv.v.v v8, v16
 ; RV32-ZVBB-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
index 0161ac4bc338d..e2580c132f65e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
@@ -225,11 +225,11 @@ declare <16 x i64> @llvm.experimental.stepvector.v16i64()
 define <16 x i64> @stepvector_v16i64() {
 ; RV32-LABEL: stepvector_v16i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI16_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI16_0)
-; RV32-NEXT:    li a1, 32
-; RV32-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT:    vle8.v v16, (a0)
+; RV32-NEXT:    li a0, 32
+; RV32-NEXT:    lui a1, %hi(.LCPI16_0)
+; RV32-NEXT:    addi a1, a1, %lo(.LCPI16_0)
+; RV32-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT:    vle8.v v16, (a1)
 ; RV32-NEXT:    vsext.vf4 v8, v16
 ; RV32-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
index 6e327457bebff..368f454fa5fda 100644
--- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll
@@ -106,11 +106,11 @@ define <16 x i8> @v16i8(<16 x i8> %a) {
 define <32 x i8> @v16i8_2(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: v16i8_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI7_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI7_0)
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    lui a1, %hi(.LCPI7_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI7_0)
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v12, (a1)
 ; CHECK-NEXT:    vmv1r.v v14, v9
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12
 ; CHECK-NEXT:    vid.v v8
@@ -230,11 +230,11 @@ define <16 x i16> @v16i16(<16 x i16> %a) {
 define <32 x i16> @v16i16_2(<16 x i16> %a, <16 x i16> %b) {
 ; CHECK-LABEL: v16i16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI15_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI15_0)
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    lui a1, %hi(.LCPI15_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI15_0)
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vle16.v v16, (a1)
 ; CHECK-NEXT:    vmv2r.v v20, v10
 ; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    vrgather.vv v8, v12, v16
@@ -363,11 +363,11 @@ define <16 x i32> @v16i32(<16 x i32> %a) {
 define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) {
 ; CHECK-LABEL: v16i32_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI23_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI23_0)
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
-; CHECK-NEXT:    vle16.v v20, (a0)
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    lui a1, %hi(.LCPI23_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI23_0)
+; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
+; CHECK-NEXT:    vle16.v v20, (a1)
 ; CHECK-NEXT:    vmv4r.v v24, v12
 ; CHECK-NEXT:    vmv4r.v v16, v8
 ; CHECK-NEXT:    vrgatherei16.vv v8, v16, v20
@@ -548,11 +548,11 @@ define <16 x half> @v16f16(<16 x half> %a) {
 define <32 x half> @v16f16_2(<16 x half> %a) {
 ; CHECK-LABEL: v16f16_2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI35_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI35_0)
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT:    vle16.v v16, (a0)
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    lui a1, %hi(.LCPI35_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI35_0)
+; CHECK-NEXT:    vsetvli zero, a0, e16, m4, ta, ma
+; CHECK-NEXT:    vle16.v v16, (a1)
 ; CHECK-NEXT:    vrgather.vv v12, v8, v16
 ; CHECK-NEXT:    vmv.v.v v8, v12
 ; CHECK-NEXT:    ret
@@ -719,11 +719,11 @@ define <8 x double> @v4f64_2(<4 x double> %a, <4 x double> %b) {
 define <32 x i8> @v32i8(<32 x i8> %a) {
 ; CHECK-LABEL: v32i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI46_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI46_0)
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vle8.v v12, (a0)
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    lui a1, %hi(.LCPI46_0)
+; CHECK-NEXT:    addi a1, a1, %lo(.LCPI46_0)
+; CHECK-NEXT:    vsetvli zero, a0, e8, m2, ta, ma
+; CHECK-NEXT:    vle8.v v12, (a1)
 ; CHECK-NEXT:    vrgather.vv v10, v8, v12
 ; CHECK-NEXT:    vmv.v.v v8, v10
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll
index 87d69bfad38c2..d3e495bb723ad 100644
--- a/llvm/test/CodeGen/RISCV/tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -56,12 +56,12 @@ define void @caller_indirect_tail(i32 %a) nounwind {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    beqz a0, .LBB3_2
 ; CHECK-NEXT:  # %bb.1: # %entry
-; CHECK-NEXT:    lui a0, %hi(callee_indirect2)
-; CHECK-NEXT:    addi t1, a0, %lo(callee_indirect2)
+; CHECK-NEXT:    lui t1, %hi(callee_indirect2)
+; CHECK-NEXT:    addi t1, t1, %lo(callee_indirect2)
 ; CHECK-NEXT:    jr t1
 ; CHECK-NEXT:  .LBB3_2:
-; CHECK-NEXT:    lui a0, %hi(callee_indirect1)
-; CHECK-NEXT:    addi t1, a0, %lo(callee_indirect1)
+; CHECK-NEXT:    lui t1, %hi(callee_indirect1)
+; CHECK-NEXT:    addi t1, t1, %lo(callee_indirect1)
 ; CHECK-NEXT:    jr t1
 
 
diff --git a/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll b/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll
index 2fd4572d23456..6530736304837 100644
--- a/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll
+++ b/llvm/test/CodeGen/RISCV/unroll-loop-cse.ll
@@ -10,36 +10,30 @@
 define signext i32 @unroll_loop_cse() {
 ; CHECK-LABEL: unroll_loop_cse:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(x)
-; CHECK-NEXT:    lw a3, %lo(x)(a1)
-; CHECK-NEXT:    lui a2, %hi(check)
-; CHECK-NEXT:    lw a4, %lo(check)(a2)
+; CHECK-NEXT:    lui a0, %hi(x)
+; CHECK-NEXT:    lw a1, %lo(x)(a0)
+; CHECK-NEXT:    lui a0, %hi(check)
+; CHECK-NEXT:    lw a2, %lo(check)(a0)
 ; CHECK-NEXT:    li a0, 1
-; CHECK-NEXT:    bne a3, a4, .LBB0_6
-; CHECK-NEXT:  # %bb.1:
-; CHECK-NEXT:    addi a1, a1, %lo(x)
-; CHECK-NEXT:    lw a1, 4(a1)
-; CHECK-NEXT:    addi a2, a2, %lo(check)
-; CHECK-NEXT:    lw a2, 4(a2)
 ; CHECK-NEXT:    bne a1, a2, .LBB0_6
-; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    lui a1, %hi(x)
 ; CHECK-NEXT:    addi a1, a1, %lo(x)
-; CHECK-NEXT:    lw a3, 8(a1)
+; CHECK-NEXT:    lw a3, 4(a1)
 ; CHECK-NEXT:    lui a2, %hi(check)
 ; CHECK-NEXT:    addi a2, a2, %lo(check)
+; CHECK-NEXT:    lw a4, 4(a2)
+; CHECK-NEXT:    bne a3, a4, .LBB0_6
+; CHECK-NEXT:  # %bb.2:
+; CHECK-NEXT:    lw a3, 8(a1)
 ; CHECK-NEXT:    lw a4, 8(a2)
 ; CHECK-NEXT:    bne a3, a4, .LBB0_6
 ; CHECK-NEXT:  # %bb.3:
-; CHECK-NEXT:    lw a1, 12(a1)
-; CHECK-NEXT:    lw a2, 12(a2)
-; CHECK-NEXT:    bne a1, a2, .LBB0_6
+; CHECK-NEXT:    lw a3, 12(a1)
+; CHECK-NEXT:    lw a4, 12(a2)
+; CHECK-NEXT:    bne a3, a4, .LBB0_6
 ; CHECK-NEXT:  # %bb.4:
-; CHECK-NEXT:    lui a1, %hi(x)
-; CHECK-NEXT:    addi a1, a1, %lo(x)
 ; CHECK-NEXT:    lw a3, 16(a1)
-; CHECK-NEXT:    lui a2, %hi(check)
-; CHECK-NEXT:    addi a2, a2, %lo(check)
 ; CHECK-NEXT:    lw a4, 16(a2)
 ; CHECK-NEXT:    bne a3, a4, .LBB0_6
 ; CHECK-NEXT:  # %bb.5:



More information about the llvm-commits mailing list