[llvm] Rematerialize load RISCV backend (PR #73910)

via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 30 00:59:56 PST 2023


https://github.com/niwinanto updated https://github.com/llvm/llvm-project/pull/73910

>From 6a6306b5e1c511d8e4b895958963e687e95b5988 Mon Sep 17 00:00:00 2001
From: Niwin Anto <niwin.anto at hightec-rt.com>
Date: Thu, 30 Nov 2023 09:13:47 +0100
Subject: [PATCH] Rematerialize load RISCV backend

---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      |   48 +
 llvm/lib/Target/RISCV/RISCVInstrInfo.h        |    1 +
 llvm/lib/Target/RISCV/RISCVInstrInfo.td       |    2 +-
 llvm/test/CodeGen/RISCV/callee-saved-gprs.ll  |  432 +++----
 llvm/test/CodeGen/RISCV/calling-conv-half.ll  |    8 +-
 llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll    |   88 +-
 .../CodeGen/RISCV/ctz_zero_return_test.ll     |   78 +-
 .../CodeGen/RISCV/fastcc-without-f-reg.ll     | 1152 ++++++++---------
 llvm/test/CodeGen/RISCV/forced-atomics.ll     |    4 +-
 llvm/test/CodeGen/RISCV/nontemporal.ll        |  320 ++---
 llvm/test/CodeGen/RISCV/pr64645.ll            |    2 +-
 llvm/test/CodeGen/RISCV/push-pop-popret.ll    |  768 +++++------
 .../RISCV/remat-stack-load-aggressive.ll      |   62 +
 llvm/test/CodeGen/RISCV/rv32xtheadbb.ll       |   41 +-
 llvm/test/CodeGen/RISCV/rv32zbb.ll            |   41 +-
 .../CodeGen/RISCV/rvv/no-reserved-frame.ll    |   21 +-
 .../CodeGen/RISCV/rvv/rvv-out-arguments.ll    |   24 +-
 .../CodeGen/RISCV/srem-seteq-illegal-types.ll |    6 +-
 ...lar-shift-by-byte-multiple-legalization.ll |  264 ++--
 .../RISCV/wide-scalar-shift-legalization.ll   |  176 +--
 20 files changed, 1790 insertions(+), 1748 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/remat-stack-load-aggressive.ll

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 6c5712dc795bc75..0f615ca38b93d45 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineCombinerPattern.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -46,6 +47,11 @@ static cl::opt<bool> PreferWholeRegisterMove(
     "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
     cl::desc("Prefer whole register move for vector registers."));
 
+static cl::opt<bool>
+    AggressiveLoadRemat("riscv-enable-load-remat-aggressive", cl::init(true),
+                        cl::Hidden,
+                        cl::desc("Rematerialize load aggressively"));
+
 static cl::opt<MachineTraceStrategy> ForceMachineCombinerStrategy(
     "riscv-force-machine-combiner-strategy", cl::Hidden,
     cl::desc("Force machine combiner to use a specific strategy for machine "
@@ -1567,6 +1573,48 @@ bool RISCVInstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
   return MI.isAsCheapAsAMove();
 }
 
+bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
+    const MachineInstr &MI) const {
+  if (TargetInstrInfo::isReallyTriviallyReMaterializable(MI))
+    return true;
+
+  const MachineFunction &MF = *MI.getMF();
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  const MachineOperand &Dest = MI.getOperand(0);
+  if (!MRI.hasOneUse(Dest.getReg()))
+    return false;
+
+  MachineInstr *UseMI = &*MRI.use_instr_begin(Dest.getReg());
+  MachineBasicBlock::const_iterator DefItr(MI);
+  MachineBasicBlock::const_iterator UseItr(UseMI);
+
+  const MachineBasicBlock *MBB = nullptr;
+  if ((MBB = MI.getParent()) != UseMI->getParent())
+    return false;
+
+  // When loading from stack and the stack slot is not modified before its use,
+  // then rematerialize this load.
+  int FrameIdx = 0;
+  if (isLoadFromStackSlot(MI, FrameIdx) && AggressiveLoadRemat) {
+    for (; DefItr != UseItr && DefItr != MBB->end(); DefItr++) {
+      int StoreFrameIdx = 0;
+      if ((*DefItr).isCall() || (isStoreToStackSlot(*DefItr, StoreFrameIdx) &&
+                                 StoreFrameIdx == FrameIdx))
+        return false;
+    }
+    return true;
+  } else if (MI.mayLoad() && AggressiveLoadRemat) {
+    for (; DefItr != UseItr && DefItr != MBB->end(); DefItr++) {
+      if ((*DefItr).isCall() || (*DefItr).mayStore())
+        return false;
+    }
+    return true;
+  }
+
+  return false;
+}
+
 std::optional<DestSourcePair>
 RISCVInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
   if (MI.isMoveReg())
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 8f860077c303170..c572281edde49da 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -138,6 +138,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
                                bool) const override;
 
   bool isAsCheapAsAMove(const MachineInstr &MI) const override;
+  bool isReallyTriviallyReMaterializable(const MachineInstr &MI) const override;
 
   std::optional<DestSourcePair>
   isCopyInstrImpl(const MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index edc08187d8f775a..78128a4a89892df 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -626,7 +626,7 @@ def BGE  : BranchCC_rri<0b101, "bge">;
 def BLTU : BranchCC_rri<0b110, "bltu">;
 def BGEU : BranchCC_rri<0b111, "bgeu">;
 
-let IsSignExtendingOpW = 1 in {
+let IsSignExtendingOpW = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
 def LB  : Load_ri<0b000, "lb">, Sched<[WriteLDB, ReadMemBase]>;
 def LH  : Load_ri<0b001, "lh">, Sched<[WriteLDH, ReadMemBase]>;
 def LW  : Load_ri<0b010, "lw">, Sched<[WriteLDW, ReadMemBase]>;
diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
index 09ecbbc7e8feb81..201850060fe04a4 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
@@ -50,16 +50,16 @@ define void @callee() nounwind {
 ; RV32I-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lui a6, %hi(var)
-; RV32I-NEXT:    lw a0, %lo(var)(a6)
+; RV32I-NEXT:    lui a4, %hi(var)
+; RV32I-NEXT:    lw a0, %lo(var)(a4)
 ; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var+4)(a6)
+; RV32I-NEXT:    lw a0, %lo(var+4)(a4)
 ; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var+8)(a6)
+; RV32I-NEXT:    lw a0, %lo(var+8)(a4)
 ; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var+12)(a6)
+; RV32I-NEXT:    lw a0, %lo(var+12)(a4)
 ; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    addi a5, a6, %lo(var)
+; RV32I-NEXT:    addi a5, a4, %lo(var)
 ; RV32I-NEXT:    lw a0, 16(a5)
 ; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lw a0, 20(a5)
@@ -84,18 +84,18 @@ define void @callee() nounwind {
 ; RV32I-NEXT:    lw s10, 92(a5)
 ; RV32I-NEXT:    lw s11, 96(a5)
 ; RV32I-NEXT:    lw ra, 100(a5)
-; RV32I-NEXT:    lw a7, 104(a5)
-; RV32I-NEXT:    lw a4, 108(a5)
-; RV32I-NEXT:    lw a0, 124(a5)
-; RV32I-NEXT:    lw a1, 120(a5)
-; RV32I-NEXT:    lw a2, 116(a5)
-; RV32I-NEXT:    lw a3, 112(a5)
-; RV32I-NEXT:    sw a0, 124(a5)
-; RV32I-NEXT:    sw a1, 120(a5)
-; RV32I-NEXT:    sw a2, 116(a5)
-; RV32I-NEXT:    sw a3, 112(a5)
-; RV32I-NEXT:    sw a4, 108(a5)
-; RV32I-NEXT:    sw a7, 104(a5)
+; RV32I-NEXT:    lw a6, 104(a5)
+; RV32I-NEXT:    lw a3, 108(a5)
+; RV32I-NEXT:    lw a7, 124(a5)
+; RV32I-NEXT:    lw a0, 120(a5)
+; RV32I-NEXT:    lw a1, 116(a5)
+; RV32I-NEXT:    lw a2, 112(a5)
+; RV32I-NEXT:    sw a7, 124(a5)
+; RV32I-NEXT:    sw a0, 120(a5)
+; RV32I-NEXT:    sw a1, 116(a5)
+; RV32I-NEXT:    sw a2, 112(a5)
+; RV32I-NEXT:    sw a3, 108(a5)
+; RV32I-NEXT:    sw a6, 104(a5)
 ; RV32I-NEXT:    sw ra, 100(a5)
 ; RV32I-NEXT:    sw s11, 96(a5)
 ; RV32I-NEXT:    sw s10, 92(a5)
@@ -121,13 +121,13 @@ define void @callee() nounwind {
 ; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    sw a0, 16(a5)
 ; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var+12)(a6)
+; RV32I-NEXT:    sw a0, %lo(var+12)(a4)
 ; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var+8)(a6)
+; RV32I-NEXT:    sw a0, %lo(var+8)(a4)
 ; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var+4)(a6)
+; RV32I-NEXT:    sw a0, %lo(var+4)(a4)
 ; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var)(a6)
+; RV32I-NEXT:    sw a0, %lo(var)(a4)
 ; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -161,16 +161,16 @@ define void @callee() nounwind {
 ; RV32I-WITH-FP-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    addi s0, sp, 80
-; RV32I-WITH-FP-NEXT:    lui a6, %hi(var)
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var)(a6)
+; RV32I-WITH-FP-NEXT:    lui a4, %hi(var)
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var)(a4)
 ; RV32I-WITH-FP-NEXT:    sw a0, -56(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+4)(a6)
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+4)(a4)
 ; RV32I-WITH-FP-NEXT:    sw a0, -60(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+8)(a6)
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+8)(a4)
 ; RV32I-WITH-FP-NEXT:    sw a0, -64(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+12)(a6)
+; RV32I-WITH-FP-NEXT:    lw a0, %lo(var+12)(a4)
 ; RV32I-WITH-FP-NEXT:    sw a0, -68(s0) # 4-byte Folded Spill
-; RV32I-WITH-FP-NEXT:    addi a5, a6, %lo(var)
+; RV32I-WITH-FP-NEXT:    addi a5, a4, %lo(var)
 ; RV32I-WITH-FP-NEXT:    lw a0, 16(a5)
 ; RV32I-WITH-FP-NEXT:    sw a0, -72(s0) # 4-byte Folded Spill
 ; RV32I-WITH-FP-NEXT:    lw a0, 20(a5)
@@ -195,20 +195,20 @@ define void @callee() nounwind {
 ; RV32I-WITH-FP-NEXT:    lw s10, 88(a5)
 ; RV32I-WITH-FP-NEXT:    lw s11, 92(a5)
 ; RV32I-WITH-FP-NEXT:    lw ra, 96(a5)
-; RV32I-WITH-FP-NEXT:    lw t0, 100(a5)
-; RV32I-WITH-FP-NEXT:    lw a7, 104(a5)
-; RV32I-WITH-FP-NEXT:    lw a4, 108(a5)
-; RV32I-WITH-FP-NEXT:    lw a0, 124(a5)
-; RV32I-WITH-FP-NEXT:    lw a1, 120(a5)
-; RV32I-WITH-FP-NEXT:    lw a2, 116(a5)
-; RV32I-WITH-FP-NEXT:    lw a3, 112(a5)
-; RV32I-WITH-FP-NEXT:    sw a0, 124(a5)
-; RV32I-WITH-FP-NEXT:    sw a1, 120(a5)
-; RV32I-WITH-FP-NEXT:    sw a2, 116(a5)
-; RV32I-WITH-FP-NEXT:    sw a3, 112(a5)
-; RV32I-WITH-FP-NEXT:    sw a4, 108(a5)
-; RV32I-WITH-FP-NEXT:    sw a7, 104(a5)
-; RV32I-WITH-FP-NEXT:    sw t0, 100(a5)
+; RV32I-WITH-FP-NEXT:    lw a7, 100(a5)
+; RV32I-WITH-FP-NEXT:    lw a6, 104(a5)
+; RV32I-WITH-FP-NEXT:    lw a3, 108(a5)
+; RV32I-WITH-FP-NEXT:    lw t0, 124(a5)
+; RV32I-WITH-FP-NEXT:    lw a0, 120(a5)
+; RV32I-WITH-FP-NEXT:    lw a1, 116(a5)
+; RV32I-WITH-FP-NEXT:    lw a2, 112(a5)
+; RV32I-WITH-FP-NEXT:    sw t0, 124(a5)
+; RV32I-WITH-FP-NEXT:    sw a0, 120(a5)
+; RV32I-WITH-FP-NEXT:    sw a1, 116(a5)
+; RV32I-WITH-FP-NEXT:    sw a2, 112(a5)
+; RV32I-WITH-FP-NEXT:    sw a3, 108(a5)
+; RV32I-WITH-FP-NEXT:    sw a6, 104(a5)
+; RV32I-WITH-FP-NEXT:    sw a7, 100(a5)
 ; RV32I-WITH-FP-NEXT:    sw ra, 96(a5)
 ; RV32I-WITH-FP-NEXT:    sw s11, 92(a5)
 ; RV32I-WITH-FP-NEXT:    sw s10, 88(a5)
@@ -234,13 +234,13 @@ define void @callee() nounwind {
 ; RV32I-WITH-FP-NEXT:    lw a0, -72(s0) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    sw a0, 16(a5)
 ; RV32I-WITH-FP-NEXT:    lw a0, -68(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+12)(a6)
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+12)(a4)
 ; RV32I-WITH-FP-NEXT:    lw a0, -64(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+8)(a6)
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+8)(a4)
 ; RV32I-WITH-FP-NEXT:    lw a0, -60(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+4)(a6)
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+4)(a4)
 ; RV32I-WITH-FP-NEXT:    lw a0, -56(s0) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    sw a0, %lo(var)(a6)
+; RV32I-WITH-FP-NEXT:    sw a0, %lo(var)(a4)
 ; RV32I-WITH-FP-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -260,16 +260,16 @@ define void @callee() nounwind {
 ; RV32IZCMP-LABEL: callee:
 ; RV32IZCMP:       # %bb.0:
 ; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -96
-; RV32IZCMP-NEXT:    lui a6, %hi(var)
-; RV32IZCMP-NEXT:    lw a0, %lo(var)(a6)
+; RV32IZCMP-NEXT:    lui a4, %hi(var)
+; RV32IZCMP-NEXT:    lw a0, %lo(var)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var+4)(a6)
+; RV32IZCMP-NEXT:    lw a0, %lo(var+4)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var+8)(a6)
+; RV32IZCMP-NEXT:    lw a0, %lo(var+8)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var+12)(a6)
+; RV32IZCMP-NEXT:    lw a0, %lo(var+12)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    addi a5, a6, %lo(var)
+; RV32IZCMP-NEXT:    addi a5, a4, %lo(var)
 ; RV32IZCMP-NEXT:    lw a0, 16(a5)
 ; RV32IZCMP-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
 ; RV32IZCMP-NEXT:    lw a0, 20(a5)
@@ -289,28 +289,28 @@ define void @callee() nounwind {
 ; RV32IZCMP-NEXT:    lw s11, 72(a5)
 ; RV32IZCMP-NEXT:    lw ra, 76(a5)
 ; RV32IZCMP-NEXT:    lw s1, 80(a5)
-; RV32IZCMP-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-NEXT:    lw t1, 92(a5)
-; RV32IZCMP-NEXT:    lw t0, 96(a5)
+; RV32IZCMP-NEXT:    lw t2, 84(a5)
+; RV32IZCMP-NEXT:    lw t1, 88(a5)
+; RV32IZCMP-NEXT:    lw t0, 92(a5)
+; RV32IZCMP-NEXT:    lw a7, 96(a5)
 ; RV32IZCMP-NEXT:    lw s0, 100(a5)
-; RV32IZCMP-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-NEXT:    sw a7, 104(a5)
+; RV32IZCMP-NEXT:    lw a6, 104(a5)
+; RV32IZCMP-NEXT:    lw a3, 108(a5)
+; RV32IZCMP-NEXT:    lw t3, 124(a5)
+; RV32IZCMP-NEXT:    lw a0, 120(a5)
+; RV32IZCMP-NEXT:    lw a1, 116(a5)
+; RV32IZCMP-NEXT:    lw a2, 112(a5)
+; RV32IZCMP-NEXT:    sw t3, 124(a5)
+; RV32IZCMP-NEXT:    sw a0, 120(a5)
+; RV32IZCMP-NEXT:    sw a1, 116(a5)
+; RV32IZCMP-NEXT:    sw a2, 112(a5)
+; RV32IZCMP-NEXT:    sw a3, 108(a5)
+; RV32IZCMP-NEXT:    sw a6, 104(a5)
 ; RV32IZCMP-NEXT:    sw s0, 100(a5)
-; RV32IZCMP-NEXT:    sw t0, 96(a5)
-; RV32IZCMP-NEXT:    sw t1, 92(a5)
-; RV32IZCMP-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-NEXT:    sw t3, 84(a5)
+; RV32IZCMP-NEXT:    sw a7, 96(a5)
+; RV32IZCMP-NEXT:    sw t0, 92(a5)
+; RV32IZCMP-NEXT:    sw t1, 88(a5)
+; RV32IZCMP-NEXT:    sw t2, 84(a5)
 ; RV32IZCMP-NEXT:    sw s1, 80(a5)
 ; RV32IZCMP-NEXT:    sw ra, 76(a5)
 ; RV32IZCMP-NEXT:    sw s11, 72(a5)
@@ -331,13 +331,13 @@ define void @callee() nounwind {
 ; RV32IZCMP-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
 ; RV32IZCMP-NEXT:    sw a0, 16(a5)
 ; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var+12)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var+12)(a4)
 ; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var+8)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var+8)(a4)
 ; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var+4)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var+4)(a4)
 ; RV32IZCMP-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var)(a4)
 ; RV32IZCMP-NEXT:    cm.popret {ra, s0-s11}, 96
 ;
 ; RV32IZCMP-WITH-FP-LABEL: callee:
@@ -357,16 +357,16 @@ define void @callee() nounwind {
 ; RV32IZCMP-WITH-FP-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    addi s0, sp, 80
-; RV32IZCMP-WITH-FP-NEXT:    lui a6, %hi(var)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var)(a6)
+; RV32IZCMP-WITH-FP-NEXT:    lui a4, %hi(var)
+; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var)(a4)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -56(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+4)(a6)
+; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+4)(a4)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -60(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+8)(a6)
+; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+8)(a4)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -64(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+12)(a6)
+; RV32IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+12)(a4)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -68(s0) # 4-byte Folded Spill
-; RV32IZCMP-WITH-FP-NEXT:    addi a5, a6, %lo(var)
+; RV32IZCMP-WITH-FP-NEXT:    addi a5, a4, %lo(var)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, 16(a5)
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, -72(s0) # 4-byte Folded Spill
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, 20(a5)
@@ -386,30 +386,30 @@ define void @callee() nounwind {
 ; RV32IZCMP-WITH-FP-NEXT:    lw s10, 68(a5)
 ; RV32IZCMP-WITH-FP-NEXT:    lw s11, 72(a5)
 ; RV32IZCMP-WITH-FP-NEXT:    lw ra, 76(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t4, 80(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t2, 88(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw t3, 80(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw t2, 84(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw t1, 88(a5)
 ; RV32IZCMP-WITH-FP-NEXT:    lw s1, 92(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t1, 96(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw t0, 100(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-WITH-FP-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw a7, 104(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t0, 100(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t1, 96(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw t0, 96(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw a7, 100(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw a6, 104(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw a3, 108(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw t4, 124(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw a0, 120(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw a1, 116(a5)
+; RV32IZCMP-WITH-FP-NEXT:    lw a2, 112(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw t4, 124(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw a0, 120(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw a1, 116(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw a2, 112(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw a3, 108(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw a6, 104(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw a7, 100(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw t0, 96(a5)
 ; RV32IZCMP-WITH-FP-NEXT:    sw s1, 92(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t3, 84(a5)
-; RV32IZCMP-WITH-FP-NEXT:    sw t4, 80(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw t1, 88(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw t2, 84(a5)
+; RV32IZCMP-WITH-FP-NEXT:    sw t3, 80(a5)
 ; RV32IZCMP-WITH-FP-NEXT:    sw ra, 76(a5)
 ; RV32IZCMP-WITH-FP-NEXT:    sw s11, 72(a5)
 ; RV32IZCMP-WITH-FP-NEXT:    sw s10, 68(a5)
@@ -430,13 +430,13 @@ define void @callee() nounwind {
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -72(s0) # 4-byte Folded Reload
 ; RV32IZCMP-WITH-FP-NEXT:    sw a0, 16(a5)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -68(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+12)(a6)
+; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+12)(a4)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -64(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+8)(a6)
+; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+8)(a4)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -60(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+4)(a6)
+; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+4)(a4)
 ; RV32IZCMP-WITH-FP-NEXT:    lw a0, -56(s0) # 4-byte Folded Reload
-; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var)(a6)
+; RV32IZCMP-WITH-FP-NEXT:    sw a0, %lo(var)(a4)
 ; RV32IZCMP-WITH-FP-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32IZCMP-WITH-FP-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32IZCMP-WITH-FP-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -469,16 +469,16 @@ define void @callee() nounwind {
 ; RV64I-NEXT:    sd s9, 72(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s10, 64(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lui a6, %hi(var)
-; RV64I-NEXT:    lw a0, %lo(var)(a6)
+; RV64I-NEXT:    lui a4, %hi(var)
+; RV64I-NEXT:    lw a0, %lo(var)(a4)
 ; RV64I-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var+4)(a6)
+; RV64I-NEXT:    lw a0, %lo(var+4)(a4)
 ; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var+8)(a6)
+; RV64I-NEXT:    lw a0, %lo(var+8)(a4)
 ; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var+12)(a6)
+; RV64I-NEXT:    lw a0, %lo(var+12)(a4)
 ; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    addi a5, a6, %lo(var)
+; RV64I-NEXT:    addi a5, a4, %lo(var)
 ; RV64I-NEXT:    lw a0, 16(a5)
 ; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    lw a0, 20(a5)
@@ -503,18 +503,18 @@ define void @callee() nounwind {
 ; RV64I-NEXT:    lw s10, 92(a5)
 ; RV64I-NEXT:    lw s11, 96(a5)
 ; RV64I-NEXT:    lw ra, 100(a5)
-; RV64I-NEXT:    lw a7, 104(a5)
-; RV64I-NEXT:    lw a4, 108(a5)
-; RV64I-NEXT:    lw a0, 124(a5)
-; RV64I-NEXT:    lw a1, 120(a5)
-; RV64I-NEXT:    lw a2, 116(a5)
-; RV64I-NEXT:    lw a3, 112(a5)
-; RV64I-NEXT:    sw a0, 124(a5)
-; RV64I-NEXT:    sw a1, 120(a5)
-; RV64I-NEXT:    sw a2, 116(a5)
-; RV64I-NEXT:    sw a3, 112(a5)
-; RV64I-NEXT:    sw a4, 108(a5)
-; RV64I-NEXT:    sw a7, 104(a5)
+; RV64I-NEXT:    lw a6, 104(a5)
+; RV64I-NEXT:    lw a3, 108(a5)
+; RV64I-NEXT:    lw a7, 124(a5)
+; RV64I-NEXT:    lw a0, 120(a5)
+; RV64I-NEXT:    lw a1, 116(a5)
+; RV64I-NEXT:    lw a2, 112(a5)
+; RV64I-NEXT:    sw a7, 124(a5)
+; RV64I-NEXT:    sw a0, 120(a5)
+; RV64I-NEXT:    sw a1, 116(a5)
+; RV64I-NEXT:    sw a2, 112(a5)
+; RV64I-NEXT:    sw a3, 108(a5)
+; RV64I-NEXT:    sw a6, 104(a5)
 ; RV64I-NEXT:    sw ra, 100(a5)
 ; RV64I-NEXT:    sw s11, 96(a5)
 ; RV64I-NEXT:    sw s10, 92(a5)
@@ -540,13 +540,13 @@ define void @callee() nounwind {
 ; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    sw a0, 16(a5)
 ; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var+12)(a6)
+; RV64I-NEXT:    sw a0, %lo(var+12)(a4)
 ; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var+8)(a6)
+; RV64I-NEXT:    sw a0, %lo(var+8)(a4)
 ; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var+4)(a6)
+; RV64I-NEXT:    sw a0, %lo(var+4)(a4)
 ; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var)(a6)
+; RV64I-NEXT:    sw a0, %lo(var)(a4)
 ; RV64I-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
@@ -580,16 +580,16 @@ define void @callee() nounwind {
 ; RV64I-WITH-FP-NEXT:    sd s10, 64(sp) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    addi s0, sp, 160
-; RV64I-WITH-FP-NEXT:    lui a6, %hi(var)
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var)(a6)
+; RV64I-WITH-FP-NEXT:    lui a4, %hi(var)
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var)(a4)
 ; RV64I-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+4)(a6)
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+4)(a4)
 ; RV64I-WITH-FP-NEXT:    sd a0, -120(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+8)(a6)
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+8)(a4)
 ; RV64I-WITH-FP-NEXT:    sd a0, -128(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+12)(a6)
+; RV64I-WITH-FP-NEXT:    lw a0, %lo(var+12)(a4)
 ; RV64I-WITH-FP-NEXT:    sd a0, -136(s0) # 8-byte Folded Spill
-; RV64I-WITH-FP-NEXT:    addi a5, a6, %lo(var)
+; RV64I-WITH-FP-NEXT:    addi a5, a4, %lo(var)
 ; RV64I-WITH-FP-NEXT:    lw a0, 16(a5)
 ; RV64I-WITH-FP-NEXT:    sd a0, -144(s0) # 8-byte Folded Spill
 ; RV64I-WITH-FP-NEXT:    lw a0, 20(a5)
@@ -614,20 +614,20 @@ define void @callee() nounwind {
 ; RV64I-WITH-FP-NEXT:    lw s10, 88(a5)
 ; RV64I-WITH-FP-NEXT:    lw s11, 92(a5)
 ; RV64I-WITH-FP-NEXT:    lw ra, 96(a5)
-; RV64I-WITH-FP-NEXT:    lw t0, 100(a5)
-; RV64I-WITH-FP-NEXT:    lw a7, 104(a5)
-; RV64I-WITH-FP-NEXT:    lw a4, 108(a5)
-; RV64I-WITH-FP-NEXT:    lw a0, 124(a5)
-; RV64I-WITH-FP-NEXT:    lw a1, 120(a5)
-; RV64I-WITH-FP-NEXT:    lw a2, 116(a5)
-; RV64I-WITH-FP-NEXT:    lw a3, 112(a5)
-; RV64I-WITH-FP-NEXT:    sw a0, 124(a5)
-; RV64I-WITH-FP-NEXT:    sw a1, 120(a5)
-; RV64I-WITH-FP-NEXT:    sw a2, 116(a5)
-; RV64I-WITH-FP-NEXT:    sw a3, 112(a5)
-; RV64I-WITH-FP-NEXT:    sw a4, 108(a5)
-; RV64I-WITH-FP-NEXT:    sw a7, 104(a5)
-; RV64I-WITH-FP-NEXT:    sw t0, 100(a5)
+; RV64I-WITH-FP-NEXT:    lw a7, 100(a5)
+; RV64I-WITH-FP-NEXT:    lw a6, 104(a5)
+; RV64I-WITH-FP-NEXT:    lw a3, 108(a5)
+; RV64I-WITH-FP-NEXT:    lw t0, 124(a5)
+; RV64I-WITH-FP-NEXT:    lw a0, 120(a5)
+; RV64I-WITH-FP-NEXT:    lw a1, 116(a5)
+; RV64I-WITH-FP-NEXT:    lw a2, 112(a5)
+; RV64I-WITH-FP-NEXT:    sw t0, 124(a5)
+; RV64I-WITH-FP-NEXT:    sw a0, 120(a5)
+; RV64I-WITH-FP-NEXT:    sw a1, 116(a5)
+; RV64I-WITH-FP-NEXT:    sw a2, 112(a5)
+; RV64I-WITH-FP-NEXT:    sw a3, 108(a5)
+; RV64I-WITH-FP-NEXT:    sw a6, 104(a5)
+; RV64I-WITH-FP-NEXT:    sw a7, 100(a5)
 ; RV64I-WITH-FP-NEXT:    sw ra, 96(a5)
 ; RV64I-WITH-FP-NEXT:    sw s11, 92(a5)
 ; RV64I-WITH-FP-NEXT:    sw s10, 88(a5)
@@ -653,13 +653,13 @@ define void @callee() nounwind {
 ; RV64I-WITH-FP-NEXT:    ld a0, -144(s0) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    sw a0, 16(a5)
 ; RV64I-WITH-FP-NEXT:    ld a0, -136(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+12)(a6)
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+12)(a4)
 ; RV64I-WITH-FP-NEXT:    ld a0, -128(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+8)(a6)
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+8)(a4)
 ; RV64I-WITH-FP-NEXT:    ld a0, -120(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+4)(a6)
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+4)(a4)
 ; RV64I-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    sw a0, %lo(var)(a6)
+; RV64I-WITH-FP-NEXT:    sw a0, %lo(var)(a4)
 ; RV64I-WITH-FP-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
@@ -679,16 +679,16 @@ define void @callee() nounwind {
 ; RV64IZCMP-LABEL: callee:
 ; RV64IZCMP:       # %bb.0:
 ; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
-; RV64IZCMP-NEXT:    lui a6, %hi(var)
-; RV64IZCMP-NEXT:    lw a0, %lo(var)(a6)
+; RV64IZCMP-NEXT:    lui a4, %hi(var)
+; RV64IZCMP-NEXT:    lw a0, %lo(var)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var+4)(a6)
+; RV64IZCMP-NEXT:    lw a0, %lo(var+4)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var+8)(a6)
+; RV64IZCMP-NEXT:    lw a0, %lo(var+8)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var+12)(a6)
+; RV64IZCMP-NEXT:    lw a0, %lo(var+12)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    addi a5, a6, %lo(var)
+; RV64IZCMP-NEXT:    addi a5, a4, %lo(var)
 ; RV64IZCMP-NEXT:    lw a0, 16(a5)
 ; RV64IZCMP-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
 ; RV64IZCMP-NEXT:    lw a0, 20(a5)
@@ -708,28 +708,28 @@ define void @callee() nounwind {
 ; RV64IZCMP-NEXT:    lw s11, 72(a5)
 ; RV64IZCMP-NEXT:    lw ra, 76(a5)
 ; RV64IZCMP-NEXT:    lw s1, 80(a5)
-; RV64IZCMP-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-NEXT:    lw t1, 92(a5)
-; RV64IZCMP-NEXT:    lw t0, 96(a5)
+; RV64IZCMP-NEXT:    lw t2, 84(a5)
+; RV64IZCMP-NEXT:    lw t1, 88(a5)
+; RV64IZCMP-NEXT:    lw t0, 92(a5)
+; RV64IZCMP-NEXT:    lw a7, 96(a5)
 ; RV64IZCMP-NEXT:    lw s0, 100(a5)
-; RV64IZCMP-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-NEXT:    sw a7, 104(a5)
+; RV64IZCMP-NEXT:    lw a6, 104(a5)
+; RV64IZCMP-NEXT:    lw a3, 108(a5)
+; RV64IZCMP-NEXT:    lw t3, 124(a5)
+; RV64IZCMP-NEXT:    lw a0, 120(a5)
+; RV64IZCMP-NEXT:    lw a1, 116(a5)
+; RV64IZCMP-NEXT:    lw a2, 112(a5)
+; RV64IZCMP-NEXT:    sw t3, 124(a5)
+; RV64IZCMP-NEXT:    sw a0, 120(a5)
+; RV64IZCMP-NEXT:    sw a1, 116(a5)
+; RV64IZCMP-NEXT:    sw a2, 112(a5)
+; RV64IZCMP-NEXT:    sw a3, 108(a5)
+; RV64IZCMP-NEXT:    sw a6, 104(a5)
 ; RV64IZCMP-NEXT:    sw s0, 100(a5)
-; RV64IZCMP-NEXT:    sw t0, 96(a5)
-; RV64IZCMP-NEXT:    sw t1, 92(a5)
-; RV64IZCMP-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-NEXT:    sw t3, 84(a5)
+; RV64IZCMP-NEXT:    sw a7, 96(a5)
+; RV64IZCMP-NEXT:    sw t0, 92(a5)
+; RV64IZCMP-NEXT:    sw t1, 88(a5)
+; RV64IZCMP-NEXT:    sw t2, 84(a5)
 ; RV64IZCMP-NEXT:    sw s1, 80(a5)
 ; RV64IZCMP-NEXT:    sw ra, 76(a5)
 ; RV64IZCMP-NEXT:    sw s11, 72(a5)
@@ -750,13 +750,13 @@ define void @callee() nounwind {
 ; RV64IZCMP-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
 ; RV64IZCMP-NEXT:    sw a0, 16(a5)
 ; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var+12)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var+12)(a4)
 ; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var+8)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var+8)(a4)
 ; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var+4)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var+4)(a4)
 ; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var)(a4)
 ; RV64IZCMP-NEXT:    cm.popret {ra, s0-s11}, 160
 ;
 ; RV64IZCMP-WITH-FP-LABEL: callee:
@@ -776,16 +776,16 @@ define void @callee() nounwind {
 ; RV64IZCMP-WITH-FP-NEXT:    sd s10, 64(sp) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    addi s0, sp, 160
-; RV64IZCMP-WITH-FP-NEXT:    lui a6, %hi(var)
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var)(a6)
+; RV64IZCMP-WITH-FP-NEXT:    lui a4, %hi(var)
+; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var)(a4)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -112(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+4)(a6)
+; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+4)(a4)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -120(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+8)(a6)
+; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+8)(a4)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -128(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+12)(a6)
+; RV64IZCMP-WITH-FP-NEXT:    lw a0, %lo(var+12)(a4)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -136(s0) # 8-byte Folded Spill
-; RV64IZCMP-WITH-FP-NEXT:    addi a5, a6, %lo(var)
+; RV64IZCMP-WITH-FP-NEXT:    addi a5, a4, %lo(var)
 ; RV64IZCMP-WITH-FP-NEXT:    lw a0, 16(a5)
 ; RV64IZCMP-WITH-FP-NEXT:    sd a0, -144(s0) # 8-byte Folded Spill
 ; RV64IZCMP-WITH-FP-NEXT:    lw a0, 20(a5)
@@ -805,30 +805,30 @@ define void @callee() nounwind {
 ; RV64IZCMP-WITH-FP-NEXT:    lw s10, 68(a5)
 ; RV64IZCMP-WITH-FP-NEXT:    lw s11, 72(a5)
 ; RV64IZCMP-WITH-FP-NEXT:    lw ra, 76(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t4, 80(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t2, 88(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw t3, 80(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw t2, 84(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw t1, 88(a5)
 ; RV64IZCMP-WITH-FP-NEXT:    lw s1, 92(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t1, 96(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw t0, 100(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-WITH-FP-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw a7, 104(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t0, 100(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t1, 96(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw t0, 96(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw a7, 100(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw a6, 104(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw a3, 108(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw t4, 124(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw a0, 120(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw a1, 116(a5)
+; RV64IZCMP-WITH-FP-NEXT:    lw a2, 112(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw t4, 124(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw a0, 120(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw a1, 116(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw a2, 112(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw a3, 108(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw a6, 104(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw a7, 100(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw t0, 96(a5)
 ; RV64IZCMP-WITH-FP-NEXT:    sw s1, 92(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t3, 84(a5)
-; RV64IZCMP-WITH-FP-NEXT:    sw t4, 80(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw t1, 88(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw t2, 84(a5)
+; RV64IZCMP-WITH-FP-NEXT:    sw t3, 80(a5)
 ; RV64IZCMP-WITH-FP-NEXT:    sw ra, 76(a5)
 ; RV64IZCMP-WITH-FP-NEXT:    sw s11, 72(a5)
 ; RV64IZCMP-WITH-FP-NEXT:    sw s10, 68(a5)
@@ -849,13 +849,13 @@ define void @callee() nounwind {
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -144(s0) # 8-byte Folded Reload
 ; RV64IZCMP-WITH-FP-NEXT:    sw a0, 16(a5)
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -136(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+12)(a6)
+; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+12)(a4)
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -128(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+8)(a6)
+; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+8)(a4)
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -120(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+4)(a6)
+; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var+4)(a4)
 ; RV64IZCMP-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
-; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var)(a6)
+; RV64IZCMP-WITH-FP-NEXT:    sw a0, %lo(var)(a4)
 ; RV64IZCMP-WITH-FP-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
 ; RV64IZCMP-WITH-FP-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
 ; RV64IZCMP-WITH-FP-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
index 6587f0c8c5af7bf..1c7d0b3a39a46dd 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
@@ -227,8 +227,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lhu a0, 16(sp)
 ; RV32I-NEXT:    mv s0, a7
+; RV32I-NEXT:    lhu a0, 16(sp)
 ; RV32I-NEXT:    call __extendhfsf2 at plt
 ; RV32I-NEXT:    call __fixsfsi at plt
 ; RV32I-NEXT:    add a0, s0, a0
@@ -242,8 +242,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
 ; RV64I-NEXT:    addi sp, sp, -16
 ; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lhu a0, 16(sp)
 ; RV64I-NEXT:    mv s0, a7
+; RV64I-NEXT:    lhu a0, 16(sp)
 ; RV64I-NEXT:    call __extendhfsf2 at plt
 ; RV64I-NEXT:    call __fixsfdi at plt
 ; RV64I-NEXT:    addw a0, s0, a0
@@ -257,8 +257,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
 ; RV32IF-NEXT:    addi sp, sp, -16
 ; RV32IF-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
 ; RV32IF-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; RV32IF-NEXT:    lhu a0, 16(sp)
 ; RV32IF-NEXT:    mv s0, a7
+; RV32IF-NEXT:    lhu a0, 16(sp)
 ; RV32IF-NEXT:    call __extendhfsf2 at plt
 ; RV32IF-NEXT:    fmv.w.x fa5, a0
 ; RV32IF-NEXT:    fcvt.w.s a0, fa5, rtz
@@ -273,8 +273,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
 ; RV64IF-NEXT:    addi sp, sp, -16
 ; RV64IF-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
 ; RV64IF-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
-; RV64IF-NEXT:    lhu a0, 16(sp)
 ; RV64IF-NEXT:    mv s0, a7
+; RV64IF-NEXT:    lhu a0, 16(sp)
 ; RV64IF-NEXT:    call __extendhfsf2 at plt
 ; RV64IF-NEXT:    fmv.w.x fa5, a0
 ; RV64IF-NEXT:    fcvt.l.s a0, fa5, rtz
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index da67176e3f0ca03..8ec8638507f4172 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -374,38 +374,37 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:    and a0, s0, a0
 ; RV32I-NEXT:    lui a1, 30667
-; RV32I-NEXT:    addi s3, a1, 1329
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    addi s2, a1, 1329
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3 at plt
-; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, a0, %lo(.LCPI3_0)
-; RV32I-NEXT:    neg a0, s2
-; RV32I-NEXT:    and a0, s2, a0
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    srli a0, a0, 27
+; RV32I-NEXT:    lui a1, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s4, a1, %lo(.LCPI3_0)
+; RV32I-NEXT:    add s3, s4, a0
+; RV32I-NEXT:    neg a0, s1
+; RV32I-NEXT:    and a0, s1, a0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3 at plt
-; RV32I-NEXT:    bnez s2, .LBB3_3
+; RV32I-NEXT:    bnez s1, .LBB3_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a0, 32
-; RV32I-NEXT:    beqz s0, .LBB3_4
+; RV32I-NEXT:    li a1, 32
+; RV32I-NEXT:    lbu a0, 0(s3)
+; RV32I-NEXT:    beqz s0, .LBB3_3
+; RV32I-NEXT:    j .LBB3_4
 ; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    srli s1, s1, 27
-; RV32I-NEXT:    add s1, s4, s1
-; RV32I-NEXT:    lbu a0, 0(s1)
-; RV32I-NEXT:    j .LBB3_5
-; RV32I-NEXT:  .LBB3_3:
 ; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    add a0, s4, a0
-; RV32I-NEXT:    lbu a0, 0(a0)
-; RV32I-NEXT:    bnez s0, .LBB3_2
+; RV32I-NEXT:    lbu a1, 0(a0)
+; RV32I-NEXT:    lbu a0, 0(s3)
+; RV32I-NEXT:    bnez s0, .LBB3_4
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    addi a0, a1, 32
 ; RV32I-NEXT:  .LBB3_4:
-; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:  .LBB3_5:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
@@ -441,33 +440,34 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ;
 ; RV32M-LABEL: test_cttz_i64:
 ; RV32M:       # %bb.0:
-; RV32M-NEXT:    lui a2, 30667
-; RV32M-NEXT:    addi a2, a2, 1329
-; RV32M-NEXT:    lui a3, %hi(.LCPI3_0)
-; RV32M-NEXT:    addi a3, a3, %lo(.LCPI3_0)
-; RV32M-NEXT:    bnez a1, .LBB3_3
+; RV32M-NEXT:    neg a2, a0
+; RV32M-NEXT:    and a2, a0, a2
+; RV32M-NEXT:    lui a3, 30667
+; RV32M-NEXT:    addi a3, a3, 1329
+; RV32M-NEXT:    mul a2, a2, a3
+; RV32M-NEXT:    srli a2, a2, 27
+; RV32M-NEXT:    lui a4, %hi(.LCPI3_0)
+; RV32M-NEXT:    addi a4, a4, %lo(.LCPI3_0)
+; RV32M-NEXT:    add a2, a4, a2
+; RV32M-NEXT:    bnez a1, .LBB3_2
 ; RV32M-NEXT:  # %bb.1:
-; RV32M-NEXT:    li a1, 32
-; RV32M-NEXT:    beqz a0, .LBB3_4
+; RV32M-NEXT:    li a3, 32
+; RV32M-NEXT:    lbu a1, 0(a2)
+; RV32M-NEXT:    beqz a0, .LBB3_3
+; RV32M-NEXT:    j .LBB3_4
 ; RV32M-NEXT:  .LBB3_2:
-; RV32M-NEXT:    neg a1, a0
-; RV32M-NEXT:    and a0, a0, a1
-; RV32M-NEXT:    mul a0, a0, a2
-; RV32M-NEXT:    srli a0, a0, 27
-; RV32M-NEXT:    add a0, a3, a0
-; RV32M-NEXT:    lbu a0, 0(a0)
-; RV32M-NEXT:    li a1, 0
-; RV32M-NEXT:    ret
-; RV32M-NEXT:  .LBB3_3:
-; RV32M-NEXT:    neg a4, a1
-; RV32M-NEXT:    and a1, a1, a4
-; RV32M-NEXT:    mul a1, a1, a2
+; RV32M-NEXT:    neg a5, a1
+; RV32M-NEXT:    and a1, a1, a5
+; RV32M-NEXT:    mul a1, a1, a3
 ; RV32M-NEXT:    srli a1, a1, 27
-; RV32M-NEXT:    add a1, a3, a1
-; RV32M-NEXT:    lbu a1, 0(a1)
-; RV32M-NEXT:    bnez a0, .LBB3_2
+; RV32M-NEXT:    add a1, a4, a1
+; RV32M-NEXT:    lbu a3, 0(a1)
+; RV32M-NEXT:    lbu a1, 0(a2)
+; RV32M-NEXT:    bnez a0, .LBB3_4
+; RV32M-NEXT:  .LBB3_3:
+; RV32M-NEXT:    addi a1, a3, 32
 ; RV32M-NEXT:  .LBB3_4:
-; RV32M-NEXT:    addi a0, a1, 32
+; RV32M-NEXT:    mv a0, a1
 ; RV32M-NEXT:    li a1, 0
 ; RV32M-NEXT:    ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
index 02072b3e4e5ca82..e1f1ec45cbfbac8 100644
--- a/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
+++ b/llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll
@@ -39,39 +39,38 @@ define signext i32 @ctz_dereferencing_pointer(i64* %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw s2, 0(a0)
+; RV32I-NEXT:    lw s1, 0(a0)
 ; RV32I-NEXT:    lw s4, 4(a0)
-; RV32I-NEXT:    neg a0, s2
-; RV32I-NEXT:    and a0, s2, a0
+; RV32I-NEXT:    neg a0, s1
+; RV32I-NEXT:    and a0, s1, a0
 ; RV32I-NEXT:    lui a1, 30667
-; RV32I-NEXT:    addi s1, a1, 1329
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    addi s0, a1, 1329
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __mulsi3 at plt
-; RV32I-NEXT:    mv s0, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI0_0)
-; RV32I-NEXT:    addi s3, a0, %lo(.LCPI0_0)
+; RV32I-NEXT:    srli a0, a0, 27
+; RV32I-NEXT:    lui a1, %hi(.LCPI0_0)
+; RV32I-NEXT:    addi s3, a1, %lo(.LCPI0_0)
+; RV32I-NEXT:    add s2, s3, a0
 ; RV32I-NEXT:    neg a0, s4
 ; RV32I-NEXT:    and a0, s4, a0
-; RV32I-NEXT:    mv a1, s1
+; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __mulsi3 at plt
-; RV32I-NEXT:    bnez s4, .LBB0_3
+; RV32I-NEXT:    bnez s4, .LBB0_2
 ; RV32I-NEXT:  # %bb.1: # %entry
 ; RV32I-NEXT:    li a0, 32
-; RV32I-NEXT:    beqz s2, .LBB0_4
+; RV32I-NEXT:    lbu a1, 0(s2)
+; RV32I-NEXT:    beqz s1, .LBB0_3
+; RV32I-NEXT:    j .LBB0_4
 ; RV32I-NEXT:  .LBB0_2:
-; RV32I-NEXT:    srli s0, s0, 27
-; RV32I-NEXT:    add s0, s3, s0
-; RV32I-NEXT:    lbu a0, 0(s0)
-; RV32I-NEXT:    j .LBB0_5
-; RV32I-NEXT:  .LBB0_3:
 ; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    add a0, s3, a0
 ; RV32I-NEXT:    lbu a0, 0(a0)
-; RV32I-NEXT:    bnez s2, .LBB0_2
+; RV32I-NEXT:    lbu a1, 0(s2)
+; RV32I-NEXT:    bnez s1, .LBB0_4
+; RV32I-NEXT:  .LBB0_3: # %entry
+; RV32I-NEXT:    addi a1, a0, 32
 ; RV32I-NEXT:  .LBB0_4: # %entry
-; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:  .LBB0_5: # %entry
-; RV32I-NEXT:    andi a0, a0, 63
+; RV32I-NEXT:    andi a0, a1, 63
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
@@ -502,39 +501,38 @@ define signext i32 @ctz4(i64 %b) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:    and a0, s0, a0
 ; RV32I-NEXT:    lui a1, 30667
-; RV32I-NEXT:    addi s3, a1, 1329
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    addi s2, a1, 1329
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3 at plt
-; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI6_0)
-; RV32I-NEXT:    addi s4, a0, %lo(.LCPI6_0)
-; RV32I-NEXT:    neg a0, s2
-; RV32I-NEXT:    and a0, s2, a0
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    srli a0, a0, 27
+; RV32I-NEXT:    lui a1, %hi(.LCPI6_0)
+; RV32I-NEXT:    addi s4, a1, %lo(.LCPI6_0)
+; RV32I-NEXT:    add s3, s4, a0
+; RV32I-NEXT:    neg a0, s1
+; RV32I-NEXT:    and a0, s1, a0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3 at plt
-; RV32I-NEXT:    bnez s2, .LBB6_3
+; RV32I-NEXT:    bnez s1, .LBB6_2
 ; RV32I-NEXT:  # %bb.1: # %entry
 ; RV32I-NEXT:    li a0, 32
-; RV32I-NEXT:    beqz s0, .LBB6_4
+; RV32I-NEXT:    lbu a1, 0(s3)
+; RV32I-NEXT:    beqz s0, .LBB6_3
+; RV32I-NEXT:    j .LBB6_4
 ; RV32I-NEXT:  .LBB6_2:
-; RV32I-NEXT:    srli s1, s1, 27
-; RV32I-NEXT:    add s1, s4, s1
-; RV32I-NEXT:    lbu a0, 0(s1)
-; RV32I-NEXT:    j .LBB6_5
-; RV32I-NEXT:  .LBB6_3:
 ; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    add a0, s4, a0
 ; RV32I-NEXT:    lbu a0, 0(a0)
-; RV32I-NEXT:    bnez s0, .LBB6_2
+; RV32I-NEXT:    lbu a1, 0(s3)
+; RV32I-NEXT:    bnez s0, .LBB6_4
+; RV32I-NEXT:  .LBB6_3: # %entry
+; RV32I-NEXT:    addi a1, a0, 32
 ; RV32I-NEXT:  .LBB6_4: # %entry
-; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:  .LBB6_5: # %entry
-; RV32I-NEXT:    andi a0, a0, 63
+; RV32I-NEXT:    andi a0, a1, 63
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index e667325db3aa787..ea8645916724a6e 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -256,170 +256,154 @@ define fastcc half @callee_half_32(<32 x half> %A) nounwind {
 define half @caller_half_32(<32 x half> %A) nounwind {
 ; ZHINX32-LABEL: caller_half_32:
 ; ZHINX32:       # %bb.0:
-; ZHINX32-NEXT:    addi sp, sp, -112
-; ZHINX32-NEXT:    sw ra, 108(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s0, 104(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s1, 100(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s2, 96(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s3, 92(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s4, 88(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s5, 84(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s6, 80(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s7, 76(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s8, 72(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s9, 68(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s10, 64(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s11, 60(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lh t0, 112(sp)
-; ZHINX32-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    addi sp, sp, -96
+; ZHINX32-NEXT:    sw ra, 92(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s0, 88(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s1, 84(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s2, 80(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s3, 76(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s4, 72(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s5, 68(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s6, 64(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s7, 60(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s8, 56(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s9, 52(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s10, 48(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s11, 44(sp) # 4-byte Folded Spill
 ; ZHINX32-NEXT:    lh t0, 116(sp)
-; ZHINX32-NEXT:    sw t0, 52(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lh t0, 120(sp)
-; ZHINX32-NEXT:    sw t0, 48(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lh t0, 124(sp)
-; ZHINX32-NEXT:    sw t0, 44(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lh t6, 128(sp)
-; ZHINX32-NEXT:    lh t5, 132(sp)
-; ZHINX32-NEXT:    lh t4, 136(sp)
-; ZHINX32-NEXT:    lh s0, 140(sp)
-; ZHINX32-NEXT:    lh s1, 144(sp)
-; ZHINX32-NEXT:    lh s2, 148(sp)
-; ZHINX32-NEXT:    lh s3, 152(sp)
-; ZHINX32-NEXT:    lh s4, 156(sp)
-; ZHINX32-NEXT:    lh s5, 160(sp)
-; ZHINX32-NEXT:    lh s6, 164(sp)
-; ZHINX32-NEXT:    lh s7, 168(sp)
-; ZHINX32-NEXT:    lh s8, 172(sp)
-; ZHINX32-NEXT:    lh s9, 176(sp)
-; ZHINX32-NEXT:    lh s10, 180(sp)
-; ZHINX32-NEXT:    lh s11, 184(sp)
-; ZHINX32-NEXT:    lh ra, 188(sp)
-; ZHINX32-NEXT:    lh t3, 192(sp)
-; ZHINX32-NEXT:    lh t2, 196(sp)
-; ZHINX32-NEXT:    lh t1, 200(sp)
-; ZHINX32-NEXT:    lh t0, 204(sp)
-; ZHINX32-NEXT:    sh t0, 36(sp)
-; ZHINX32-NEXT:    sh t1, 34(sp)
-; ZHINX32-NEXT:    sh t2, 32(sp)
-; ZHINX32-NEXT:    sh t3, 30(sp)
-; ZHINX32-NEXT:    sh ra, 28(sp)
-; ZHINX32-NEXT:    sh s11, 26(sp)
-; ZHINX32-NEXT:    sh s10, 24(sp)
-; ZHINX32-NEXT:    sh s9, 22(sp)
-; ZHINX32-NEXT:    sh s8, 20(sp)
-; ZHINX32-NEXT:    sh s7, 18(sp)
-; ZHINX32-NEXT:    sh s6, 16(sp)
-; ZHINX32-NEXT:    sh s5, 14(sp)
-; ZHINX32-NEXT:    sh s4, 12(sp)
-; ZHINX32-NEXT:    sh s3, 10(sp)
-; ZHINX32-NEXT:    sh s2, 8(sp)
+; ZHINX32-NEXT:    lh t1, 120(sp)
+; ZHINX32-NEXT:    lh s0, 124(sp)
+; ZHINX32-NEXT:    lh s1, 128(sp)
+; ZHINX32-NEXT:    lh t2, 132(sp)
+; ZHINX32-NEXT:    lh t3, 136(sp)
+; ZHINX32-NEXT:    lh t4, 140(sp)
+; ZHINX32-NEXT:    lh t5, 144(sp)
+; ZHINX32-NEXT:    lh t6, 148(sp)
+; ZHINX32-NEXT:    lh s2, 152(sp)
+; ZHINX32-NEXT:    lh s3, 156(sp)
+; ZHINX32-NEXT:    lh s4, 160(sp)
+; ZHINX32-NEXT:    lh s5, 164(sp)
+; ZHINX32-NEXT:    lh s6, 168(sp)
+; ZHINX32-NEXT:    lh s7, 172(sp)
+; ZHINX32-NEXT:    lh s8, 176(sp)
+; ZHINX32-NEXT:    lh s9, 180(sp)
+; ZHINX32-NEXT:    lh s10, 184(sp)
+; ZHINX32-NEXT:    lh s11, 188(sp)
+; ZHINX32-NEXT:    sh s11, 36(sp)
+; ZHINX32-NEXT:    sh s10, 34(sp)
+; ZHINX32-NEXT:    sh s9, 32(sp)
+; ZHINX32-NEXT:    sh s8, 30(sp)
+; ZHINX32-NEXT:    sh s7, 28(sp)
+; ZHINX32-NEXT:    sh s6, 26(sp)
+; ZHINX32-NEXT:    sh s5, 24(sp)
+; ZHINX32-NEXT:    sh s4, 22(sp)
+; ZHINX32-NEXT:    sh s3, 20(sp)
+; ZHINX32-NEXT:    sh s2, 18(sp)
+; ZHINX32-NEXT:    sh t6, 16(sp)
+; ZHINX32-NEXT:    sh t5, 14(sp)
+; ZHINX32-NEXT:    sh t4, 12(sp)
+; ZHINX32-NEXT:    sh t3, 10(sp)
+; ZHINX32-NEXT:    sh t2, 8(sp)
+; ZHINX32-NEXT:    lh t2, 96(sp)
+; ZHINX32-NEXT:    lh t3, 100(sp)
+; ZHINX32-NEXT:    lh t4, 104(sp)
+; ZHINX32-NEXT:    lh t5, 108(sp)
+; ZHINX32-NEXT:    lh t6, 112(sp)
 ; ZHINX32-NEXT:    sh s1, 6(sp)
 ; ZHINX32-NEXT:    sh s0, 4(sp)
-; ZHINX32-NEXT:    sh t4, 2(sp)
-; ZHINX32-NEXT:    sh t5, 0(sp)
-; ZHINX32-NEXT:    lw t2, 56(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t3, 52(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t4, 48(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t5, 44(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    sh t1, 2(sp)
+; ZHINX32-NEXT:    sh t0, 0(sp)
 ; ZHINX32-NEXT:    call callee_half_32 at plt
-; ZHINX32-NEXT:    lw ra, 108(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s0, 104(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s1, 100(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s2, 96(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s3, 92(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s4, 88(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s5, 84(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s6, 80(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s7, 76(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s8, 72(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s9, 68(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s10, 64(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s11, 60(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    addi sp, sp, 112
+; ZHINX32-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s1, 84(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s2, 80(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s3, 76(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s4, 72(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s5, 68(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s6, 64(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s7, 60(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s8, 56(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s9, 52(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s10, 48(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s11, 44(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    addi sp, sp, 96
 ; ZHINX32-NEXT:    ret
 ;
 ; ZHINX64-LABEL: caller_half_32:
 ; ZHINX64:       # %bb.0:
-; ZHINX64-NEXT:    addi sp, sp, -176
-; ZHINX64-NEXT:    sd ra, 168(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s0, 160(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s1, 152(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s2, 144(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s3, 136(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s4, 128(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s5, 120(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s6, 112(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s7, 104(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s8, 96(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s9, 88(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s10, 80(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s11, 72(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lh t0, 176(sp)
-; ZHINX64-NEXT:    sd t0, 64(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    addi sp, sp, -144
+; ZHINX64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s1, 120(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s2, 112(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s3, 104(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s4, 96(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s5, 88(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s6, 80(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s7, 72(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s8, 64(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s9, 56(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s10, 48(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s11, 40(sp) # 8-byte Folded Spill
 ; ZHINX64-NEXT:    lh t0, 184(sp)
-; ZHINX64-NEXT:    sd t0, 56(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lh t0, 192(sp)
-; ZHINX64-NEXT:    sd t0, 48(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lh t0, 200(sp)
-; ZHINX64-NEXT:    sd t0, 40(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lh t6, 208(sp)
-; ZHINX64-NEXT:    lh t5, 216(sp)
-; ZHINX64-NEXT:    lh t4, 224(sp)
-; ZHINX64-NEXT:    lh s0, 232(sp)
-; ZHINX64-NEXT:    lh s1, 240(sp)
-; ZHINX64-NEXT:    lh s2, 248(sp)
-; ZHINX64-NEXT:    lh s3, 256(sp)
-; ZHINX64-NEXT:    lh s4, 264(sp)
-; ZHINX64-NEXT:    lh s5, 272(sp)
-; ZHINX64-NEXT:    lh s6, 280(sp)
-; ZHINX64-NEXT:    lh s7, 288(sp)
-; ZHINX64-NEXT:    lh s8, 296(sp)
-; ZHINX64-NEXT:    lh s9, 304(sp)
-; ZHINX64-NEXT:    lh s10, 312(sp)
-; ZHINX64-NEXT:    lh s11, 320(sp)
-; ZHINX64-NEXT:    lh ra, 328(sp)
-; ZHINX64-NEXT:    lh t3, 336(sp)
-; ZHINX64-NEXT:    lh t2, 344(sp)
-; ZHINX64-NEXT:    lh t1, 352(sp)
-; ZHINX64-NEXT:    lh t0, 360(sp)
-; ZHINX64-NEXT:    sh t0, 36(sp)
-; ZHINX64-NEXT:    sh t1, 34(sp)
-; ZHINX64-NEXT:    sh t2, 32(sp)
-; ZHINX64-NEXT:    sh t3, 30(sp)
-; ZHINX64-NEXT:    sh ra, 28(sp)
-; ZHINX64-NEXT:    sh s11, 26(sp)
-; ZHINX64-NEXT:    sh s10, 24(sp)
-; ZHINX64-NEXT:    sh s9, 22(sp)
-; ZHINX64-NEXT:    sh s8, 20(sp)
-; ZHINX64-NEXT:    sh s7, 18(sp)
-; ZHINX64-NEXT:    sh s6, 16(sp)
-; ZHINX64-NEXT:    sh s5, 14(sp)
-; ZHINX64-NEXT:    sh s4, 12(sp)
-; ZHINX64-NEXT:    sh s3, 10(sp)
-; ZHINX64-NEXT:    sh s2, 8(sp)
+; ZHINX64-NEXT:    lh t1, 192(sp)
+; ZHINX64-NEXT:    lh s0, 200(sp)
+; ZHINX64-NEXT:    lh s1, 208(sp)
+; ZHINX64-NEXT:    lh t2, 216(sp)
+; ZHINX64-NEXT:    lh t3, 224(sp)
+; ZHINX64-NEXT:    lh t4, 232(sp)
+; ZHINX64-NEXT:    lh t5, 240(sp)
+; ZHINX64-NEXT:    lh t6, 248(sp)
+; ZHINX64-NEXT:    lh s2, 256(sp)
+; ZHINX64-NEXT:    lh s3, 264(sp)
+; ZHINX64-NEXT:    lh s4, 272(sp)
+; ZHINX64-NEXT:    lh s5, 280(sp)
+; ZHINX64-NEXT:    lh s6, 288(sp)
+; ZHINX64-NEXT:    lh s7, 296(sp)
+; ZHINX64-NEXT:    lh s8, 304(sp)
+; ZHINX64-NEXT:    lh s9, 312(sp)
+; ZHINX64-NEXT:    lh s10, 320(sp)
+; ZHINX64-NEXT:    lh s11, 328(sp)
+; ZHINX64-NEXT:    sh s11, 36(sp)
+; ZHINX64-NEXT:    sh s10, 34(sp)
+; ZHINX64-NEXT:    sh s9, 32(sp)
+; ZHINX64-NEXT:    sh s8, 30(sp)
+; ZHINX64-NEXT:    sh s7, 28(sp)
+; ZHINX64-NEXT:    sh s6, 26(sp)
+; ZHINX64-NEXT:    sh s5, 24(sp)
+; ZHINX64-NEXT:    sh s4, 22(sp)
+; ZHINX64-NEXT:    sh s3, 20(sp)
+; ZHINX64-NEXT:    sh s2, 18(sp)
+; ZHINX64-NEXT:    sh t6, 16(sp)
+; ZHINX64-NEXT:    sh t5, 14(sp)
+; ZHINX64-NEXT:    sh t4, 12(sp)
+; ZHINX64-NEXT:    sh t3, 10(sp)
+; ZHINX64-NEXT:    sh t2, 8(sp)
+; ZHINX64-NEXT:    lh t2, 144(sp)
+; ZHINX64-NEXT:    lh t3, 152(sp)
+; ZHINX64-NEXT:    lh t4, 160(sp)
+; ZHINX64-NEXT:    lh t5, 168(sp)
+; ZHINX64-NEXT:    lh t6, 176(sp)
 ; ZHINX64-NEXT:    sh s1, 6(sp)
 ; ZHINX64-NEXT:    sh s0, 4(sp)
-; ZHINX64-NEXT:    sh t4, 2(sp)
-; ZHINX64-NEXT:    sh t5, 0(sp)
-; ZHINX64-NEXT:    ld t2, 64(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t3, 56(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t4, 48(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t5, 40(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    sh t1, 2(sp)
+; ZHINX64-NEXT:    sh t0, 0(sp)
 ; ZHINX64-NEXT:    call callee_half_32 at plt
-; ZHINX64-NEXT:    ld ra, 168(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s0, 160(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s1, 152(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s2, 144(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s3, 136(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s4, 128(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s5, 120(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s6, 112(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s7, 104(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s8, 96(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s9, 88(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s10, 80(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s11, 72(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    addi sp, sp, 176
+; ZHINX64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s1, 120(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s2, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s3, 104(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s4, 96(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s5, 88(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s6, 80(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s7, 72(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s8, 64(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s9, 56(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s10, 48(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s11, 40(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    addi sp, sp, 144
 ; ZHINX64-NEXT:    ret
 ;
 ; ZFINX32-LABEL: caller_half_32:
@@ -836,506 +820,458 @@ define fastcc float @callee_float_32(<32 x float> %A) nounwind {
 define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZHINX32-LABEL: caller_float_32:
 ; ZHINX32:       # %bb.0:
-; ZHINX32-NEXT:    addi sp, sp, -144
-; ZHINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lw t0, 144(sp)
-; ZHINX32-NEXT:    sw t0, 88(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    addi sp, sp, -128
+; ZHINX32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s1, 116(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s2, 112(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s3, 108(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s4, 104(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s5, 100(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s6, 96(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s7, 92(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s8, 88(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s9, 84(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s10, 80(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s11, 76(sp) # 4-byte Folded Spill
 ; ZHINX32-NEXT:    lw t0, 148(sp)
-; ZHINX32-NEXT:    sw t0, 84(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lw t0, 152(sp)
-; ZHINX32-NEXT:    sw t0, 80(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lw t0, 156(sp)
-; ZHINX32-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lw t6, 160(sp)
-; ZHINX32-NEXT:    lw t5, 164(sp)
-; ZHINX32-NEXT:    lw t4, 168(sp)
-; ZHINX32-NEXT:    lw s0, 172(sp)
-; ZHINX32-NEXT:    lw s1, 176(sp)
-; ZHINX32-NEXT:    lw s2, 180(sp)
-; ZHINX32-NEXT:    lw s3, 184(sp)
-; ZHINX32-NEXT:    lw s4, 188(sp)
-; ZHINX32-NEXT:    lw s5, 192(sp)
-; ZHINX32-NEXT:    lw s6, 196(sp)
-; ZHINX32-NEXT:    lw s7, 200(sp)
-; ZHINX32-NEXT:    lw s8, 204(sp)
-; ZHINX32-NEXT:    lw s9, 208(sp)
-; ZHINX32-NEXT:    lw s10, 212(sp)
-; ZHINX32-NEXT:    lw s11, 216(sp)
-; ZHINX32-NEXT:    lw ra, 220(sp)
-; ZHINX32-NEXT:    lw t3, 224(sp)
-; ZHINX32-NEXT:    lw t2, 228(sp)
-; ZHINX32-NEXT:    lw t1, 232(sp)
-; ZHINX32-NEXT:    lw t0, 236(sp)
-; ZHINX32-NEXT:    sw t0, 72(sp)
-; ZHINX32-NEXT:    sw t1, 68(sp)
-; ZHINX32-NEXT:    sw t2, 64(sp)
-; ZHINX32-NEXT:    sw t3, 60(sp)
-; ZHINX32-NEXT:    sw ra, 56(sp)
-; ZHINX32-NEXT:    sw s11, 52(sp)
-; ZHINX32-NEXT:    sw s10, 48(sp)
-; ZHINX32-NEXT:    sw s9, 44(sp)
-; ZHINX32-NEXT:    sw s8, 40(sp)
-; ZHINX32-NEXT:    sw s7, 36(sp)
-; ZHINX32-NEXT:    sw s6, 32(sp)
-; ZHINX32-NEXT:    sw s5, 28(sp)
-; ZHINX32-NEXT:    sw s4, 24(sp)
-; ZHINX32-NEXT:    sw s3, 20(sp)
-; ZHINX32-NEXT:    sw s2, 16(sp)
+; ZHINX32-NEXT:    lw t1, 152(sp)
+; ZHINX32-NEXT:    lw s0, 156(sp)
+; ZHINX32-NEXT:    lw s1, 160(sp)
+; ZHINX32-NEXT:    lw t2, 164(sp)
+; ZHINX32-NEXT:    lw t3, 168(sp)
+; ZHINX32-NEXT:    lw t4, 172(sp)
+; ZHINX32-NEXT:    lw t5, 176(sp)
+; ZHINX32-NEXT:    lw t6, 180(sp)
+; ZHINX32-NEXT:    lw s2, 184(sp)
+; ZHINX32-NEXT:    lw s3, 188(sp)
+; ZHINX32-NEXT:    lw s4, 192(sp)
+; ZHINX32-NEXT:    lw s5, 196(sp)
+; ZHINX32-NEXT:    lw s6, 200(sp)
+; ZHINX32-NEXT:    lw s7, 204(sp)
+; ZHINX32-NEXT:    lw s8, 208(sp)
+; ZHINX32-NEXT:    lw s9, 212(sp)
+; ZHINX32-NEXT:    lw s10, 216(sp)
+; ZHINX32-NEXT:    lw s11, 220(sp)
+; ZHINX32-NEXT:    sw s11, 72(sp)
+; ZHINX32-NEXT:    sw s10, 68(sp)
+; ZHINX32-NEXT:    sw s9, 64(sp)
+; ZHINX32-NEXT:    sw s8, 60(sp)
+; ZHINX32-NEXT:    sw s7, 56(sp)
+; ZHINX32-NEXT:    sw s6, 52(sp)
+; ZHINX32-NEXT:    sw s5, 48(sp)
+; ZHINX32-NEXT:    sw s4, 44(sp)
+; ZHINX32-NEXT:    sw s3, 40(sp)
+; ZHINX32-NEXT:    sw s2, 36(sp)
+; ZHINX32-NEXT:    sw t6, 32(sp)
+; ZHINX32-NEXT:    sw t5, 28(sp)
+; ZHINX32-NEXT:    sw t4, 24(sp)
+; ZHINX32-NEXT:    sw t3, 20(sp)
+; ZHINX32-NEXT:    sw t2, 16(sp)
+; ZHINX32-NEXT:    lw t2, 128(sp)
+; ZHINX32-NEXT:    lw t3, 132(sp)
+; ZHINX32-NEXT:    lw t4, 136(sp)
+; ZHINX32-NEXT:    lw t5, 140(sp)
+; ZHINX32-NEXT:    lw t6, 144(sp)
 ; ZHINX32-NEXT:    sw s1, 12(sp)
 ; ZHINX32-NEXT:    sw s0, 8(sp)
-; ZHINX32-NEXT:    sw t4, 4(sp)
-; ZHINX32-NEXT:    sw t5, 0(sp)
-; ZHINX32-NEXT:    lw t2, 88(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t3, 84(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t4, 80(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t5, 76(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    sw t1, 4(sp)
+; ZHINX32-NEXT:    sw t0, 0(sp)
 ; ZHINX32-NEXT:    call callee_float_32 at plt
-; ZHINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    addi sp, sp, 144
+; ZHINX32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s1, 116(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s2, 112(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s3, 108(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s4, 104(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s5, 100(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s6, 96(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s7, 92(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s8, 88(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s9, 84(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s10, 80(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s11, 76(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    addi sp, sp, 128
 ; ZHINX32-NEXT:    ret
 ;
 ; ZHINX64-LABEL: caller_float_32:
 ; ZHINX64:       # %bb.0:
-; ZHINX64-NEXT:    addi sp, sp, -224
-; ZHINX64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lw t0, 224(sp)
-; ZHINX64-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    addi sp, sp, -192
+; ZHINX64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
 ; ZHINX64-NEXT:    lw t0, 232(sp)
-; ZHINX64-NEXT:    sd t0, 104(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lw t0, 240(sp)
-; ZHINX64-NEXT:    sd t0, 96(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lw t0, 248(sp)
-; ZHINX64-NEXT:    sd t0, 88(sp) # 8-byte Folded Spill
-; ZHINX64-NEXT:    lw t6, 256(sp)
-; ZHINX64-NEXT:    lw t5, 264(sp)
-; ZHINX64-NEXT:    lw t4, 272(sp)
-; ZHINX64-NEXT:    lw s0, 280(sp)
-; ZHINX64-NEXT:    lw s1, 288(sp)
-; ZHINX64-NEXT:    lw s2, 296(sp)
-; ZHINX64-NEXT:    lw s3, 304(sp)
-; ZHINX64-NEXT:    lw s4, 312(sp)
-; ZHINX64-NEXT:    lw s5, 320(sp)
-; ZHINX64-NEXT:    lw s6, 328(sp)
-; ZHINX64-NEXT:    lw s7, 336(sp)
-; ZHINX64-NEXT:    lw s8, 344(sp)
-; ZHINX64-NEXT:    lw s9, 352(sp)
-; ZHINX64-NEXT:    lw s10, 360(sp)
-; ZHINX64-NEXT:    lw s11, 368(sp)
-; ZHINX64-NEXT:    lw ra, 376(sp)
-; ZHINX64-NEXT:    lw t3, 384(sp)
-; ZHINX64-NEXT:    lw t2, 392(sp)
-; ZHINX64-NEXT:    lw t1, 400(sp)
-; ZHINX64-NEXT:    lw t0, 408(sp)
-; ZHINX64-NEXT:    sw t0, 72(sp)
-; ZHINX64-NEXT:    sw t1, 68(sp)
-; ZHINX64-NEXT:    sw t2, 64(sp)
-; ZHINX64-NEXT:    sw t3, 60(sp)
-; ZHINX64-NEXT:    sw ra, 56(sp)
-; ZHINX64-NEXT:    sw s11, 52(sp)
-; ZHINX64-NEXT:    sw s10, 48(sp)
-; ZHINX64-NEXT:    sw s9, 44(sp)
-; ZHINX64-NEXT:    sw s8, 40(sp)
-; ZHINX64-NEXT:    sw s7, 36(sp)
-; ZHINX64-NEXT:    sw s6, 32(sp)
-; ZHINX64-NEXT:    sw s5, 28(sp)
-; ZHINX64-NEXT:    sw s4, 24(sp)
-; ZHINX64-NEXT:    sw s3, 20(sp)
-; ZHINX64-NEXT:    sw s2, 16(sp)
+; ZHINX64-NEXT:    lw t1, 240(sp)
+; ZHINX64-NEXT:    lw s0, 248(sp)
+; ZHINX64-NEXT:    lw s1, 256(sp)
+; ZHINX64-NEXT:    lw t2, 264(sp)
+; ZHINX64-NEXT:    lw t3, 272(sp)
+; ZHINX64-NEXT:    lw t4, 280(sp)
+; ZHINX64-NEXT:    lw t5, 288(sp)
+; ZHINX64-NEXT:    lw t6, 296(sp)
+; ZHINX64-NEXT:    lw s2, 304(sp)
+; ZHINX64-NEXT:    lw s3, 312(sp)
+; ZHINX64-NEXT:    lw s4, 320(sp)
+; ZHINX64-NEXT:    lw s5, 328(sp)
+; ZHINX64-NEXT:    lw s6, 336(sp)
+; ZHINX64-NEXT:    lw s7, 344(sp)
+; ZHINX64-NEXT:    lw s8, 352(sp)
+; ZHINX64-NEXT:    lw s9, 360(sp)
+; ZHINX64-NEXT:    lw s10, 368(sp)
+; ZHINX64-NEXT:    lw s11, 376(sp)
+; ZHINX64-NEXT:    sw s11, 72(sp)
+; ZHINX64-NEXT:    sw s10, 68(sp)
+; ZHINX64-NEXT:    sw s9, 64(sp)
+; ZHINX64-NEXT:    sw s8, 60(sp)
+; ZHINX64-NEXT:    sw s7, 56(sp)
+; ZHINX64-NEXT:    sw s6, 52(sp)
+; ZHINX64-NEXT:    sw s5, 48(sp)
+; ZHINX64-NEXT:    sw s4, 44(sp)
+; ZHINX64-NEXT:    sw s3, 40(sp)
+; ZHINX64-NEXT:    sw s2, 36(sp)
+; ZHINX64-NEXT:    sw t6, 32(sp)
+; ZHINX64-NEXT:    sw t5, 28(sp)
+; ZHINX64-NEXT:    sw t4, 24(sp)
+; ZHINX64-NEXT:    sw t3, 20(sp)
+; ZHINX64-NEXT:    sw t2, 16(sp)
+; ZHINX64-NEXT:    lw t2, 192(sp)
+; ZHINX64-NEXT:    lw t3, 200(sp)
+; ZHINX64-NEXT:    lw t4, 208(sp)
+; ZHINX64-NEXT:    lw t5, 216(sp)
+; ZHINX64-NEXT:    lw t6, 224(sp)
 ; ZHINX64-NEXT:    sw s1, 12(sp)
 ; ZHINX64-NEXT:    sw s0, 8(sp)
-; ZHINX64-NEXT:    sw t4, 4(sp)
-; ZHINX64-NEXT:    sw t5, 0(sp)
-; ZHINX64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t5, 88(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    sw t1, 4(sp)
+; ZHINX64-NEXT:    sw t0, 0(sp)
 ; ZHINX64-NEXT:    call callee_float_32 at plt
-; ZHINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    addi sp, sp, 224
+; ZHINX64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    addi sp, sp, 192
 ; ZHINX64-NEXT:    ret
 ;
 ; ZFINX32-LABEL: caller_float_32:
 ; ZFINX32:       # %bb.0:
-; ZFINX32-NEXT:    addi sp, sp, -144
-; ZFINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    lw t0, 144(sp)
-; ZFINX32-NEXT:    sw t0, 88(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    addi sp, sp, -128
+; ZFINX32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s1, 116(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s2, 112(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s3, 108(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s4, 104(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s5, 100(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s6, 96(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s7, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s8, 88(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s9, 84(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s10, 80(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s11, 76(sp) # 4-byte Folded Spill
 ; ZFINX32-NEXT:    lw t0, 148(sp)
-; ZFINX32-NEXT:    sw t0, 84(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    lw t0, 152(sp)
-; ZFINX32-NEXT:    sw t0, 80(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    lw t0, 156(sp)
-; ZFINX32-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    lw t6, 160(sp)
-; ZFINX32-NEXT:    lw t5, 164(sp)
-; ZFINX32-NEXT:    lw t4, 168(sp)
-; ZFINX32-NEXT:    lw s0, 172(sp)
-; ZFINX32-NEXT:    lw s1, 176(sp)
-; ZFINX32-NEXT:    lw s2, 180(sp)
-; ZFINX32-NEXT:    lw s3, 184(sp)
-; ZFINX32-NEXT:    lw s4, 188(sp)
-; ZFINX32-NEXT:    lw s5, 192(sp)
-; ZFINX32-NEXT:    lw s6, 196(sp)
-; ZFINX32-NEXT:    lw s7, 200(sp)
-; ZFINX32-NEXT:    lw s8, 204(sp)
-; ZFINX32-NEXT:    lw s9, 208(sp)
-; ZFINX32-NEXT:    lw s10, 212(sp)
-; ZFINX32-NEXT:    lw s11, 216(sp)
-; ZFINX32-NEXT:    lw ra, 220(sp)
-; ZFINX32-NEXT:    lw t3, 224(sp)
-; ZFINX32-NEXT:    lw t2, 228(sp)
-; ZFINX32-NEXT:    lw t1, 232(sp)
-; ZFINX32-NEXT:    lw t0, 236(sp)
-; ZFINX32-NEXT:    sw t0, 72(sp)
-; ZFINX32-NEXT:    sw t1, 68(sp)
-; ZFINX32-NEXT:    sw t2, 64(sp)
-; ZFINX32-NEXT:    sw t3, 60(sp)
-; ZFINX32-NEXT:    sw ra, 56(sp)
-; ZFINX32-NEXT:    sw s11, 52(sp)
-; ZFINX32-NEXT:    sw s10, 48(sp)
-; ZFINX32-NEXT:    sw s9, 44(sp)
-; ZFINX32-NEXT:    sw s8, 40(sp)
-; ZFINX32-NEXT:    sw s7, 36(sp)
-; ZFINX32-NEXT:    sw s6, 32(sp)
-; ZFINX32-NEXT:    sw s5, 28(sp)
-; ZFINX32-NEXT:    sw s4, 24(sp)
-; ZFINX32-NEXT:    sw s3, 20(sp)
-; ZFINX32-NEXT:    sw s2, 16(sp)
+; ZFINX32-NEXT:    lw t1, 152(sp)
+; ZFINX32-NEXT:    lw s0, 156(sp)
+; ZFINX32-NEXT:    lw s1, 160(sp)
+; ZFINX32-NEXT:    lw t2, 164(sp)
+; ZFINX32-NEXT:    lw t3, 168(sp)
+; ZFINX32-NEXT:    lw t4, 172(sp)
+; ZFINX32-NEXT:    lw t5, 176(sp)
+; ZFINX32-NEXT:    lw t6, 180(sp)
+; ZFINX32-NEXT:    lw s2, 184(sp)
+; ZFINX32-NEXT:    lw s3, 188(sp)
+; ZFINX32-NEXT:    lw s4, 192(sp)
+; ZFINX32-NEXT:    lw s5, 196(sp)
+; ZFINX32-NEXT:    lw s6, 200(sp)
+; ZFINX32-NEXT:    lw s7, 204(sp)
+; ZFINX32-NEXT:    lw s8, 208(sp)
+; ZFINX32-NEXT:    lw s9, 212(sp)
+; ZFINX32-NEXT:    lw s10, 216(sp)
+; ZFINX32-NEXT:    lw s11, 220(sp)
+; ZFINX32-NEXT:    sw s11, 72(sp)
+; ZFINX32-NEXT:    sw s10, 68(sp)
+; ZFINX32-NEXT:    sw s9, 64(sp)
+; ZFINX32-NEXT:    sw s8, 60(sp)
+; ZFINX32-NEXT:    sw s7, 56(sp)
+; ZFINX32-NEXT:    sw s6, 52(sp)
+; ZFINX32-NEXT:    sw s5, 48(sp)
+; ZFINX32-NEXT:    sw s4, 44(sp)
+; ZFINX32-NEXT:    sw s3, 40(sp)
+; ZFINX32-NEXT:    sw s2, 36(sp)
+; ZFINX32-NEXT:    sw t6, 32(sp)
+; ZFINX32-NEXT:    sw t5, 28(sp)
+; ZFINX32-NEXT:    sw t4, 24(sp)
+; ZFINX32-NEXT:    sw t3, 20(sp)
+; ZFINX32-NEXT:    sw t2, 16(sp)
+; ZFINX32-NEXT:    lw t2, 128(sp)
+; ZFINX32-NEXT:    lw t3, 132(sp)
+; ZFINX32-NEXT:    lw t4, 136(sp)
+; ZFINX32-NEXT:    lw t5, 140(sp)
+; ZFINX32-NEXT:    lw t6, 144(sp)
 ; ZFINX32-NEXT:    sw s1, 12(sp)
 ; ZFINX32-NEXT:    sw s0, 8(sp)
-; ZFINX32-NEXT:    sw t4, 4(sp)
-; ZFINX32-NEXT:    sw t5, 0(sp)
-; ZFINX32-NEXT:    lw t2, 88(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw t3, 84(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw t4, 80(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw t5, 76(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    sw t1, 4(sp)
+; ZFINX32-NEXT:    sw t0, 0(sp)
 ; ZFINX32-NEXT:    call callee_float_32 at plt
-; ZFINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    addi sp, sp, 144
+; ZFINX32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s1, 116(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s2, 112(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s3, 108(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s4, 104(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s5, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s6, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s7, 92(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s8, 88(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s9, 84(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s10, 80(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s11, 76(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    addi sp, sp, 128
 ; ZFINX32-NEXT:    ret
 ;
 ; ZFINX64-LABEL: caller_float_32:
 ; ZFINX64:       # %bb.0:
-; ZFINX64-NEXT:    addi sp, sp, -224
-; ZFINX64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    lw t0, 224(sp)
-; ZFINX64-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    addi sp, sp, -192
+; ZFINX64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
 ; ZFINX64-NEXT:    lw t0, 232(sp)
-; ZFINX64-NEXT:    sd t0, 104(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    lw t0, 240(sp)
-; ZFINX64-NEXT:    sd t0, 96(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    lw t0, 248(sp)
-; ZFINX64-NEXT:    sd t0, 88(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    lw t6, 256(sp)
-; ZFINX64-NEXT:    lw t5, 264(sp)
-; ZFINX64-NEXT:    lw t4, 272(sp)
-; ZFINX64-NEXT:    lw s0, 280(sp)
-; ZFINX64-NEXT:    lw s1, 288(sp)
-; ZFINX64-NEXT:    lw s2, 296(sp)
-; ZFINX64-NEXT:    lw s3, 304(sp)
-; ZFINX64-NEXT:    lw s4, 312(sp)
-; ZFINX64-NEXT:    lw s5, 320(sp)
-; ZFINX64-NEXT:    lw s6, 328(sp)
-; ZFINX64-NEXT:    lw s7, 336(sp)
-; ZFINX64-NEXT:    lw s8, 344(sp)
-; ZFINX64-NEXT:    lw s9, 352(sp)
-; ZFINX64-NEXT:    lw s10, 360(sp)
-; ZFINX64-NEXT:    lw s11, 368(sp)
-; ZFINX64-NEXT:    lw ra, 376(sp)
-; ZFINX64-NEXT:    lw t3, 384(sp)
-; ZFINX64-NEXT:    lw t2, 392(sp)
-; ZFINX64-NEXT:    lw t1, 400(sp)
-; ZFINX64-NEXT:    lw t0, 408(sp)
-; ZFINX64-NEXT:    sw t0, 72(sp)
-; ZFINX64-NEXT:    sw t1, 68(sp)
-; ZFINX64-NEXT:    sw t2, 64(sp)
-; ZFINX64-NEXT:    sw t3, 60(sp)
-; ZFINX64-NEXT:    sw ra, 56(sp)
-; ZFINX64-NEXT:    sw s11, 52(sp)
-; ZFINX64-NEXT:    sw s10, 48(sp)
-; ZFINX64-NEXT:    sw s9, 44(sp)
-; ZFINX64-NEXT:    sw s8, 40(sp)
-; ZFINX64-NEXT:    sw s7, 36(sp)
-; ZFINX64-NEXT:    sw s6, 32(sp)
-; ZFINX64-NEXT:    sw s5, 28(sp)
-; ZFINX64-NEXT:    sw s4, 24(sp)
-; ZFINX64-NEXT:    sw s3, 20(sp)
-; ZFINX64-NEXT:    sw s2, 16(sp)
+; ZFINX64-NEXT:    lw t1, 240(sp)
+; ZFINX64-NEXT:    lw s0, 248(sp)
+; ZFINX64-NEXT:    lw s1, 256(sp)
+; ZFINX64-NEXT:    lw t2, 264(sp)
+; ZFINX64-NEXT:    lw t3, 272(sp)
+; ZFINX64-NEXT:    lw t4, 280(sp)
+; ZFINX64-NEXT:    lw t5, 288(sp)
+; ZFINX64-NEXT:    lw t6, 296(sp)
+; ZFINX64-NEXT:    lw s2, 304(sp)
+; ZFINX64-NEXT:    lw s3, 312(sp)
+; ZFINX64-NEXT:    lw s4, 320(sp)
+; ZFINX64-NEXT:    lw s5, 328(sp)
+; ZFINX64-NEXT:    lw s6, 336(sp)
+; ZFINX64-NEXT:    lw s7, 344(sp)
+; ZFINX64-NEXT:    lw s8, 352(sp)
+; ZFINX64-NEXT:    lw s9, 360(sp)
+; ZFINX64-NEXT:    lw s10, 368(sp)
+; ZFINX64-NEXT:    lw s11, 376(sp)
+; ZFINX64-NEXT:    sw s11, 72(sp)
+; ZFINX64-NEXT:    sw s10, 68(sp)
+; ZFINX64-NEXT:    sw s9, 64(sp)
+; ZFINX64-NEXT:    sw s8, 60(sp)
+; ZFINX64-NEXT:    sw s7, 56(sp)
+; ZFINX64-NEXT:    sw s6, 52(sp)
+; ZFINX64-NEXT:    sw s5, 48(sp)
+; ZFINX64-NEXT:    sw s4, 44(sp)
+; ZFINX64-NEXT:    sw s3, 40(sp)
+; ZFINX64-NEXT:    sw s2, 36(sp)
+; ZFINX64-NEXT:    sw t6, 32(sp)
+; ZFINX64-NEXT:    sw t5, 28(sp)
+; ZFINX64-NEXT:    sw t4, 24(sp)
+; ZFINX64-NEXT:    sw t3, 20(sp)
+; ZFINX64-NEXT:    sw t2, 16(sp)
+; ZFINX64-NEXT:    lw t2, 192(sp)
+; ZFINX64-NEXT:    lw t3, 200(sp)
+; ZFINX64-NEXT:    lw t4, 208(sp)
+; ZFINX64-NEXT:    lw t5, 216(sp)
+; ZFINX64-NEXT:    lw t6, 224(sp)
 ; ZFINX64-NEXT:    sw s1, 12(sp)
 ; ZFINX64-NEXT:    sw s0, 8(sp)
-; ZFINX64-NEXT:    sw t4, 4(sp)
-; ZFINX64-NEXT:    sw t5, 0(sp)
-; ZFINX64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld t5, 88(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    sw t1, 4(sp)
+; ZFINX64-NEXT:    sw t0, 0(sp)
 ; ZFINX64-NEXT:    call callee_float_32 at plt
-; ZFINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    addi sp, sp, 224
+; ZFINX64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    addi sp, sp, 192
 ; ZFINX64-NEXT:    ret
 ;
 ; ZDINX32-LABEL: caller_float_32:
 ; ZDINX32:       # %bb.0:
-; ZDINX32-NEXT:    addi sp, sp, -144
-; ZDINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    lw t0, 144(sp)
-; ZDINX32-NEXT:    sw t0, 88(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    addi sp, sp, -128
+; ZDINX32-NEXT:    sw ra, 124(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s0, 120(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s1, 116(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s2, 112(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s3, 108(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s4, 104(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s5, 100(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s6, 96(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s7, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s8, 88(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s9, 84(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s10, 80(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s11, 76(sp) # 4-byte Folded Spill
 ; ZDINX32-NEXT:    lw t0, 148(sp)
-; ZDINX32-NEXT:    sw t0, 84(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    lw t0, 152(sp)
-; ZDINX32-NEXT:    sw t0, 80(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    lw t0, 156(sp)
-; ZDINX32-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    lw t6, 160(sp)
-; ZDINX32-NEXT:    lw t5, 164(sp)
-; ZDINX32-NEXT:    lw t4, 168(sp)
-; ZDINX32-NEXT:    lw s0, 172(sp)
-; ZDINX32-NEXT:    lw s1, 176(sp)
-; ZDINX32-NEXT:    lw s2, 180(sp)
-; ZDINX32-NEXT:    lw s3, 184(sp)
-; ZDINX32-NEXT:    lw s4, 188(sp)
-; ZDINX32-NEXT:    lw s5, 192(sp)
-; ZDINX32-NEXT:    lw s6, 196(sp)
-; ZDINX32-NEXT:    lw s7, 200(sp)
-; ZDINX32-NEXT:    lw s8, 204(sp)
-; ZDINX32-NEXT:    lw s9, 208(sp)
-; ZDINX32-NEXT:    lw s10, 212(sp)
-; ZDINX32-NEXT:    lw s11, 216(sp)
-; ZDINX32-NEXT:    lw ra, 220(sp)
-; ZDINX32-NEXT:    lw t3, 224(sp)
-; ZDINX32-NEXT:    lw t2, 228(sp)
-; ZDINX32-NEXT:    lw t1, 232(sp)
-; ZDINX32-NEXT:    lw t0, 236(sp)
-; ZDINX32-NEXT:    sw t0, 72(sp)
-; ZDINX32-NEXT:    sw t1, 68(sp)
-; ZDINX32-NEXT:    sw t2, 64(sp)
-; ZDINX32-NEXT:    sw t3, 60(sp)
-; ZDINX32-NEXT:    sw ra, 56(sp)
-; ZDINX32-NEXT:    sw s11, 52(sp)
-; ZDINX32-NEXT:    sw s10, 48(sp)
-; ZDINX32-NEXT:    sw s9, 44(sp)
-; ZDINX32-NEXT:    sw s8, 40(sp)
-; ZDINX32-NEXT:    sw s7, 36(sp)
-; ZDINX32-NEXT:    sw s6, 32(sp)
-; ZDINX32-NEXT:    sw s5, 28(sp)
-; ZDINX32-NEXT:    sw s4, 24(sp)
-; ZDINX32-NEXT:    sw s3, 20(sp)
-; ZDINX32-NEXT:    sw s2, 16(sp)
+; ZDINX32-NEXT:    lw t1, 152(sp)
+; ZDINX32-NEXT:    lw s0, 156(sp)
+; ZDINX32-NEXT:    lw s1, 160(sp)
+; ZDINX32-NEXT:    lw t2, 164(sp)
+; ZDINX32-NEXT:    lw t3, 168(sp)
+; ZDINX32-NEXT:    lw t4, 172(sp)
+; ZDINX32-NEXT:    lw t5, 176(sp)
+; ZDINX32-NEXT:    lw t6, 180(sp)
+; ZDINX32-NEXT:    lw s2, 184(sp)
+; ZDINX32-NEXT:    lw s3, 188(sp)
+; ZDINX32-NEXT:    lw s4, 192(sp)
+; ZDINX32-NEXT:    lw s5, 196(sp)
+; ZDINX32-NEXT:    lw s6, 200(sp)
+; ZDINX32-NEXT:    lw s7, 204(sp)
+; ZDINX32-NEXT:    lw s8, 208(sp)
+; ZDINX32-NEXT:    lw s9, 212(sp)
+; ZDINX32-NEXT:    lw s10, 216(sp)
+; ZDINX32-NEXT:    lw s11, 220(sp)
+; ZDINX32-NEXT:    sw s11, 72(sp)
+; ZDINX32-NEXT:    sw s10, 68(sp)
+; ZDINX32-NEXT:    sw s9, 64(sp)
+; ZDINX32-NEXT:    sw s8, 60(sp)
+; ZDINX32-NEXT:    sw s7, 56(sp)
+; ZDINX32-NEXT:    sw s6, 52(sp)
+; ZDINX32-NEXT:    sw s5, 48(sp)
+; ZDINX32-NEXT:    sw s4, 44(sp)
+; ZDINX32-NEXT:    sw s3, 40(sp)
+; ZDINX32-NEXT:    sw s2, 36(sp)
+; ZDINX32-NEXT:    sw t6, 32(sp)
+; ZDINX32-NEXT:    sw t5, 28(sp)
+; ZDINX32-NEXT:    sw t4, 24(sp)
+; ZDINX32-NEXT:    sw t3, 20(sp)
+; ZDINX32-NEXT:    sw t2, 16(sp)
+; ZDINX32-NEXT:    lw t2, 128(sp)
+; ZDINX32-NEXT:    lw t3, 132(sp)
+; ZDINX32-NEXT:    lw t4, 136(sp)
+; ZDINX32-NEXT:    lw t5, 140(sp)
+; ZDINX32-NEXT:    lw t6, 144(sp)
 ; ZDINX32-NEXT:    sw s1, 12(sp)
 ; ZDINX32-NEXT:    sw s0, 8(sp)
-; ZDINX32-NEXT:    sw t4, 4(sp)
-; ZDINX32-NEXT:    sw t5, 0(sp)
-; ZDINX32-NEXT:    lw t2, 88(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw t3, 84(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw t4, 80(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw t5, 76(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    sw t1, 4(sp)
+; ZDINX32-NEXT:    sw t0, 0(sp)
 ; ZDINX32-NEXT:    call callee_float_32 at plt
-; ZDINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    addi sp, sp, 144
+; ZDINX32-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s1, 116(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s2, 112(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s3, 108(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s4, 104(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s5, 100(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s6, 96(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s7, 92(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s8, 88(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s9, 84(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s10, 80(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s11, 76(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    addi sp, sp, 128
 ; ZDINX32-NEXT:    ret
 ;
 ; ZDINX64-LABEL: caller_float_32:
 ; ZDINX64:       # %bb.0:
-; ZDINX64-NEXT:    addi sp, sp, -224
-; ZDINX64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    lw t0, 224(sp)
-; ZDINX64-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    addi sp, sp, -192
+; ZDINX64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
 ; ZDINX64-NEXT:    lw t0, 232(sp)
-; ZDINX64-NEXT:    sd t0, 104(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    lw t0, 240(sp)
-; ZDINX64-NEXT:    sd t0, 96(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    lw t0, 248(sp)
-; ZDINX64-NEXT:    sd t0, 88(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    lw t6, 256(sp)
-; ZDINX64-NEXT:    lw t5, 264(sp)
-; ZDINX64-NEXT:    lw t4, 272(sp)
-; ZDINX64-NEXT:    lw s0, 280(sp)
-; ZDINX64-NEXT:    lw s1, 288(sp)
-; ZDINX64-NEXT:    lw s2, 296(sp)
-; ZDINX64-NEXT:    lw s3, 304(sp)
-; ZDINX64-NEXT:    lw s4, 312(sp)
-; ZDINX64-NEXT:    lw s5, 320(sp)
-; ZDINX64-NEXT:    lw s6, 328(sp)
-; ZDINX64-NEXT:    lw s7, 336(sp)
-; ZDINX64-NEXT:    lw s8, 344(sp)
-; ZDINX64-NEXT:    lw s9, 352(sp)
-; ZDINX64-NEXT:    lw s10, 360(sp)
-; ZDINX64-NEXT:    lw s11, 368(sp)
-; ZDINX64-NEXT:    lw ra, 376(sp)
-; ZDINX64-NEXT:    lw t3, 384(sp)
-; ZDINX64-NEXT:    lw t2, 392(sp)
-; ZDINX64-NEXT:    lw t1, 400(sp)
-; ZDINX64-NEXT:    lw t0, 408(sp)
-; ZDINX64-NEXT:    sw t0, 72(sp)
-; ZDINX64-NEXT:    sw t1, 68(sp)
-; ZDINX64-NEXT:    sw t2, 64(sp)
-; ZDINX64-NEXT:    sw t3, 60(sp)
-; ZDINX64-NEXT:    sw ra, 56(sp)
-; ZDINX64-NEXT:    sw s11, 52(sp)
-; ZDINX64-NEXT:    sw s10, 48(sp)
-; ZDINX64-NEXT:    sw s9, 44(sp)
-; ZDINX64-NEXT:    sw s8, 40(sp)
-; ZDINX64-NEXT:    sw s7, 36(sp)
-; ZDINX64-NEXT:    sw s6, 32(sp)
-; ZDINX64-NEXT:    sw s5, 28(sp)
-; ZDINX64-NEXT:    sw s4, 24(sp)
-; ZDINX64-NEXT:    sw s3, 20(sp)
-; ZDINX64-NEXT:    sw s2, 16(sp)
+; ZDINX64-NEXT:    lw t1, 240(sp)
+; ZDINX64-NEXT:    lw s0, 248(sp)
+; ZDINX64-NEXT:    lw s1, 256(sp)
+; ZDINX64-NEXT:    lw t2, 264(sp)
+; ZDINX64-NEXT:    lw t3, 272(sp)
+; ZDINX64-NEXT:    lw t4, 280(sp)
+; ZDINX64-NEXT:    lw t5, 288(sp)
+; ZDINX64-NEXT:    lw t6, 296(sp)
+; ZDINX64-NEXT:    lw s2, 304(sp)
+; ZDINX64-NEXT:    lw s3, 312(sp)
+; ZDINX64-NEXT:    lw s4, 320(sp)
+; ZDINX64-NEXT:    lw s5, 328(sp)
+; ZDINX64-NEXT:    lw s6, 336(sp)
+; ZDINX64-NEXT:    lw s7, 344(sp)
+; ZDINX64-NEXT:    lw s8, 352(sp)
+; ZDINX64-NEXT:    lw s9, 360(sp)
+; ZDINX64-NEXT:    lw s10, 368(sp)
+; ZDINX64-NEXT:    lw s11, 376(sp)
+; ZDINX64-NEXT:    sw s11, 72(sp)
+; ZDINX64-NEXT:    sw s10, 68(sp)
+; ZDINX64-NEXT:    sw s9, 64(sp)
+; ZDINX64-NEXT:    sw s8, 60(sp)
+; ZDINX64-NEXT:    sw s7, 56(sp)
+; ZDINX64-NEXT:    sw s6, 52(sp)
+; ZDINX64-NEXT:    sw s5, 48(sp)
+; ZDINX64-NEXT:    sw s4, 44(sp)
+; ZDINX64-NEXT:    sw s3, 40(sp)
+; ZDINX64-NEXT:    sw s2, 36(sp)
+; ZDINX64-NEXT:    sw t6, 32(sp)
+; ZDINX64-NEXT:    sw t5, 28(sp)
+; ZDINX64-NEXT:    sw t4, 24(sp)
+; ZDINX64-NEXT:    sw t3, 20(sp)
+; ZDINX64-NEXT:    sw t2, 16(sp)
+; ZDINX64-NEXT:    lw t2, 192(sp)
+; ZDINX64-NEXT:    lw t3, 200(sp)
+; ZDINX64-NEXT:    lw t4, 208(sp)
+; ZDINX64-NEXT:    lw t5, 216(sp)
+; ZDINX64-NEXT:    lw t6, 224(sp)
 ; ZDINX64-NEXT:    sw s1, 12(sp)
 ; ZDINX64-NEXT:    sw s0, 8(sp)
-; ZDINX64-NEXT:    sw t4, 4(sp)
-; ZDINX64-NEXT:    sw t5, 0(sp)
-; ZDINX64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld t5, 88(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    sw t1, 4(sp)
+; ZDINX64-NEXT:    sw t0, 0(sp)
 ; ZDINX64-NEXT:    call callee_float_32 at plt
-; ZDINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    addi sp, sp, 224
+; ZDINX64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    addi sp, sp, 192
 ; ZDINX64-NEXT:    ret
 	%C = call fastcc float @callee_float_32(<32 x float> %A)
 	ret float %C
diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll
index f2079e314d51c1e..9acea0ebe1cd736 100644
--- a/llvm/test/CodeGen/RISCV/forced-atomics.ll
+++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll
@@ -4346,8 +4346,8 @@ define i64 @cmpxchg64_monotonic(ptr %p) nounwind {
 ; RV32-NEXT:    li a4, 0
 ; RV32-NEXT:    li a5, 0
 ; RV32-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32-NEXT:    lw a1, 4(sp)
 ; RV32-NEXT:    lw a0, 0(sp)
+; RV32-NEXT:    lw a1, 4(sp)
 ; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
@@ -4406,8 +4406,8 @@ define i64 @cmpxchg64_seq_cst(ptr %p) nounwind {
 ; RV32-NEXT:    li a5, 5
 ; RV32-NEXT:    li a3, 0
 ; RV32-NEXT:    call __atomic_compare_exchange_8 at plt
-; RV32-NEXT:    lw a1, 4(sp)
 ; RV32-NEXT:    lw a0, 0(sp)
+; RV32-NEXT:    lw a1, 4(sp)
 ; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/nontemporal.ll b/llvm/test/CodeGen/RISCV/nontemporal.ll
index 4c5c36fc72d14db..afe9d2d21e2c788 100644
--- a/llvm/test/CodeGen/RISCV/nontemporal.ll
+++ b/llvm/test/CodeGen/RISCV/nontemporal.ll
@@ -1037,16 +1037,16 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
 ; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
 ; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
-; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
+; CHECK-RV64C-NEXT:    lbu a2, 64(a1)
 ; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
 ; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
 ; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
-; CHECK-RV64C-NEXT:    lbu a2, 120(a1)
+; CHECK-RV64C-NEXT:    lbu t6, 120(a1)
 ; CHECK-RV64C-NEXT:    lbu s0, 112(a1)
 ; CHECK-RV64C-NEXT:    lbu s1, 104(a1)
 ; CHECK-RV64C-NEXT:    lbu a1, 96(a1)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sb a2, 15(a0)
+; CHECK-RV64C-NEXT:    sb t6, 15(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
 ; CHECK-RV64C-NEXT:    sb s0, 14(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
@@ -1060,7 +1060,7 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV64C-NEXT:    c.ntl.all
 ; CHECK-RV64C-NEXT:    sb a3, 9(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sb t6, 8(a0)
+; CHECK-RV64C-NEXT:    sb a2, 8(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
 ; CHECK-RV64C-NEXT:    sb t5, 7(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
@@ -1098,16 +1098,16 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
 ; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
 ; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
-; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
+; CHECK-RV32C-NEXT:    lbu a2, 32(a1)
 ; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
 ; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
 ; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
-; CHECK-RV32C-NEXT:    lbu a2, 60(a1)
+; CHECK-RV32C-NEXT:    lbu t6, 60(a1)
 ; CHECK-RV32C-NEXT:    lbu s0, 56(a1)
 ; CHECK-RV32C-NEXT:    lbu s1, 52(a1)
 ; CHECK-RV32C-NEXT:    lbu a1, 48(a1)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sb a2, 15(a0)
+; CHECK-RV32C-NEXT:    sb t6, 15(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
 ; CHECK-RV32C-NEXT:    sb s0, 14(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
@@ -1121,7 +1121,7 @@ define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV32C-NEXT:    c.ntl.all
 ; CHECK-RV32C-NEXT:    sb a3, 9(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sb t6, 8(a0)
+; CHECK-RV32C-NEXT:    sb a2, 8(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
 ; CHECK-RV32C-NEXT:    sb t5, 7(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
@@ -1219,58 +1219,58 @@ define void @test_nontemporal_store_v8i16(ptr %p, <8 x i16> %v) {
 ;
 ; CHECK-RV64C-LABEL: test_nontemporal_store_v8i16:
 ; CHECK-RV64C:       # %bb.0:
-; CHECK-RV64C-NEXT:    lh a6, 0(a1)
-; CHECK-RV64C-NEXT:    lh a7, 8(a1)
-; CHECK-RV64C-NEXT:    lh t0, 16(a1)
+; CHECK-RV64C-NEXT:    lh a7, 0(a1)
+; CHECK-RV64C-NEXT:    lh t0, 8(a1)
+; CHECK-RV64C-NEXT:    lh a4, 16(a1)
 ; CHECK-RV64C-NEXT:    lh a5, 24(a1)
-; CHECK-RV64C-NEXT:    lh a2, 56(a1)
-; CHECK-RV64C-NEXT:    lh a3, 48(a1)
-; CHECK-RV64C-NEXT:    lh a4, 40(a1)
+; CHECK-RV64C-NEXT:    lh a6, 56(a1)
+; CHECK-RV64C-NEXT:    lh a2, 48(a1)
+; CHECK-RV64C-NEXT:    lh a3, 40(a1)
 ; CHECK-RV64C-NEXT:    lh a1, 32(a1)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh a2, 14(a0)
+; CHECK-RV64C-NEXT:    sh a6, 14(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh a3, 12(a0)
+; CHECK-RV64C-NEXT:    sh a2, 12(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh a4, 10(a0)
+; CHECK-RV64C-NEXT:    sh a3, 10(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
 ; CHECK-RV64C-NEXT:    sh a1, 8(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
 ; CHECK-RV64C-NEXT:    sh a5, 6(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh t0, 4(a0)
+; CHECK-RV64C-NEXT:    sh a4, 4(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh a7, 2(a0)
+; CHECK-RV64C-NEXT:    sh t0, 2(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh a6, 0(a0)
+; CHECK-RV64C-NEXT:    sh a7, 0(a0)
 ; CHECK-RV64C-NEXT:    ret
 ;
 ; CHECK-RV32C-LABEL: test_nontemporal_store_v8i16:
 ; CHECK-RV32C:       # %bb.0:
-; CHECK-RV32C-NEXT:    lh a6, 0(a1)
-; CHECK-RV32C-NEXT:    lh a7, 4(a1)
-; CHECK-RV32C-NEXT:    lh t0, 8(a1)
+; CHECK-RV32C-NEXT:    lh a7, 0(a1)
+; CHECK-RV32C-NEXT:    lh t0, 4(a1)
+; CHECK-RV32C-NEXT:    lh a4, 8(a1)
 ; CHECK-RV32C-NEXT:    lh a5, 12(a1)
-; CHECK-RV32C-NEXT:    lh a2, 28(a1)
-; CHECK-RV32C-NEXT:    lh a3, 24(a1)
-; CHECK-RV32C-NEXT:    lh a4, 20(a1)
+; CHECK-RV32C-NEXT:    lh a6, 28(a1)
+; CHECK-RV32C-NEXT:    lh a2, 24(a1)
+; CHECK-RV32C-NEXT:    lh a3, 20(a1)
 ; CHECK-RV32C-NEXT:    lh a1, 16(a1)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh a2, 14(a0)
+; CHECK-RV32C-NEXT:    sh a6, 14(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh a3, 12(a0)
+; CHECK-RV32C-NEXT:    sh a2, 12(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh a4, 10(a0)
+; CHECK-RV32C-NEXT:    sh a3, 10(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
 ; CHECK-RV32C-NEXT:    sh a1, 8(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
 ; CHECK-RV32C-NEXT:    sh a5, 6(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh t0, 4(a0)
+; CHECK-RV32C-NEXT:    sh a4, 4(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh a7, 2(a0)
+; CHECK-RV32C-NEXT:    sh t0, 2(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh a6, 0(a0)
+; CHECK-RV32C-NEXT:    sh a7, 0(a0)
 ; CHECK-RV32C-NEXT:    ret
 ;
 ; CHECK-RV64V-LABEL: test_nontemporal_store_v8i16:
@@ -2451,16 +2451,16 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
 ; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
 ; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
-; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
+; CHECK-RV64C-NEXT:    lbu a2, 64(a1)
 ; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
 ; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
 ; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
-; CHECK-RV64C-NEXT:    lbu a2, 120(a1)
+; CHECK-RV64C-NEXT:    lbu t6, 120(a1)
 ; CHECK-RV64C-NEXT:    lbu s0, 112(a1)
 ; CHECK-RV64C-NEXT:    lbu s1, 104(a1)
 ; CHECK-RV64C-NEXT:    lbu a1, 96(a1)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
-; CHECK-RV64C-NEXT:    sb a2, 15(a0)
+; CHECK-RV64C-NEXT:    sb t6, 15(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
 ; CHECK-RV64C-NEXT:    sb s0, 14(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
@@ -2474,7 +2474,7 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV64C-NEXT:    c.ntl.p1
 ; CHECK-RV64C-NEXT:    sb a3, 9(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
-; CHECK-RV64C-NEXT:    sb t6, 8(a0)
+; CHECK-RV64C-NEXT:    sb a2, 8(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
 ; CHECK-RV64C-NEXT:    sb t5, 7(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
@@ -2512,16 +2512,16 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
 ; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
 ; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
-; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
+; CHECK-RV32C-NEXT:    lbu a2, 32(a1)
 ; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
 ; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
 ; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
-; CHECK-RV32C-NEXT:    lbu a2, 60(a1)
+; CHECK-RV32C-NEXT:    lbu t6, 60(a1)
 ; CHECK-RV32C-NEXT:    lbu s0, 56(a1)
 ; CHECK-RV32C-NEXT:    lbu s1, 52(a1)
 ; CHECK-RV32C-NEXT:    lbu a1, 48(a1)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
-; CHECK-RV32C-NEXT:    sb a2, 15(a0)
+; CHECK-RV32C-NEXT:    sb t6, 15(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
 ; CHECK-RV32C-NEXT:    sb s0, 14(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
@@ -2535,7 +2535,7 @@ define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV32C-NEXT:    c.ntl.p1
 ; CHECK-RV32C-NEXT:    sb a3, 9(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
-; CHECK-RV32C-NEXT:    sb t6, 8(a0)
+; CHECK-RV32C-NEXT:    sb a2, 8(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
 ; CHECK-RV32C-NEXT:    sb t5, 7(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
@@ -2633,58 +2633,58 @@ define void @test_nontemporal_P1_store_v8i16(ptr %p, <8 x i16> %v) {
 ;
 ; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v8i16:
 ; CHECK-RV64C:       # %bb.0:
-; CHECK-RV64C-NEXT:    lh a6, 0(a1)
-; CHECK-RV64C-NEXT:    lh a7, 8(a1)
-; CHECK-RV64C-NEXT:    lh t0, 16(a1)
+; CHECK-RV64C-NEXT:    lh a7, 0(a1)
+; CHECK-RV64C-NEXT:    lh t0, 8(a1)
+; CHECK-RV64C-NEXT:    lh a4, 16(a1)
 ; CHECK-RV64C-NEXT:    lh a5, 24(a1)
-; CHECK-RV64C-NEXT:    lh a2, 56(a1)
-; CHECK-RV64C-NEXT:    lh a3, 48(a1)
-; CHECK-RV64C-NEXT:    lh a4, 40(a1)
+; CHECK-RV64C-NEXT:    lh a6, 56(a1)
+; CHECK-RV64C-NEXT:    lh a2, 48(a1)
+; CHECK-RV64C-NEXT:    lh a3, 40(a1)
 ; CHECK-RV64C-NEXT:    lh a1, 32(a1)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
-; CHECK-RV64C-NEXT:    sh a2, 14(a0)
+; CHECK-RV64C-NEXT:    sh a6, 14(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
-; CHECK-RV64C-NEXT:    sh a3, 12(a0)
+; CHECK-RV64C-NEXT:    sh a2, 12(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
-; CHECK-RV64C-NEXT:    sh a4, 10(a0)
+; CHECK-RV64C-NEXT:    sh a3, 10(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
 ; CHECK-RV64C-NEXT:    sh a1, 8(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
 ; CHECK-RV64C-NEXT:    sh a5, 6(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
-; CHECK-RV64C-NEXT:    sh t0, 4(a0)
+; CHECK-RV64C-NEXT:    sh a4, 4(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
-; CHECK-RV64C-NEXT:    sh a7, 2(a0)
+; CHECK-RV64C-NEXT:    sh t0, 2(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.p1
-; CHECK-RV64C-NEXT:    sh a6, 0(a0)
+; CHECK-RV64C-NEXT:    sh a7, 0(a0)
 ; CHECK-RV64C-NEXT:    ret
 ;
 ; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v8i16:
 ; CHECK-RV32C:       # %bb.0:
-; CHECK-RV32C-NEXT:    lh a6, 0(a1)
-; CHECK-RV32C-NEXT:    lh a7, 4(a1)
-; CHECK-RV32C-NEXT:    lh t0, 8(a1)
+; CHECK-RV32C-NEXT:    lh a7, 0(a1)
+; CHECK-RV32C-NEXT:    lh t0, 4(a1)
+; CHECK-RV32C-NEXT:    lh a4, 8(a1)
 ; CHECK-RV32C-NEXT:    lh a5, 12(a1)
-; CHECK-RV32C-NEXT:    lh a2, 28(a1)
-; CHECK-RV32C-NEXT:    lh a3, 24(a1)
-; CHECK-RV32C-NEXT:    lh a4, 20(a1)
+; CHECK-RV32C-NEXT:    lh a6, 28(a1)
+; CHECK-RV32C-NEXT:    lh a2, 24(a1)
+; CHECK-RV32C-NEXT:    lh a3, 20(a1)
 ; CHECK-RV32C-NEXT:    lh a1, 16(a1)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
-; CHECK-RV32C-NEXT:    sh a2, 14(a0)
+; CHECK-RV32C-NEXT:    sh a6, 14(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
-; CHECK-RV32C-NEXT:    sh a3, 12(a0)
+; CHECK-RV32C-NEXT:    sh a2, 12(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
-; CHECK-RV32C-NEXT:    sh a4, 10(a0)
+; CHECK-RV32C-NEXT:    sh a3, 10(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
 ; CHECK-RV32C-NEXT:    sh a1, 8(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
 ; CHECK-RV32C-NEXT:    sh a5, 6(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
-; CHECK-RV32C-NEXT:    sh t0, 4(a0)
+; CHECK-RV32C-NEXT:    sh a4, 4(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
-; CHECK-RV32C-NEXT:    sh a7, 2(a0)
+; CHECK-RV32C-NEXT:    sh t0, 2(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.p1
-; CHECK-RV32C-NEXT:    sh a6, 0(a0)
+; CHECK-RV32C-NEXT:    sh a7, 0(a0)
 ; CHECK-RV32C-NEXT:    ret
 ;
 ; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v8i16:
@@ -3865,16 +3865,16 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
 ; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
 ; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
-; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
+; CHECK-RV64C-NEXT:    lbu a2, 64(a1)
 ; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
 ; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
 ; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
-; CHECK-RV64C-NEXT:    lbu a2, 120(a1)
+; CHECK-RV64C-NEXT:    lbu t6, 120(a1)
 ; CHECK-RV64C-NEXT:    lbu s0, 112(a1)
 ; CHECK-RV64C-NEXT:    lbu s1, 104(a1)
 ; CHECK-RV64C-NEXT:    lbu a1, 96(a1)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
-; CHECK-RV64C-NEXT:    sb a2, 15(a0)
+; CHECK-RV64C-NEXT:    sb t6, 15(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
 ; CHECK-RV64C-NEXT:    sb s0, 14(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
@@ -3888,7 +3888,7 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV64C-NEXT:    c.ntl.pall
 ; CHECK-RV64C-NEXT:    sb a3, 9(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
-; CHECK-RV64C-NEXT:    sb t6, 8(a0)
+; CHECK-RV64C-NEXT:    sb a2, 8(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
 ; CHECK-RV64C-NEXT:    sb t5, 7(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
@@ -3926,16 +3926,16 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
 ; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
 ; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
-; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
+; CHECK-RV32C-NEXT:    lbu a2, 32(a1)
 ; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
 ; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
 ; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
-; CHECK-RV32C-NEXT:    lbu a2, 60(a1)
+; CHECK-RV32C-NEXT:    lbu t6, 60(a1)
 ; CHECK-RV32C-NEXT:    lbu s0, 56(a1)
 ; CHECK-RV32C-NEXT:    lbu s1, 52(a1)
 ; CHECK-RV32C-NEXT:    lbu a1, 48(a1)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
-; CHECK-RV32C-NEXT:    sb a2, 15(a0)
+; CHECK-RV32C-NEXT:    sb t6, 15(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
 ; CHECK-RV32C-NEXT:    sb s0, 14(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
@@ -3949,7 +3949,7 @@ define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV32C-NEXT:    c.ntl.pall
 ; CHECK-RV32C-NEXT:    sb a3, 9(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
-; CHECK-RV32C-NEXT:    sb t6, 8(a0)
+; CHECK-RV32C-NEXT:    sb a2, 8(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
 ; CHECK-RV32C-NEXT:    sb t5, 7(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
@@ -4047,58 +4047,58 @@ define void @test_nontemporal_PALL_store_v8i16(ptr %p, <8 x i16> %v) {
 ;
 ; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v8i16:
 ; CHECK-RV64C:       # %bb.0:
-; CHECK-RV64C-NEXT:    lh a6, 0(a1)
-; CHECK-RV64C-NEXT:    lh a7, 8(a1)
-; CHECK-RV64C-NEXT:    lh t0, 16(a1)
+; CHECK-RV64C-NEXT:    lh a7, 0(a1)
+; CHECK-RV64C-NEXT:    lh t0, 8(a1)
+; CHECK-RV64C-NEXT:    lh a4, 16(a1)
 ; CHECK-RV64C-NEXT:    lh a5, 24(a1)
-; CHECK-RV64C-NEXT:    lh a2, 56(a1)
-; CHECK-RV64C-NEXT:    lh a3, 48(a1)
-; CHECK-RV64C-NEXT:    lh a4, 40(a1)
+; CHECK-RV64C-NEXT:    lh a6, 56(a1)
+; CHECK-RV64C-NEXT:    lh a2, 48(a1)
+; CHECK-RV64C-NEXT:    lh a3, 40(a1)
 ; CHECK-RV64C-NEXT:    lh a1, 32(a1)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
-; CHECK-RV64C-NEXT:    sh a2, 14(a0)
+; CHECK-RV64C-NEXT:    sh a6, 14(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
-; CHECK-RV64C-NEXT:    sh a3, 12(a0)
+; CHECK-RV64C-NEXT:    sh a2, 12(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
-; CHECK-RV64C-NEXT:    sh a4, 10(a0)
+; CHECK-RV64C-NEXT:    sh a3, 10(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
 ; CHECK-RV64C-NEXT:    sh a1, 8(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
 ; CHECK-RV64C-NEXT:    sh a5, 6(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
-; CHECK-RV64C-NEXT:    sh t0, 4(a0)
+; CHECK-RV64C-NEXT:    sh a4, 4(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
-; CHECK-RV64C-NEXT:    sh a7, 2(a0)
+; CHECK-RV64C-NEXT:    sh t0, 2(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.pall
-; CHECK-RV64C-NEXT:    sh a6, 0(a0)
+; CHECK-RV64C-NEXT:    sh a7, 0(a0)
 ; CHECK-RV64C-NEXT:    ret
 ;
 ; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v8i16:
 ; CHECK-RV32C:       # %bb.0:
-; CHECK-RV32C-NEXT:    lh a6, 0(a1)
-; CHECK-RV32C-NEXT:    lh a7, 4(a1)
-; CHECK-RV32C-NEXT:    lh t0, 8(a1)
+; CHECK-RV32C-NEXT:    lh a7, 0(a1)
+; CHECK-RV32C-NEXT:    lh t0, 4(a1)
+; CHECK-RV32C-NEXT:    lh a4, 8(a1)
 ; CHECK-RV32C-NEXT:    lh a5, 12(a1)
-; CHECK-RV32C-NEXT:    lh a2, 28(a1)
-; CHECK-RV32C-NEXT:    lh a3, 24(a1)
-; CHECK-RV32C-NEXT:    lh a4, 20(a1)
+; CHECK-RV32C-NEXT:    lh a6, 28(a1)
+; CHECK-RV32C-NEXT:    lh a2, 24(a1)
+; CHECK-RV32C-NEXT:    lh a3, 20(a1)
 ; CHECK-RV32C-NEXT:    lh a1, 16(a1)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
-; CHECK-RV32C-NEXT:    sh a2, 14(a0)
+; CHECK-RV32C-NEXT:    sh a6, 14(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
-; CHECK-RV32C-NEXT:    sh a3, 12(a0)
+; CHECK-RV32C-NEXT:    sh a2, 12(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
-; CHECK-RV32C-NEXT:    sh a4, 10(a0)
+; CHECK-RV32C-NEXT:    sh a3, 10(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
 ; CHECK-RV32C-NEXT:    sh a1, 8(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
 ; CHECK-RV32C-NEXT:    sh a5, 6(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
-; CHECK-RV32C-NEXT:    sh t0, 4(a0)
+; CHECK-RV32C-NEXT:    sh a4, 4(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
-; CHECK-RV32C-NEXT:    sh a7, 2(a0)
+; CHECK-RV32C-NEXT:    sh t0, 2(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.pall
-; CHECK-RV32C-NEXT:    sh a6, 0(a0)
+; CHECK-RV32C-NEXT:    sh a7, 0(a0)
 ; CHECK-RV32C-NEXT:    ret
 ;
 ; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v8i16:
@@ -5279,16 +5279,16 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
 ; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
 ; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
-; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
+; CHECK-RV64C-NEXT:    lbu a2, 64(a1)
 ; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
 ; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
 ; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
-; CHECK-RV64C-NEXT:    lbu a2, 120(a1)
+; CHECK-RV64C-NEXT:    lbu t6, 120(a1)
 ; CHECK-RV64C-NEXT:    lbu s0, 112(a1)
 ; CHECK-RV64C-NEXT:    lbu s1, 104(a1)
 ; CHECK-RV64C-NEXT:    lbu a1, 96(a1)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
-; CHECK-RV64C-NEXT:    sb a2, 15(a0)
+; CHECK-RV64C-NEXT:    sb t6, 15(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
 ; CHECK-RV64C-NEXT:    sb s0, 14(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
@@ -5302,7 +5302,7 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV64C-NEXT:    c.ntl.s1
 ; CHECK-RV64C-NEXT:    sb a3, 9(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
-; CHECK-RV64C-NEXT:    sb t6, 8(a0)
+; CHECK-RV64C-NEXT:    sb a2, 8(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
 ; CHECK-RV64C-NEXT:    sb t5, 7(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
@@ -5340,16 +5340,16 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
 ; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
 ; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
-; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
+; CHECK-RV32C-NEXT:    lbu a2, 32(a1)
 ; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
 ; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
 ; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
-; CHECK-RV32C-NEXT:    lbu a2, 60(a1)
+; CHECK-RV32C-NEXT:    lbu t6, 60(a1)
 ; CHECK-RV32C-NEXT:    lbu s0, 56(a1)
 ; CHECK-RV32C-NEXT:    lbu s1, 52(a1)
 ; CHECK-RV32C-NEXT:    lbu a1, 48(a1)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
-; CHECK-RV32C-NEXT:    sb a2, 15(a0)
+; CHECK-RV32C-NEXT:    sb t6, 15(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
 ; CHECK-RV32C-NEXT:    sb s0, 14(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
@@ -5363,7 +5363,7 @@ define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV32C-NEXT:    c.ntl.s1
 ; CHECK-RV32C-NEXT:    sb a3, 9(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
-; CHECK-RV32C-NEXT:    sb t6, 8(a0)
+; CHECK-RV32C-NEXT:    sb a2, 8(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
 ; CHECK-RV32C-NEXT:    sb t5, 7(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
@@ -5461,58 +5461,58 @@ define void @test_nontemporal_S1_store_v8i16(ptr %p, <8 x i16> %v) {
 ;
 ; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v8i16:
 ; CHECK-RV64C:       # %bb.0:
-; CHECK-RV64C-NEXT:    lh a6, 0(a1)
-; CHECK-RV64C-NEXT:    lh a7, 8(a1)
-; CHECK-RV64C-NEXT:    lh t0, 16(a1)
+; CHECK-RV64C-NEXT:    lh a7, 0(a1)
+; CHECK-RV64C-NEXT:    lh t0, 8(a1)
+; CHECK-RV64C-NEXT:    lh a4, 16(a1)
 ; CHECK-RV64C-NEXT:    lh a5, 24(a1)
-; CHECK-RV64C-NEXT:    lh a2, 56(a1)
-; CHECK-RV64C-NEXT:    lh a3, 48(a1)
-; CHECK-RV64C-NEXT:    lh a4, 40(a1)
+; CHECK-RV64C-NEXT:    lh a6, 56(a1)
+; CHECK-RV64C-NEXT:    lh a2, 48(a1)
+; CHECK-RV64C-NEXT:    lh a3, 40(a1)
 ; CHECK-RV64C-NEXT:    lh a1, 32(a1)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
-; CHECK-RV64C-NEXT:    sh a2, 14(a0)
+; CHECK-RV64C-NEXT:    sh a6, 14(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
-; CHECK-RV64C-NEXT:    sh a3, 12(a0)
+; CHECK-RV64C-NEXT:    sh a2, 12(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
-; CHECK-RV64C-NEXT:    sh a4, 10(a0)
+; CHECK-RV64C-NEXT:    sh a3, 10(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
 ; CHECK-RV64C-NEXT:    sh a1, 8(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
 ; CHECK-RV64C-NEXT:    sh a5, 6(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
-; CHECK-RV64C-NEXT:    sh t0, 4(a0)
+; CHECK-RV64C-NEXT:    sh a4, 4(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
-; CHECK-RV64C-NEXT:    sh a7, 2(a0)
+; CHECK-RV64C-NEXT:    sh t0, 2(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.s1
-; CHECK-RV64C-NEXT:    sh a6, 0(a0)
+; CHECK-RV64C-NEXT:    sh a7, 0(a0)
 ; CHECK-RV64C-NEXT:    ret
 ;
 ; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v8i16:
 ; CHECK-RV32C:       # %bb.0:
-; CHECK-RV32C-NEXT:    lh a6, 0(a1)
-; CHECK-RV32C-NEXT:    lh a7, 4(a1)
-; CHECK-RV32C-NEXT:    lh t0, 8(a1)
+; CHECK-RV32C-NEXT:    lh a7, 0(a1)
+; CHECK-RV32C-NEXT:    lh t0, 4(a1)
+; CHECK-RV32C-NEXT:    lh a4, 8(a1)
 ; CHECK-RV32C-NEXT:    lh a5, 12(a1)
-; CHECK-RV32C-NEXT:    lh a2, 28(a1)
-; CHECK-RV32C-NEXT:    lh a3, 24(a1)
-; CHECK-RV32C-NEXT:    lh a4, 20(a1)
+; CHECK-RV32C-NEXT:    lh a6, 28(a1)
+; CHECK-RV32C-NEXT:    lh a2, 24(a1)
+; CHECK-RV32C-NEXT:    lh a3, 20(a1)
 ; CHECK-RV32C-NEXT:    lh a1, 16(a1)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
-; CHECK-RV32C-NEXT:    sh a2, 14(a0)
+; CHECK-RV32C-NEXT:    sh a6, 14(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
-; CHECK-RV32C-NEXT:    sh a3, 12(a0)
+; CHECK-RV32C-NEXT:    sh a2, 12(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
-; CHECK-RV32C-NEXT:    sh a4, 10(a0)
+; CHECK-RV32C-NEXT:    sh a3, 10(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
 ; CHECK-RV32C-NEXT:    sh a1, 8(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
 ; CHECK-RV32C-NEXT:    sh a5, 6(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
-; CHECK-RV32C-NEXT:    sh t0, 4(a0)
+; CHECK-RV32C-NEXT:    sh a4, 4(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
-; CHECK-RV32C-NEXT:    sh a7, 2(a0)
+; CHECK-RV32C-NEXT:    sh t0, 2(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.s1
-; CHECK-RV32C-NEXT:    sh a6, 0(a0)
+; CHECK-RV32C-NEXT:    sh a7, 0(a0)
 ; CHECK-RV32C-NEXT:    ret
 ;
 ; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v8i16:
@@ -6693,16 +6693,16 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
 ; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
 ; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
-; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
+; CHECK-RV64C-NEXT:    lbu a2, 64(a1)
 ; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
 ; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
 ; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
-; CHECK-RV64C-NEXT:    lbu a2, 120(a1)
+; CHECK-RV64C-NEXT:    lbu t6, 120(a1)
 ; CHECK-RV64C-NEXT:    lbu s0, 112(a1)
 ; CHECK-RV64C-NEXT:    lbu s1, 104(a1)
 ; CHECK-RV64C-NEXT:    lbu a1, 96(a1)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sb a2, 15(a0)
+; CHECK-RV64C-NEXT:    sb t6, 15(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
 ; CHECK-RV64C-NEXT:    sb s0, 14(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
@@ -6716,7 +6716,7 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV64C-NEXT:    c.ntl.all
 ; CHECK-RV64C-NEXT:    sb a3, 9(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sb t6, 8(a0)
+; CHECK-RV64C-NEXT:    sb a2, 8(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
 ; CHECK-RV64C-NEXT:    sb t5, 7(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
@@ -6754,16 +6754,16 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
 ; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
 ; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
-; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
+; CHECK-RV32C-NEXT:    lbu a2, 32(a1)
 ; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
 ; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
 ; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
-; CHECK-RV32C-NEXT:    lbu a2, 60(a1)
+; CHECK-RV32C-NEXT:    lbu t6, 60(a1)
 ; CHECK-RV32C-NEXT:    lbu s0, 56(a1)
 ; CHECK-RV32C-NEXT:    lbu s1, 52(a1)
 ; CHECK-RV32C-NEXT:    lbu a1, 48(a1)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sb a2, 15(a0)
+; CHECK-RV32C-NEXT:    sb t6, 15(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
 ; CHECK-RV32C-NEXT:    sb s0, 14(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
@@ -6777,7 +6777,7 @@ define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
 ; CHECK-RV32C-NEXT:    c.ntl.all
 ; CHECK-RV32C-NEXT:    sb a3, 9(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sb t6, 8(a0)
+; CHECK-RV32C-NEXT:    sb a2, 8(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
 ; CHECK-RV32C-NEXT:    sb t5, 7(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
@@ -6875,58 +6875,58 @@ define void @test_nontemporal_ALL_store_v8i16(ptr %p, <8 x i16> %v) {
 ;
 ; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v8i16:
 ; CHECK-RV64C:       # %bb.0:
-; CHECK-RV64C-NEXT:    lh a6, 0(a1)
-; CHECK-RV64C-NEXT:    lh a7, 8(a1)
-; CHECK-RV64C-NEXT:    lh t0, 16(a1)
+; CHECK-RV64C-NEXT:    lh a7, 0(a1)
+; CHECK-RV64C-NEXT:    lh t0, 8(a1)
+; CHECK-RV64C-NEXT:    lh a4, 16(a1)
 ; CHECK-RV64C-NEXT:    lh a5, 24(a1)
-; CHECK-RV64C-NEXT:    lh a2, 56(a1)
-; CHECK-RV64C-NEXT:    lh a3, 48(a1)
-; CHECK-RV64C-NEXT:    lh a4, 40(a1)
+; CHECK-RV64C-NEXT:    lh a6, 56(a1)
+; CHECK-RV64C-NEXT:    lh a2, 48(a1)
+; CHECK-RV64C-NEXT:    lh a3, 40(a1)
 ; CHECK-RV64C-NEXT:    lh a1, 32(a1)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh a2, 14(a0)
+; CHECK-RV64C-NEXT:    sh a6, 14(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh a3, 12(a0)
+; CHECK-RV64C-NEXT:    sh a2, 12(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh a4, 10(a0)
+; CHECK-RV64C-NEXT:    sh a3, 10(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
 ; CHECK-RV64C-NEXT:    sh a1, 8(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
 ; CHECK-RV64C-NEXT:    sh a5, 6(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh t0, 4(a0)
+; CHECK-RV64C-NEXT:    sh a4, 4(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh a7, 2(a0)
+; CHECK-RV64C-NEXT:    sh t0, 2(a0)
 ; CHECK-RV64C-NEXT:    c.ntl.all
-; CHECK-RV64C-NEXT:    sh a6, 0(a0)
+; CHECK-RV64C-NEXT:    sh a7, 0(a0)
 ; CHECK-RV64C-NEXT:    ret
 ;
 ; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v8i16:
 ; CHECK-RV32C:       # %bb.0:
-; CHECK-RV32C-NEXT:    lh a6, 0(a1)
-; CHECK-RV32C-NEXT:    lh a7, 4(a1)
-; CHECK-RV32C-NEXT:    lh t0, 8(a1)
+; CHECK-RV32C-NEXT:    lh a7, 0(a1)
+; CHECK-RV32C-NEXT:    lh t0, 4(a1)
+; CHECK-RV32C-NEXT:    lh a4, 8(a1)
 ; CHECK-RV32C-NEXT:    lh a5, 12(a1)
-; CHECK-RV32C-NEXT:    lh a2, 28(a1)
-; CHECK-RV32C-NEXT:    lh a3, 24(a1)
-; CHECK-RV32C-NEXT:    lh a4, 20(a1)
+; CHECK-RV32C-NEXT:    lh a6, 28(a1)
+; CHECK-RV32C-NEXT:    lh a2, 24(a1)
+; CHECK-RV32C-NEXT:    lh a3, 20(a1)
 ; CHECK-RV32C-NEXT:    lh a1, 16(a1)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh a2, 14(a0)
+; CHECK-RV32C-NEXT:    sh a6, 14(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh a3, 12(a0)
+; CHECK-RV32C-NEXT:    sh a2, 12(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh a4, 10(a0)
+; CHECK-RV32C-NEXT:    sh a3, 10(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
 ; CHECK-RV32C-NEXT:    sh a1, 8(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
 ; CHECK-RV32C-NEXT:    sh a5, 6(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh t0, 4(a0)
+; CHECK-RV32C-NEXT:    sh a4, 4(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh a7, 2(a0)
+; CHECK-RV32C-NEXT:    sh t0, 2(a0)
 ; CHECK-RV32C-NEXT:    c.ntl.all
-; CHECK-RV32C-NEXT:    sh a6, 0(a0)
+; CHECK-RV32C-NEXT:    sh a7, 0(a0)
 ; CHECK-RV32C-NEXT:    ret
 ;
 ; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v8i16:
diff --git a/llvm/test/CodeGen/RISCV/pr64645.ll b/llvm/test/CodeGen/RISCV/pr64645.ll
index 44dce5aabd22426..4bd7656a237f613 100644
--- a/llvm/test/CodeGen/RISCV/pr64645.ll
+++ b/llvm/test/CodeGen/RISCV/pr64645.ll
@@ -26,10 +26,10 @@ define <2 x double> @v2f64(<2 x double> %x, <2 x double> %y) nounwind {
 ; CHECK-NEXT:    fadd.d a0, a0, a4
 ; CHECK-NEXT:    sw a0, 8(sp)
 ; CHECK-NEXT:    sw a1, 12(sp)
-; CHECK-NEXT:    lw a0, 8(sp)
 ; CHECK-NEXT:    lw a1, 12(sp)
 ; CHECK-NEXT:    sw a2, 8(sp)
 ; CHECK-NEXT:    sw a3, 12(sp)
+; CHECK-NEXT:    lw a0, 8(sp)
 ; CHECK-NEXT:    lw a2, 8(sp)
 ; CHECK-NEXT:    lw a3, 12(sp)
 ; CHECK-NEXT:    addi sp, sp, 16
diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
index 776944b177636c2..25b3f1f6005b21e 100644
--- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll
+++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
@@ -1142,24 +1142,24 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
 ; RV32IZCMP-NEXT:    lw t4, 24(a5)
 ; RV32IZCMP-NEXT:    lw t5, 28(a5)
 ; RV32IZCMP-NEXT:    lw t6, 32(a5)
-; RV32IZCMP-NEXT:    lw s2, 36(a5)
-; RV32IZCMP-NEXT:    lw s3, 40(a5)
-; RV32IZCMP-NEXT:    lw s4, 44(a5)
+; RV32IZCMP-NEXT:    lw s3, 36(a5)
+; RV32IZCMP-NEXT:    lw s4, 40(a5)
+; RV32IZCMP-NEXT:    lw a4, 44(a5)
 ; RV32IZCMP-NEXT:    lw a1, 48(a5)
 ; RV32IZCMP-NEXT:    lw s0, 52(a5)
-; RV32IZCMP-NEXT:    lw s1, 68(a5)
-; RV32IZCMP-NEXT:    lw a2, 64(a5)
-; RV32IZCMP-NEXT:    lw a3, 60(a5)
-; RV32IZCMP-NEXT:    lw a4, 56(a5)
-; RV32IZCMP-NEXT:    sw s1, 68(a5)
-; RV32IZCMP-NEXT:    sw a2, 64(a5)
-; RV32IZCMP-NEXT:    sw a3, 60(a5)
-; RV32IZCMP-NEXT:    sw a4, 56(a5)
+; RV32IZCMP-NEXT:    lw s2, 68(a5)
+; RV32IZCMP-NEXT:    lw s1, 64(a5)
+; RV32IZCMP-NEXT:    lw a2, 60(a5)
+; RV32IZCMP-NEXT:    lw a3, 56(a5)
+; RV32IZCMP-NEXT:    sw s2, 68(a5)
+; RV32IZCMP-NEXT:    sw s1, 64(a5)
+; RV32IZCMP-NEXT:    sw a2, 60(a5)
+; RV32IZCMP-NEXT:    sw a3, 56(a5)
 ; RV32IZCMP-NEXT:    sw s0, 52(a5)
 ; RV32IZCMP-NEXT:    sw a1, 48(a5)
-; RV32IZCMP-NEXT:    sw s4, 44(a5)
-; RV32IZCMP-NEXT:    sw s3, 40(a5)
-; RV32IZCMP-NEXT:    sw s2, 36(a5)
+; RV32IZCMP-NEXT:    sw a4, 44(a5)
+; RV32IZCMP-NEXT:    sw s4, 40(a5)
+; RV32IZCMP-NEXT:    sw s3, 36(a5)
 ; RV32IZCMP-NEXT:    sw t6, 32(a5)
 ; RV32IZCMP-NEXT:    sw t5, 28(a5)
 ; RV32IZCMP-NEXT:    sw t4, 24(a5)
@@ -1185,24 +1185,24 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
 ; RV64IZCMP-NEXT:    lw t4, 24(a5)
 ; RV64IZCMP-NEXT:    lw t5, 28(a5)
 ; RV64IZCMP-NEXT:    lw t6, 32(a5)
-; RV64IZCMP-NEXT:    lw s2, 36(a5)
-; RV64IZCMP-NEXT:    lw s3, 40(a5)
-; RV64IZCMP-NEXT:    lw s4, 44(a5)
+; RV64IZCMP-NEXT:    lw s3, 36(a5)
+; RV64IZCMP-NEXT:    lw s4, 40(a5)
+; RV64IZCMP-NEXT:    lw a4, 44(a5)
 ; RV64IZCMP-NEXT:    lw a1, 48(a5)
 ; RV64IZCMP-NEXT:    lw s0, 52(a5)
-; RV64IZCMP-NEXT:    lw s1, 68(a5)
-; RV64IZCMP-NEXT:    lw a2, 64(a5)
-; RV64IZCMP-NEXT:    lw a3, 60(a5)
-; RV64IZCMP-NEXT:    lw a4, 56(a5)
-; RV64IZCMP-NEXT:    sw s1, 68(a5)
-; RV64IZCMP-NEXT:    sw a2, 64(a5)
-; RV64IZCMP-NEXT:    sw a3, 60(a5)
-; RV64IZCMP-NEXT:    sw a4, 56(a5)
+; RV64IZCMP-NEXT:    lw s2, 68(a5)
+; RV64IZCMP-NEXT:    lw s1, 64(a5)
+; RV64IZCMP-NEXT:    lw a2, 60(a5)
+; RV64IZCMP-NEXT:    lw a3, 56(a5)
+; RV64IZCMP-NEXT:    sw s2, 68(a5)
+; RV64IZCMP-NEXT:    sw s1, 64(a5)
+; RV64IZCMP-NEXT:    sw a2, 60(a5)
+; RV64IZCMP-NEXT:    sw a3, 56(a5)
 ; RV64IZCMP-NEXT:    sw s0, 52(a5)
 ; RV64IZCMP-NEXT:    sw a1, 48(a5)
-; RV64IZCMP-NEXT:    sw s4, 44(a5)
-; RV64IZCMP-NEXT:    sw s3, 40(a5)
-; RV64IZCMP-NEXT:    sw s2, 36(a5)
+; RV64IZCMP-NEXT:    sw a4, 44(a5)
+; RV64IZCMP-NEXT:    sw s4, 40(a5)
+; RV64IZCMP-NEXT:    sw s3, 36(a5)
 ; RV64IZCMP-NEXT:    sw t6, 32(a5)
 ; RV64IZCMP-NEXT:    sw t5, 28(a5)
 ; RV64IZCMP-NEXT:    sw t4, 24(a5)
@@ -1228,24 +1228,24 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
 ; RV32IZCMP-SR-NEXT:    lw t4, 24(a5)
 ; RV32IZCMP-SR-NEXT:    lw t5, 28(a5)
 ; RV32IZCMP-SR-NEXT:    lw t6, 32(a5)
-; RV32IZCMP-SR-NEXT:    lw s2, 36(a5)
-; RV32IZCMP-SR-NEXT:    lw s3, 40(a5)
-; RV32IZCMP-SR-NEXT:    lw s4, 44(a5)
+; RV32IZCMP-SR-NEXT:    lw s3, 36(a5)
+; RV32IZCMP-SR-NEXT:    lw s4, 40(a5)
+; RV32IZCMP-SR-NEXT:    lw a4, 44(a5)
 ; RV32IZCMP-SR-NEXT:    lw a1, 48(a5)
 ; RV32IZCMP-SR-NEXT:    lw s0, 52(a5)
-; RV32IZCMP-SR-NEXT:    lw s1, 68(a5)
-; RV32IZCMP-SR-NEXT:    lw a2, 64(a5)
-; RV32IZCMP-SR-NEXT:    lw a3, 60(a5)
-; RV32IZCMP-SR-NEXT:    lw a4, 56(a5)
-; RV32IZCMP-SR-NEXT:    sw s1, 68(a5)
-; RV32IZCMP-SR-NEXT:    sw a2, 64(a5)
-; RV32IZCMP-SR-NEXT:    sw a3, 60(a5)
-; RV32IZCMP-SR-NEXT:    sw a4, 56(a5)
+; RV32IZCMP-SR-NEXT:    lw s2, 68(a5)
+; RV32IZCMP-SR-NEXT:    lw s1, 64(a5)
+; RV32IZCMP-SR-NEXT:    lw a2, 60(a5)
+; RV32IZCMP-SR-NEXT:    lw a3, 56(a5)
+; RV32IZCMP-SR-NEXT:    sw s2, 68(a5)
+; RV32IZCMP-SR-NEXT:    sw s1, 64(a5)
+; RV32IZCMP-SR-NEXT:    sw a2, 60(a5)
+; RV32IZCMP-SR-NEXT:    sw a3, 56(a5)
 ; RV32IZCMP-SR-NEXT:    sw s0, 52(a5)
 ; RV32IZCMP-SR-NEXT:    sw a1, 48(a5)
-; RV32IZCMP-SR-NEXT:    sw s4, 44(a5)
-; RV32IZCMP-SR-NEXT:    sw s3, 40(a5)
-; RV32IZCMP-SR-NEXT:    sw s2, 36(a5)
+; RV32IZCMP-SR-NEXT:    sw a4, 44(a5)
+; RV32IZCMP-SR-NEXT:    sw s4, 40(a5)
+; RV32IZCMP-SR-NEXT:    sw s3, 36(a5)
 ; RV32IZCMP-SR-NEXT:    sw t6, 32(a5)
 ; RV32IZCMP-SR-NEXT:    sw t5, 28(a5)
 ; RV32IZCMP-SR-NEXT:    sw t4, 24(a5)
@@ -1271,24 +1271,24 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind {
 ; RV64IZCMP-SR-NEXT:    lw t4, 24(a5)
 ; RV64IZCMP-SR-NEXT:    lw t5, 28(a5)
 ; RV64IZCMP-SR-NEXT:    lw t6, 32(a5)
-; RV64IZCMP-SR-NEXT:    lw s2, 36(a5)
-; RV64IZCMP-SR-NEXT:    lw s3, 40(a5)
-; RV64IZCMP-SR-NEXT:    lw s4, 44(a5)
+; RV64IZCMP-SR-NEXT:    lw s3, 36(a5)
+; RV64IZCMP-SR-NEXT:    lw s4, 40(a5)
+; RV64IZCMP-SR-NEXT:    lw a4, 44(a5)
 ; RV64IZCMP-SR-NEXT:    lw a1, 48(a5)
 ; RV64IZCMP-SR-NEXT:    lw s0, 52(a5)
-; RV64IZCMP-SR-NEXT:    lw s1, 68(a5)
-; RV64IZCMP-SR-NEXT:    lw a2, 64(a5)
-; RV64IZCMP-SR-NEXT:    lw a3, 60(a5)
-; RV64IZCMP-SR-NEXT:    lw a4, 56(a5)
-; RV64IZCMP-SR-NEXT:    sw s1, 68(a5)
-; RV64IZCMP-SR-NEXT:    sw a2, 64(a5)
-; RV64IZCMP-SR-NEXT:    sw a3, 60(a5)
-; RV64IZCMP-SR-NEXT:    sw a4, 56(a5)
+; RV64IZCMP-SR-NEXT:    lw s2, 68(a5)
+; RV64IZCMP-SR-NEXT:    lw s1, 64(a5)
+; RV64IZCMP-SR-NEXT:    lw a2, 60(a5)
+; RV64IZCMP-SR-NEXT:    lw a3, 56(a5)
+; RV64IZCMP-SR-NEXT:    sw s2, 68(a5)
+; RV64IZCMP-SR-NEXT:    sw s1, 64(a5)
+; RV64IZCMP-SR-NEXT:    sw a2, 60(a5)
+; RV64IZCMP-SR-NEXT:    sw a3, 56(a5)
 ; RV64IZCMP-SR-NEXT:    sw s0, 52(a5)
 ; RV64IZCMP-SR-NEXT:    sw a1, 48(a5)
-; RV64IZCMP-SR-NEXT:    sw s4, 44(a5)
-; RV64IZCMP-SR-NEXT:    sw s3, 40(a5)
-; RV64IZCMP-SR-NEXT:    sw s2, 36(a5)
+; RV64IZCMP-SR-NEXT:    sw a4, 44(a5)
+; RV64IZCMP-SR-NEXT:    sw s4, 40(a5)
+; RV64IZCMP-SR-NEXT:    sw s3, 36(a5)
 ; RV64IZCMP-SR-NEXT:    sw t6, 32(a5)
 ; RV64IZCMP-SR-NEXT:    sw t5, 28(a5)
 ; RV64IZCMP-SR-NEXT:    sw t4, 24(a5)
@@ -1837,16 +1837,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32IZCMP-NEXT:    sw t4, 44(sp) # 4-byte Folded Spill
 ; RV32IZCMP-NEXT:    sw t5, 40(sp) # 4-byte Folded Spill
 ; RV32IZCMP-NEXT:    sw t6, 36(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lui a6, %hi(var_test_irq)
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-NEXT:    lui a4, %hi(var_test_irq)
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV32IZCMP-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV32IZCMP-NEXT:    lw a0, 16(a5)
 ; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
 ; RV32IZCMP-NEXT:    lw a0, 20(a5)
@@ -1866,28 +1866,28 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32IZCMP-NEXT:    lw s11, 72(a5)
 ; RV32IZCMP-NEXT:    lw ra, 76(a5)
 ; RV32IZCMP-NEXT:    lw s1, 80(a5)
-; RV32IZCMP-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-NEXT:    lw t1, 92(a5)
-; RV32IZCMP-NEXT:    lw t0, 96(a5)
+; RV32IZCMP-NEXT:    lw t2, 84(a5)
+; RV32IZCMP-NEXT:    lw t1, 88(a5)
+; RV32IZCMP-NEXT:    lw t0, 92(a5)
+; RV32IZCMP-NEXT:    lw a7, 96(a5)
 ; RV32IZCMP-NEXT:    lw s0, 100(a5)
-; RV32IZCMP-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-NEXT:    sw a7, 104(a5)
+; RV32IZCMP-NEXT:    lw a6, 104(a5)
+; RV32IZCMP-NEXT:    lw a3, 108(a5)
+; RV32IZCMP-NEXT:    lw t3, 124(a5)
+; RV32IZCMP-NEXT:    lw a0, 120(a5)
+; RV32IZCMP-NEXT:    lw a1, 116(a5)
+; RV32IZCMP-NEXT:    lw a2, 112(a5)
+; RV32IZCMP-NEXT:    sw t3, 124(a5)
+; RV32IZCMP-NEXT:    sw a0, 120(a5)
+; RV32IZCMP-NEXT:    sw a1, 116(a5)
+; RV32IZCMP-NEXT:    sw a2, 112(a5)
+; RV32IZCMP-NEXT:    sw a3, 108(a5)
+; RV32IZCMP-NEXT:    sw a6, 104(a5)
 ; RV32IZCMP-NEXT:    sw s0, 100(a5)
-; RV32IZCMP-NEXT:    sw t0, 96(a5)
-; RV32IZCMP-NEXT:    sw t1, 92(a5)
-; RV32IZCMP-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-NEXT:    sw t3, 84(a5)
+; RV32IZCMP-NEXT:    sw a7, 96(a5)
+; RV32IZCMP-NEXT:    sw t0, 92(a5)
+; RV32IZCMP-NEXT:    sw t1, 88(a5)
+; RV32IZCMP-NEXT:    sw t2, 84(a5)
 ; RV32IZCMP-NEXT:    sw s1, 80(a5)
 ; RV32IZCMP-NEXT:    sw ra, 76(a5)
 ; RV32IZCMP-NEXT:    sw s11, 72(a5)
@@ -1908,13 +1908,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
 ; RV32IZCMP-NEXT:    sw a0, 16(a5)
 ; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV32IZCMP-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV32IZCMP-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV32IZCMP-NEXT:    lw t0, 92(sp) # 4-byte Folded Reload
 ; RV32IZCMP-NEXT:    lw t1, 88(sp) # 4-byte Folded Reload
 ; RV32IZCMP-NEXT:    lw t2, 84(sp) # 4-byte Folded Reload
@@ -1953,16 +1953,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV64IZCMP-NEXT:    sd t4, 72(sp) # 8-byte Folded Spill
 ; RV64IZCMP-NEXT:    sd t5, 64(sp) # 8-byte Folded Spill
 ; RV64IZCMP-NEXT:    sd t6, 56(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lui a6, %hi(var_test_irq)
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-NEXT:    lui a4, %hi(var_test_irq)
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV64IZCMP-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV64IZCMP-NEXT:    lw a0, 16(a5)
 ; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
 ; RV64IZCMP-NEXT:    lw a0, 20(a5)
@@ -1982,28 +1982,28 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV64IZCMP-NEXT:    lw s11, 72(a5)
 ; RV64IZCMP-NEXT:    lw ra, 76(a5)
 ; RV64IZCMP-NEXT:    lw s1, 80(a5)
-; RV64IZCMP-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-NEXT:    lw t1, 92(a5)
-; RV64IZCMP-NEXT:    lw t0, 96(a5)
+; RV64IZCMP-NEXT:    lw t2, 84(a5)
+; RV64IZCMP-NEXT:    lw t1, 88(a5)
+; RV64IZCMP-NEXT:    lw t0, 92(a5)
+; RV64IZCMP-NEXT:    lw a7, 96(a5)
 ; RV64IZCMP-NEXT:    lw s0, 100(a5)
-; RV64IZCMP-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-NEXT:    sw a7, 104(a5)
+; RV64IZCMP-NEXT:    lw a6, 104(a5)
+; RV64IZCMP-NEXT:    lw a3, 108(a5)
+; RV64IZCMP-NEXT:    lw t3, 124(a5)
+; RV64IZCMP-NEXT:    lw a0, 120(a5)
+; RV64IZCMP-NEXT:    lw a1, 116(a5)
+; RV64IZCMP-NEXT:    lw a2, 112(a5)
+; RV64IZCMP-NEXT:    sw t3, 124(a5)
+; RV64IZCMP-NEXT:    sw a0, 120(a5)
+; RV64IZCMP-NEXT:    sw a1, 116(a5)
+; RV64IZCMP-NEXT:    sw a2, 112(a5)
+; RV64IZCMP-NEXT:    sw a3, 108(a5)
+; RV64IZCMP-NEXT:    sw a6, 104(a5)
 ; RV64IZCMP-NEXT:    sw s0, 100(a5)
-; RV64IZCMP-NEXT:    sw t0, 96(a5)
-; RV64IZCMP-NEXT:    sw t1, 92(a5)
-; RV64IZCMP-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-NEXT:    sw t3, 84(a5)
+; RV64IZCMP-NEXT:    sw a7, 96(a5)
+; RV64IZCMP-NEXT:    sw t0, 92(a5)
+; RV64IZCMP-NEXT:    sw t1, 88(a5)
+; RV64IZCMP-NEXT:    sw t2, 84(a5)
 ; RV64IZCMP-NEXT:    sw s1, 80(a5)
 ; RV64IZCMP-NEXT:    sw ra, 76(a5)
 ; RV64IZCMP-NEXT:    sw s11, 72(a5)
@@ -2024,13 +2024,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
 ; RV64IZCMP-NEXT:    sw a0, 16(a5)
 ; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV64IZCMP-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV64IZCMP-NEXT:    ld t0, 168(sp) # 8-byte Folded Reload
 ; RV64IZCMP-NEXT:    ld t1, 160(sp) # 8-byte Folded Reload
 ; RV64IZCMP-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
@@ -2069,16 +2069,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32IZCMP-SR-NEXT:    sw t4, 44(sp) # 4-byte Folded Spill
 ; RV32IZCMP-SR-NEXT:    sw t5, 40(sp) # 4-byte Folded Spill
 ; RV32IZCMP-SR-NEXT:    sw t6, 36(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lui a6, %hi(var_test_irq)
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-SR-NEXT:    lui a4, %hi(var_test_irq)
+; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV32IZCMP-SR-NEXT:    sw a0, 32(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV32IZCMP-SR-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV32IZCMP-SR-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV32IZCMP-SR-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV32IZCMP-SR-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV32IZCMP-SR-NEXT:    lw a0, 16(a5)
 ; RV32IZCMP-SR-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
 ; RV32IZCMP-SR-NEXT:    lw a0, 20(a5)
@@ -2098,28 +2098,28 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32IZCMP-SR-NEXT:    lw s11, 72(a5)
 ; RV32IZCMP-SR-NEXT:    lw ra, 76(a5)
 ; RV32IZCMP-SR-NEXT:    lw s1, 80(a5)
-; RV32IZCMP-SR-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-SR-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-SR-NEXT:    lw t1, 92(a5)
-; RV32IZCMP-SR-NEXT:    lw t0, 96(a5)
+; RV32IZCMP-SR-NEXT:    lw t2, 84(a5)
+; RV32IZCMP-SR-NEXT:    lw t1, 88(a5)
+; RV32IZCMP-SR-NEXT:    lw t0, 92(a5)
+; RV32IZCMP-SR-NEXT:    lw a7, 96(a5)
 ; RV32IZCMP-SR-NEXT:    lw s0, 100(a5)
-; RV32IZCMP-SR-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-SR-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-SR-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-SR-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-SR-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-SR-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-SR-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-SR-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-SR-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-SR-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-SR-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-SR-NEXT:    sw a7, 104(a5)
+; RV32IZCMP-SR-NEXT:    lw a6, 104(a5)
+; RV32IZCMP-SR-NEXT:    lw a3, 108(a5)
+; RV32IZCMP-SR-NEXT:    lw t3, 124(a5)
+; RV32IZCMP-SR-NEXT:    lw a0, 120(a5)
+; RV32IZCMP-SR-NEXT:    lw a1, 116(a5)
+; RV32IZCMP-SR-NEXT:    lw a2, 112(a5)
+; RV32IZCMP-SR-NEXT:    sw t3, 124(a5)
+; RV32IZCMP-SR-NEXT:    sw a0, 120(a5)
+; RV32IZCMP-SR-NEXT:    sw a1, 116(a5)
+; RV32IZCMP-SR-NEXT:    sw a2, 112(a5)
+; RV32IZCMP-SR-NEXT:    sw a3, 108(a5)
+; RV32IZCMP-SR-NEXT:    sw a6, 104(a5)
 ; RV32IZCMP-SR-NEXT:    sw s0, 100(a5)
-; RV32IZCMP-SR-NEXT:    sw t0, 96(a5)
-; RV32IZCMP-SR-NEXT:    sw t1, 92(a5)
-; RV32IZCMP-SR-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-SR-NEXT:    sw t3, 84(a5)
+; RV32IZCMP-SR-NEXT:    sw a7, 96(a5)
+; RV32IZCMP-SR-NEXT:    sw t0, 92(a5)
+; RV32IZCMP-SR-NEXT:    sw t1, 88(a5)
+; RV32IZCMP-SR-NEXT:    sw t2, 84(a5)
 ; RV32IZCMP-SR-NEXT:    sw s1, 80(a5)
 ; RV32IZCMP-SR-NEXT:    sw ra, 76(a5)
 ; RV32IZCMP-SR-NEXT:    sw s11, 72(a5)
@@ -2140,13 +2140,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32IZCMP-SR-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
 ; RV32IZCMP-SR-NEXT:    sw a0, 16(a5)
 ; RV32IZCMP-SR-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV32IZCMP-SR-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV32IZCMP-SR-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV32IZCMP-SR-NEXT:    lw a0, 32(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV32IZCMP-SR-NEXT:    lw t0, 92(sp) # 4-byte Folded Reload
 ; RV32IZCMP-SR-NEXT:    lw t1, 88(sp) # 4-byte Folded Reload
 ; RV32IZCMP-SR-NEXT:    lw t2, 84(sp) # 4-byte Folded Reload
@@ -2185,16 +2185,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV64IZCMP-SR-NEXT:    sd t4, 72(sp) # 8-byte Folded Spill
 ; RV64IZCMP-SR-NEXT:    sd t5, 64(sp) # 8-byte Folded Spill
 ; RV64IZCMP-SR-NEXT:    sd t6, 56(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lui a6, %hi(var_test_irq)
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-SR-NEXT:    lui a4, %hi(var_test_irq)
+; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV64IZCMP-SR-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV64IZCMP-SR-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV64IZCMP-SR-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV64IZCMP-SR-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV64IZCMP-SR-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV64IZCMP-SR-NEXT:    lw a0, 16(a5)
 ; RV64IZCMP-SR-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
 ; RV64IZCMP-SR-NEXT:    lw a0, 20(a5)
@@ -2214,28 +2214,28 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV64IZCMP-SR-NEXT:    lw s11, 72(a5)
 ; RV64IZCMP-SR-NEXT:    lw ra, 76(a5)
 ; RV64IZCMP-SR-NEXT:    lw s1, 80(a5)
-; RV64IZCMP-SR-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-SR-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-SR-NEXT:    lw t1, 92(a5)
-; RV64IZCMP-SR-NEXT:    lw t0, 96(a5)
+; RV64IZCMP-SR-NEXT:    lw t2, 84(a5)
+; RV64IZCMP-SR-NEXT:    lw t1, 88(a5)
+; RV64IZCMP-SR-NEXT:    lw t0, 92(a5)
+; RV64IZCMP-SR-NEXT:    lw a7, 96(a5)
 ; RV64IZCMP-SR-NEXT:    lw s0, 100(a5)
-; RV64IZCMP-SR-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-SR-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-SR-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-SR-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-SR-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-SR-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-SR-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-SR-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-SR-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-SR-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-SR-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-SR-NEXT:    sw a7, 104(a5)
+; RV64IZCMP-SR-NEXT:    lw a6, 104(a5)
+; RV64IZCMP-SR-NEXT:    lw a3, 108(a5)
+; RV64IZCMP-SR-NEXT:    lw t3, 124(a5)
+; RV64IZCMP-SR-NEXT:    lw a0, 120(a5)
+; RV64IZCMP-SR-NEXT:    lw a1, 116(a5)
+; RV64IZCMP-SR-NEXT:    lw a2, 112(a5)
+; RV64IZCMP-SR-NEXT:    sw t3, 124(a5)
+; RV64IZCMP-SR-NEXT:    sw a0, 120(a5)
+; RV64IZCMP-SR-NEXT:    sw a1, 116(a5)
+; RV64IZCMP-SR-NEXT:    sw a2, 112(a5)
+; RV64IZCMP-SR-NEXT:    sw a3, 108(a5)
+; RV64IZCMP-SR-NEXT:    sw a6, 104(a5)
 ; RV64IZCMP-SR-NEXT:    sw s0, 100(a5)
-; RV64IZCMP-SR-NEXT:    sw t0, 96(a5)
-; RV64IZCMP-SR-NEXT:    sw t1, 92(a5)
-; RV64IZCMP-SR-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-SR-NEXT:    sw t3, 84(a5)
+; RV64IZCMP-SR-NEXT:    sw a7, 96(a5)
+; RV64IZCMP-SR-NEXT:    sw t0, 92(a5)
+; RV64IZCMP-SR-NEXT:    sw t1, 88(a5)
+; RV64IZCMP-SR-NEXT:    sw t2, 84(a5)
 ; RV64IZCMP-SR-NEXT:    sw s1, 80(a5)
 ; RV64IZCMP-SR-NEXT:    sw ra, 76(a5)
 ; RV64IZCMP-SR-NEXT:    sw s11, 72(a5)
@@ -2256,13 +2256,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV64IZCMP-SR-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
 ; RV64IZCMP-SR-NEXT:    sw a0, 16(a5)
 ; RV64IZCMP-SR-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV64IZCMP-SR-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV64IZCMP-SR-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV64IZCMP-SR-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV64IZCMP-SR-NEXT:    ld t0, 168(sp) # 8-byte Folded Reload
 ; RV64IZCMP-SR-NEXT:    ld t1, 160(sp) # 8-byte Folded Reload
 ; RV64IZCMP-SR-NEXT:    ld t2, 152(sp) # 8-byte Folded Reload
@@ -2313,16 +2313,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32I-NEXT:    sw t4, 40(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw t5, 36(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw t6, 32(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lui a6, %hi(var_test_irq)
-; RV32I-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV32I-NEXT:    lui a4, %hi(var_test_irq)
+; RV32I-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV32I-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV32I-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV32I-NEXT:    lw a0, 16(a5)
 ; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lw a0, 20(a5)
@@ -2347,18 +2347,18 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32I-NEXT:    lw s10, 92(a5)
 ; RV32I-NEXT:    lw s11, 96(a5)
 ; RV32I-NEXT:    lw ra, 100(a5)
-; RV32I-NEXT:    lw a7, 104(a5)
-; RV32I-NEXT:    lw a4, 108(a5)
-; RV32I-NEXT:    lw a0, 124(a5)
-; RV32I-NEXT:    lw a1, 120(a5)
-; RV32I-NEXT:    lw a2, 116(a5)
-; RV32I-NEXT:    lw a3, 112(a5)
-; RV32I-NEXT:    sw a0, 124(a5)
-; RV32I-NEXT:    sw a1, 120(a5)
-; RV32I-NEXT:    sw a2, 116(a5)
-; RV32I-NEXT:    sw a3, 112(a5)
-; RV32I-NEXT:    sw a4, 108(a5)
-; RV32I-NEXT:    sw a7, 104(a5)
+; RV32I-NEXT:    lw a6, 104(a5)
+; RV32I-NEXT:    lw a3, 108(a5)
+; RV32I-NEXT:    lw a7, 124(a5)
+; RV32I-NEXT:    lw a0, 120(a5)
+; RV32I-NEXT:    lw a1, 116(a5)
+; RV32I-NEXT:    lw a2, 112(a5)
+; RV32I-NEXT:    sw a7, 124(a5)
+; RV32I-NEXT:    sw a0, 120(a5)
+; RV32I-NEXT:    sw a1, 116(a5)
+; RV32I-NEXT:    sw a2, 112(a5)
+; RV32I-NEXT:    sw a3, 108(a5)
+; RV32I-NEXT:    sw a6, 104(a5)
 ; RV32I-NEXT:    sw ra, 100(a5)
 ; RV32I-NEXT:    sw s11, 96(a5)
 ; RV32I-NEXT:    sw s10, 92(a5)
@@ -2384,13 +2384,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    sw a0, 16(a5)
 ; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV32I-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV32I-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV32I-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw t0, 136(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw t1, 132(sp) # 4-byte Folded Reload
@@ -2453,16 +2453,16 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV64I-NEXT:    sd t4, 64(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd t5, 56(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd t6, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lui a6, %hi(var_test_irq)
-; RV64I-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV64I-NEXT:    lui a4, %hi(var_test_irq)
+; RV64I-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV64I-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV64I-NEXT:    lw a0, 16(a5)
 ; RV64I-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    lw a0, 20(a5)
@@ -2487,18 +2487,18 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV64I-NEXT:    lw s10, 92(a5)
 ; RV64I-NEXT:    lw s11, 96(a5)
 ; RV64I-NEXT:    lw ra, 100(a5)
-; RV64I-NEXT:    lw a7, 104(a5)
-; RV64I-NEXT:    lw a4, 108(a5)
-; RV64I-NEXT:    lw a0, 124(a5)
-; RV64I-NEXT:    lw a1, 120(a5)
-; RV64I-NEXT:    lw a2, 116(a5)
-; RV64I-NEXT:    lw a3, 112(a5)
-; RV64I-NEXT:    sw a0, 124(a5)
-; RV64I-NEXT:    sw a1, 120(a5)
-; RV64I-NEXT:    sw a2, 116(a5)
-; RV64I-NEXT:    sw a3, 112(a5)
-; RV64I-NEXT:    sw a4, 108(a5)
-; RV64I-NEXT:    sw a7, 104(a5)
+; RV64I-NEXT:    lw a6, 104(a5)
+; RV64I-NEXT:    lw a3, 108(a5)
+; RV64I-NEXT:    lw a7, 124(a5)
+; RV64I-NEXT:    lw a0, 120(a5)
+; RV64I-NEXT:    lw a1, 116(a5)
+; RV64I-NEXT:    lw a2, 112(a5)
+; RV64I-NEXT:    sw a7, 124(a5)
+; RV64I-NEXT:    sw a0, 120(a5)
+; RV64I-NEXT:    sw a1, 116(a5)
+; RV64I-NEXT:    sw a2, 112(a5)
+; RV64I-NEXT:    sw a3, 108(a5)
+; RV64I-NEXT:    sw a6, 104(a5)
 ; RV64I-NEXT:    sw ra, 100(a5)
 ; RV64I-NEXT:    sw s11, 96(a5)
 ; RV64I-NEXT:    sw s10, 92(a5)
@@ -2524,13 +2524,13 @@ define void @callee_with_irq() nounwind "interrupt"="user" {
 ; RV64I-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    sw a0, 16(a5)
 ; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV64I-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV64I-NEXT:    ld ra, 264(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld t0, 256(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
@@ -2570,16 +2570,16 @@ define void @callee_no_irq() nounwind{
 ; RV32IZCMP-LABEL: callee_no_irq:
 ; RV32IZCMP:       # %bb.0:
 ; RV32IZCMP-NEXT:    cm.push {ra, s0-s11}, -96
-; RV32IZCMP-NEXT:    lui a6, %hi(var_test_irq)
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-NEXT:    lui a4, %hi(var_test_irq)
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV32IZCMP-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV32IZCMP-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV32IZCMP-NEXT:    lw a0, 16(a5)
 ; RV32IZCMP-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
 ; RV32IZCMP-NEXT:    lw a0, 20(a5)
@@ -2599,28 +2599,28 @@ define void @callee_no_irq() nounwind{
 ; RV32IZCMP-NEXT:    lw s11, 72(a5)
 ; RV32IZCMP-NEXT:    lw ra, 76(a5)
 ; RV32IZCMP-NEXT:    lw s1, 80(a5)
-; RV32IZCMP-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-NEXT:    lw t1, 92(a5)
-; RV32IZCMP-NEXT:    lw t0, 96(a5)
+; RV32IZCMP-NEXT:    lw t2, 84(a5)
+; RV32IZCMP-NEXT:    lw t1, 88(a5)
+; RV32IZCMP-NEXT:    lw t0, 92(a5)
+; RV32IZCMP-NEXT:    lw a7, 96(a5)
 ; RV32IZCMP-NEXT:    lw s0, 100(a5)
-; RV32IZCMP-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-NEXT:    sw a7, 104(a5)
+; RV32IZCMP-NEXT:    lw a6, 104(a5)
+; RV32IZCMP-NEXT:    lw a3, 108(a5)
+; RV32IZCMP-NEXT:    lw t3, 124(a5)
+; RV32IZCMP-NEXT:    lw a0, 120(a5)
+; RV32IZCMP-NEXT:    lw a1, 116(a5)
+; RV32IZCMP-NEXT:    lw a2, 112(a5)
+; RV32IZCMP-NEXT:    sw t3, 124(a5)
+; RV32IZCMP-NEXT:    sw a0, 120(a5)
+; RV32IZCMP-NEXT:    sw a1, 116(a5)
+; RV32IZCMP-NEXT:    sw a2, 112(a5)
+; RV32IZCMP-NEXT:    sw a3, 108(a5)
+; RV32IZCMP-NEXT:    sw a6, 104(a5)
 ; RV32IZCMP-NEXT:    sw s0, 100(a5)
-; RV32IZCMP-NEXT:    sw t0, 96(a5)
-; RV32IZCMP-NEXT:    sw t1, 92(a5)
-; RV32IZCMP-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-NEXT:    sw t3, 84(a5)
+; RV32IZCMP-NEXT:    sw a7, 96(a5)
+; RV32IZCMP-NEXT:    sw t0, 92(a5)
+; RV32IZCMP-NEXT:    sw t1, 88(a5)
+; RV32IZCMP-NEXT:    sw t2, 84(a5)
 ; RV32IZCMP-NEXT:    sw s1, 80(a5)
 ; RV32IZCMP-NEXT:    sw ra, 76(a5)
 ; RV32IZCMP-NEXT:    sw s11, 72(a5)
@@ -2641,28 +2641,28 @@ define void @callee_no_irq() nounwind{
 ; RV32IZCMP-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
 ; RV32IZCMP-NEXT:    sw a0, 16(a5)
 ; RV32IZCMP-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV32IZCMP-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV32IZCMP-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV32IZCMP-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV32IZCMP-NEXT:    cm.popret {ra, s0-s11}, 96
 ;
 ; RV64IZCMP-LABEL: callee_no_irq:
 ; RV64IZCMP:       # %bb.0:
 ; RV64IZCMP-NEXT:    cm.push {ra, s0-s11}, -160
-; RV64IZCMP-NEXT:    lui a6, %hi(var_test_irq)
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-NEXT:    lui a4, %hi(var_test_irq)
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV64IZCMP-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV64IZCMP-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV64IZCMP-NEXT:    lw a0, 16(a5)
 ; RV64IZCMP-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
 ; RV64IZCMP-NEXT:    lw a0, 20(a5)
@@ -2682,28 +2682,28 @@ define void @callee_no_irq() nounwind{
 ; RV64IZCMP-NEXT:    lw s11, 72(a5)
 ; RV64IZCMP-NEXT:    lw ra, 76(a5)
 ; RV64IZCMP-NEXT:    lw s1, 80(a5)
-; RV64IZCMP-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-NEXT:    lw t1, 92(a5)
-; RV64IZCMP-NEXT:    lw t0, 96(a5)
+; RV64IZCMP-NEXT:    lw t2, 84(a5)
+; RV64IZCMP-NEXT:    lw t1, 88(a5)
+; RV64IZCMP-NEXT:    lw t0, 92(a5)
+; RV64IZCMP-NEXT:    lw a7, 96(a5)
 ; RV64IZCMP-NEXT:    lw s0, 100(a5)
-; RV64IZCMP-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-NEXT:    sw a7, 104(a5)
+; RV64IZCMP-NEXT:    lw a6, 104(a5)
+; RV64IZCMP-NEXT:    lw a3, 108(a5)
+; RV64IZCMP-NEXT:    lw t3, 124(a5)
+; RV64IZCMP-NEXT:    lw a0, 120(a5)
+; RV64IZCMP-NEXT:    lw a1, 116(a5)
+; RV64IZCMP-NEXT:    lw a2, 112(a5)
+; RV64IZCMP-NEXT:    sw t3, 124(a5)
+; RV64IZCMP-NEXT:    sw a0, 120(a5)
+; RV64IZCMP-NEXT:    sw a1, 116(a5)
+; RV64IZCMP-NEXT:    sw a2, 112(a5)
+; RV64IZCMP-NEXT:    sw a3, 108(a5)
+; RV64IZCMP-NEXT:    sw a6, 104(a5)
 ; RV64IZCMP-NEXT:    sw s0, 100(a5)
-; RV64IZCMP-NEXT:    sw t0, 96(a5)
-; RV64IZCMP-NEXT:    sw t1, 92(a5)
-; RV64IZCMP-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-NEXT:    sw t3, 84(a5)
+; RV64IZCMP-NEXT:    sw a7, 96(a5)
+; RV64IZCMP-NEXT:    sw t0, 92(a5)
+; RV64IZCMP-NEXT:    sw t1, 88(a5)
+; RV64IZCMP-NEXT:    sw t2, 84(a5)
 ; RV64IZCMP-NEXT:    sw s1, 80(a5)
 ; RV64IZCMP-NEXT:    sw ra, 76(a5)
 ; RV64IZCMP-NEXT:    sw s11, 72(a5)
@@ -2724,28 +2724,28 @@ define void @callee_no_irq() nounwind{
 ; RV64IZCMP-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
 ; RV64IZCMP-NEXT:    sw a0, 16(a5)
 ; RV64IZCMP-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV64IZCMP-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV64IZCMP-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV64IZCMP-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV64IZCMP-NEXT:    cm.popret {ra, s0-s11}, 160
 ;
 ; RV32IZCMP-SR-LABEL: callee_no_irq:
 ; RV32IZCMP-SR:       # %bb.0:
 ; RV32IZCMP-SR-NEXT:    cm.push {ra, s0-s11}, -96
-; RV32IZCMP-SR-NEXT:    lui a6, %hi(var_test_irq)
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-SR-NEXT:    lui a4, %hi(var_test_irq)
+; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV32IZCMP-SR-NEXT:    sw a0, 28(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV32IZCMP-SR-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV32IZCMP-SR-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV32IZCMP-SR-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32IZCMP-SR-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV32IZCMP-SR-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV32IZCMP-SR-NEXT:    lw a0, 16(a5)
 ; RV32IZCMP-SR-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
 ; RV32IZCMP-SR-NEXT:    lw a0, 20(a5)
@@ -2765,28 +2765,28 @@ define void @callee_no_irq() nounwind{
 ; RV32IZCMP-SR-NEXT:    lw s11, 72(a5)
 ; RV32IZCMP-SR-NEXT:    lw ra, 76(a5)
 ; RV32IZCMP-SR-NEXT:    lw s1, 80(a5)
-; RV32IZCMP-SR-NEXT:    lw t3, 84(a5)
-; RV32IZCMP-SR-NEXT:    lw t2, 88(a5)
-; RV32IZCMP-SR-NEXT:    lw t1, 92(a5)
-; RV32IZCMP-SR-NEXT:    lw t0, 96(a5)
+; RV32IZCMP-SR-NEXT:    lw t2, 84(a5)
+; RV32IZCMP-SR-NEXT:    lw t1, 88(a5)
+; RV32IZCMP-SR-NEXT:    lw t0, 92(a5)
+; RV32IZCMP-SR-NEXT:    lw a7, 96(a5)
 ; RV32IZCMP-SR-NEXT:    lw s0, 100(a5)
-; RV32IZCMP-SR-NEXT:    lw a7, 104(a5)
-; RV32IZCMP-SR-NEXT:    lw a4, 108(a5)
-; RV32IZCMP-SR-NEXT:    lw a0, 124(a5)
-; RV32IZCMP-SR-NEXT:    lw a1, 120(a5)
-; RV32IZCMP-SR-NEXT:    lw a2, 116(a5)
-; RV32IZCMP-SR-NEXT:    lw a3, 112(a5)
-; RV32IZCMP-SR-NEXT:    sw a0, 124(a5)
-; RV32IZCMP-SR-NEXT:    sw a1, 120(a5)
-; RV32IZCMP-SR-NEXT:    sw a2, 116(a5)
-; RV32IZCMP-SR-NEXT:    sw a3, 112(a5)
-; RV32IZCMP-SR-NEXT:    sw a4, 108(a5)
-; RV32IZCMP-SR-NEXT:    sw a7, 104(a5)
+; RV32IZCMP-SR-NEXT:    lw a6, 104(a5)
+; RV32IZCMP-SR-NEXT:    lw a3, 108(a5)
+; RV32IZCMP-SR-NEXT:    lw t3, 124(a5)
+; RV32IZCMP-SR-NEXT:    lw a0, 120(a5)
+; RV32IZCMP-SR-NEXT:    lw a1, 116(a5)
+; RV32IZCMP-SR-NEXT:    lw a2, 112(a5)
+; RV32IZCMP-SR-NEXT:    sw t3, 124(a5)
+; RV32IZCMP-SR-NEXT:    sw a0, 120(a5)
+; RV32IZCMP-SR-NEXT:    sw a1, 116(a5)
+; RV32IZCMP-SR-NEXT:    sw a2, 112(a5)
+; RV32IZCMP-SR-NEXT:    sw a3, 108(a5)
+; RV32IZCMP-SR-NEXT:    sw a6, 104(a5)
 ; RV32IZCMP-SR-NEXT:    sw s0, 100(a5)
-; RV32IZCMP-SR-NEXT:    sw t0, 96(a5)
-; RV32IZCMP-SR-NEXT:    sw t1, 92(a5)
-; RV32IZCMP-SR-NEXT:    sw t2, 88(a5)
-; RV32IZCMP-SR-NEXT:    sw t3, 84(a5)
+; RV32IZCMP-SR-NEXT:    sw a7, 96(a5)
+; RV32IZCMP-SR-NEXT:    sw t0, 92(a5)
+; RV32IZCMP-SR-NEXT:    sw t1, 88(a5)
+; RV32IZCMP-SR-NEXT:    sw t2, 84(a5)
 ; RV32IZCMP-SR-NEXT:    sw s1, 80(a5)
 ; RV32IZCMP-SR-NEXT:    sw ra, 76(a5)
 ; RV32IZCMP-SR-NEXT:    sw s11, 72(a5)
@@ -2807,28 +2807,28 @@ define void @callee_no_irq() nounwind{
 ; RV32IZCMP-SR-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
 ; RV32IZCMP-SR-NEXT:    sw a0, 16(a5)
 ; RV32IZCMP-SR-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV32IZCMP-SR-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV32IZCMP-SR-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV32IZCMP-SR-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
-; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV32IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV32IZCMP-SR-NEXT:    cm.popret {ra, s0-s11}, 96
 ;
 ; RV64IZCMP-SR-LABEL: callee_no_irq:
 ; RV64IZCMP-SR:       # %bb.0:
 ; RV64IZCMP-SR-NEXT:    cm.push {ra, s0-s11}, -160
-; RV64IZCMP-SR-NEXT:    lui a6, %hi(var_test_irq)
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-SR-NEXT:    lui a4, %hi(var_test_irq)
+; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV64IZCMP-SR-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV64IZCMP-SR-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV64IZCMP-SR-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-SR-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV64IZCMP-SR-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
-; RV64IZCMP-SR-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV64IZCMP-SR-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV64IZCMP-SR-NEXT:    lw a0, 16(a5)
 ; RV64IZCMP-SR-NEXT:    sd a0, 8(sp) # 8-byte Folded Spill
 ; RV64IZCMP-SR-NEXT:    lw a0, 20(a5)
@@ -2848,28 +2848,28 @@ define void @callee_no_irq() nounwind{
 ; RV64IZCMP-SR-NEXT:    lw s11, 72(a5)
 ; RV64IZCMP-SR-NEXT:    lw ra, 76(a5)
 ; RV64IZCMP-SR-NEXT:    lw s1, 80(a5)
-; RV64IZCMP-SR-NEXT:    lw t3, 84(a5)
-; RV64IZCMP-SR-NEXT:    lw t2, 88(a5)
-; RV64IZCMP-SR-NEXT:    lw t1, 92(a5)
-; RV64IZCMP-SR-NEXT:    lw t0, 96(a5)
+; RV64IZCMP-SR-NEXT:    lw t2, 84(a5)
+; RV64IZCMP-SR-NEXT:    lw t1, 88(a5)
+; RV64IZCMP-SR-NEXT:    lw t0, 92(a5)
+; RV64IZCMP-SR-NEXT:    lw a7, 96(a5)
 ; RV64IZCMP-SR-NEXT:    lw s0, 100(a5)
-; RV64IZCMP-SR-NEXT:    lw a7, 104(a5)
-; RV64IZCMP-SR-NEXT:    lw a4, 108(a5)
-; RV64IZCMP-SR-NEXT:    lw a0, 124(a5)
-; RV64IZCMP-SR-NEXT:    lw a1, 120(a5)
-; RV64IZCMP-SR-NEXT:    lw a2, 116(a5)
-; RV64IZCMP-SR-NEXT:    lw a3, 112(a5)
-; RV64IZCMP-SR-NEXT:    sw a0, 124(a5)
-; RV64IZCMP-SR-NEXT:    sw a1, 120(a5)
-; RV64IZCMP-SR-NEXT:    sw a2, 116(a5)
-; RV64IZCMP-SR-NEXT:    sw a3, 112(a5)
-; RV64IZCMP-SR-NEXT:    sw a4, 108(a5)
-; RV64IZCMP-SR-NEXT:    sw a7, 104(a5)
+; RV64IZCMP-SR-NEXT:    lw a6, 104(a5)
+; RV64IZCMP-SR-NEXT:    lw a3, 108(a5)
+; RV64IZCMP-SR-NEXT:    lw t3, 124(a5)
+; RV64IZCMP-SR-NEXT:    lw a0, 120(a5)
+; RV64IZCMP-SR-NEXT:    lw a1, 116(a5)
+; RV64IZCMP-SR-NEXT:    lw a2, 112(a5)
+; RV64IZCMP-SR-NEXT:    sw t3, 124(a5)
+; RV64IZCMP-SR-NEXT:    sw a0, 120(a5)
+; RV64IZCMP-SR-NEXT:    sw a1, 116(a5)
+; RV64IZCMP-SR-NEXT:    sw a2, 112(a5)
+; RV64IZCMP-SR-NEXT:    sw a3, 108(a5)
+; RV64IZCMP-SR-NEXT:    sw a6, 104(a5)
 ; RV64IZCMP-SR-NEXT:    sw s0, 100(a5)
-; RV64IZCMP-SR-NEXT:    sw t0, 96(a5)
-; RV64IZCMP-SR-NEXT:    sw t1, 92(a5)
-; RV64IZCMP-SR-NEXT:    sw t2, 88(a5)
-; RV64IZCMP-SR-NEXT:    sw t3, 84(a5)
+; RV64IZCMP-SR-NEXT:    sw a7, 96(a5)
+; RV64IZCMP-SR-NEXT:    sw t0, 92(a5)
+; RV64IZCMP-SR-NEXT:    sw t1, 88(a5)
+; RV64IZCMP-SR-NEXT:    sw t2, 84(a5)
 ; RV64IZCMP-SR-NEXT:    sw s1, 80(a5)
 ; RV64IZCMP-SR-NEXT:    sw ra, 76(a5)
 ; RV64IZCMP-SR-NEXT:    sw s11, 72(a5)
@@ -2890,13 +2890,13 @@ define void @callee_no_irq() nounwind{
 ; RV64IZCMP-SR-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
 ; RV64IZCMP-SR-NEXT:    sw a0, 16(a5)
 ; RV64IZCMP-SR-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV64IZCMP-SR-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV64IZCMP-SR-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV64IZCMP-SR-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV64IZCMP-SR-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV64IZCMP-SR-NEXT:    cm.popret {ra, s0-s11}, 160
 ;
 ; RV32I-LABEL: callee_no_irq:
@@ -2915,16 +2915,16 @@ define void @callee_no_irq() nounwind{
 ; RV32I-NEXT:    sw s9, 36(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s10, 32(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s11, 28(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lui a6, %hi(var_test_irq)
-; RV32I-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV32I-NEXT:    lui a4, %hi(var_test_irq)
+; RV32I-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV32I-NEXT:    sw a0, 24(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV32I-NEXT:    sw a0, 20(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV32I-NEXT:    sw a0, 16(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV32I-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV32I-NEXT:    sw a0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV32I-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV32I-NEXT:    lw a0, 16(a5)
 ; RV32I-NEXT:    sw a0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    lw a0, 20(a5)
@@ -2949,18 +2949,18 @@ define void @callee_no_irq() nounwind{
 ; RV32I-NEXT:    lw s10, 92(a5)
 ; RV32I-NEXT:    lw s11, 96(a5)
 ; RV32I-NEXT:    lw ra, 100(a5)
-; RV32I-NEXT:    lw a7, 104(a5)
-; RV32I-NEXT:    lw a4, 108(a5)
-; RV32I-NEXT:    lw a0, 124(a5)
-; RV32I-NEXT:    lw a1, 120(a5)
-; RV32I-NEXT:    lw a2, 116(a5)
-; RV32I-NEXT:    lw a3, 112(a5)
-; RV32I-NEXT:    sw a0, 124(a5)
-; RV32I-NEXT:    sw a1, 120(a5)
-; RV32I-NEXT:    sw a2, 116(a5)
-; RV32I-NEXT:    sw a3, 112(a5)
-; RV32I-NEXT:    sw a4, 108(a5)
-; RV32I-NEXT:    sw a7, 104(a5)
+; RV32I-NEXT:    lw a6, 104(a5)
+; RV32I-NEXT:    lw a3, 108(a5)
+; RV32I-NEXT:    lw a7, 124(a5)
+; RV32I-NEXT:    lw a0, 120(a5)
+; RV32I-NEXT:    lw a1, 116(a5)
+; RV32I-NEXT:    lw a2, 112(a5)
+; RV32I-NEXT:    sw a7, 124(a5)
+; RV32I-NEXT:    sw a0, 120(a5)
+; RV32I-NEXT:    sw a1, 116(a5)
+; RV32I-NEXT:    sw a2, 112(a5)
+; RV32I-NEXT:    sw a3, 108(a5)
+; RV32I-NEXT:    sw a6, 104(a5)
 ; RV32I-NEXT:    sw ra, 100(a5)
 ; RV32I-NEXT:    sw s11, 96(a5)
 ; RV32I-NEXT:    sw s10, 92(a5)
@@ -2986,13 +2986,13 @@ define void @callee_no_irq() nounwind{
 ; RV32I-NEXT:    lw a0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    sw a0, 16(a5)
 ; RV32I-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV32I-NEXT:    lw a0, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV32I-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV32I-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV32I-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
@@ -3025,16 +3025,16 @@ define void @callee_no_irq() nounwind{
 ; RV64I-NEXT:    sd s9, 72(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s10, 64(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    sd s11, 56(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lui a6, %hi(var_test_irq)
-; RV64I-NEXT:    lw a0, %lo(var_test_irq)(a6)
+; RV64I-NEXT:    lui a4, %hi(var_test_irq)
+; RV64I-NEXT:    lw a0, %lo(var_test_irq)(a4)
 ; RV64I-NEXT:    sd a0, 48(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+4)(a6)
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+4)(a4)
 ; RV64I-NEXT:    sd a0, 40(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+8)(a6)
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+8)(a4)
 ; RV64I-NEXT:    sd a0, 32(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    lw a0, %lo(var_test_irq+12)(a6)
+; RV64I-NEXT:    lw a0, %lo(var_test_irq+12)(a4)
 ; RV64I-NEXT:    sd a0, 24(sp) # 8-byte Folded Spill
-; RV64I-NEXT:    addi a5, a6, %lo(var_test_irq)
+; RV64I-NEXT:    addi a5, a4, %lo(var_test_irq)
 ; RV64I-NEXT:    lw a0, 16(a5)
 ; RV64I-NEXT:    sd a0, 16(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    lw a0, 20(a5)
@@ -3059,18 +3059,18 @@ define void @callee_no_irq() nounwind{
 ; RV64I-NEXT:    lw s10, 92(a5)
 ; RV64I-NEXT:    lw s11, 96(a5)
 ; RV64I-NEXT:    lw ra, 100(a5)
-; RV64I-NEXT:    lw a7, 104(a5)
-; RV64I-NEXT:    lw a4, 108(a5)
-; RV64I-NEXT:    lw a0, 124(a5)
-; RV64I-NEXT:    lw a1, 120(a5)
-; RV64I-NEXT:    lw a2, 116(a5)
-; RV64I-NEXT:    lw a3, 112(a5)
-; RV64I-NEXT:    sw a0, 124(a5)
-; RV64I-NEXT:    sw a1, 120(a5)
-; RV64I-NEXT:    sw a2, 116(a5)
-; RV64I-NEXT:    sw a3, 112(a5)
-; RV64I-NEXT:    sw a4, 108(a5)
-; RV64I-NEXT:    sw a7, 104(a5)
+; RV64I-NEXT:    lw a6, 104(a5)
+; RV64I-NEXT:    lw a3, 108(a5)
+; RV64I-NEXT:    lw a7, 124(a5)
+; RV64I-NEXT:    lw a0, 120(a5)
+; RV64I-NEXT:    lw a1, 116(a5)
+; RV64I-NEXT:    lw a2, 112(a5)
+; RV64I-NEXT:    sw a7, 124(a5)
+; RV64I-NEXT:    sw a0, 120(a5)
+; RV64I-NEXT:    sw a1, 116(a5)
+; RV64I-NEXT:    sw a2, 112(a5)
+; RV64I-NEXT:    sw a3, 108(a5)
+; RV64I-NEXT:    sw a6, 104(a5)
 ; RV64I-NEXT:    sw ra, 100(a5)
 ; RV64I-NEXT:    sw s11, 96(a5)
 ; RV64I-NEXT:    sw s10, 92(a5)
@@ -3096,13 +3096,13 @@ define void @callee_no_irq() nounwind{
 ; RV64I-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    sw a0, 16(a5)
 ; RV64I-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+12)(a6)
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+12)(a4)
 ; RV64I-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+8)(a6)
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+8)(a4)
 ; RV64I-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq+4)(a6)
+; RV64I-NEXT:    sw a0, %lo(var_test_irq+4)(a4)
 ; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    sw a0, %lo(var_test_irq)(a6)
+; RV64I-NEXT:    sw a0, %lo(var_test_irq)(a4)
 ; RV64I-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/remat-stack-load-aggressive.ll b/llvm/test/CodeGen/RISCV/remat-stack-load-aggressive.ll
new file mode 100644
index 000000000000000..571914256f58bc3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/remat-stack-load-aggressive.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=riscv32 -riscv-enable-load-remat-aggressive=false | FileCheck %s --check-prefix=NOPROP
+; RUN: llc < %s -mtriple=riscv32 -riscv-enable-load-remat-aggressive=true | FileCheck %s --check-prefix=PROP
+define void @start() {
+; NOPROP-LABEL: start:
+; NOPROP:       # %bb.0: # %entry
+; NOPROP-NEXT:    addi sp, sp, -16
+; NOPROP-NEXT:    .cfi_def_cfa_offset 16
+; NOPROP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; NOPROP-NEXT:    .cfi_offset ra, -4
+; NOPROP-NEXT:    addi a0, sp, 8
+; NOPROP-NEXT:    addi a1, sp, 4
+; NOPROP-NEXT:    mv a2, sp
+; NOPROP-NEXT:    call init_var@plt
+; NOPROP-NEXT:    lw a0, 4(sp)
+; NOPROP-NEXT:    lw a1, 8(sp)
+; NOPROP-NEXT:    call chain@plt
+; NOPROP-NEXT:    lw a1, 0(sp)
+; NOPROP-NEXT:    mv a2, a0
+; NOPROP-NEXT:    mv a0, a1
+; NOPROP-NEXT:    mv a1, a2
+; NOPROP-NEXT:    call chain@plt
+; NOPROP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; NOPROP-NEXT:    addi sp, sp, 16
+; NOPROP-NEXT:    ret
+;
+; PROP-LABEL: start:
+; PROP:       # %bb.0: # %entry
+; PROP-NEXT:    addi sp, sp, -16
+; PROP-NEXT:    .cfi_def_cfa_offset 16
+; PROP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; PROP-NEXT:    .cfi_offset ra, -4
+; PROP-NEXT:    addi a0, sp, 8
+; PROP-NEXT:    addi a1, sp, 4
+; PROP-NEXT:    mv a2, sp
+; PROP-NEXT:    call init_var@plt
+; PROP-NEXT:    lw a0, 4(sp)
+; PROP-NEXT:    lw a1, 8(sp)
+; PROP-NEXT:    call chain@plt
+; PROP-NEXT:    mv a1, a0
+; PROP-NEXT:    lw a0, 0(sp)
+; PROP-NEXT:    call chain@plt
+; PROP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; PROP-NEXT:    addi sp, sp, 16
+; PROP-NEXT:    ret
+entry:
+  %a = alloca i32, align 4
+  %b = alloca i32, align 4
+  %c = alloca i32, align 4
+  call void @init_var(ptr %a, ptr %b, ptr %c)
+  %0 = load i32, ptr %b, align 4
+  %1 = load i32, ptr %a, align 4
+  %call = call i32 @chain(i32 %0, i32 %1)
+  %2 = load i32, ptr %c, align 4
+  %call1 = call i32 @chain(i32 %2, i32 %call)
+  ret void
+}
+
+declare void @init_var(ptr, ptr, ptr)
+
+declare i32 @chain(i32, i32)
+
diff --git a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
index 321d9c612336b9c..9cfd7e230278e2c 100644
--- a/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32xtheadbb.ll
@@ -222,38 +222,37 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:    and a0, s0, a0
 ; RV32I-NEXT:    lui a1, 30667
-; RV32I-NEXT:    addi s3, a1, 1329
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    addi s2, a1, 1329
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, a0, %lo(.LCPI3_0)
-; RV32I-NEXT:    neg a0, s2
-; RV32I-NEXT:    and a0, s2, a0
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    srli a0, a0, 27
+; RV32I-NEXT:    lui a1, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s4, a1, %lo(.LCPI3_0)
+; RV32I-NEXT:    add s3, s4, a0
+; RV32I-NEXT:    neg a0, s1
+; RV32I-NEXT:    and a0, s1, a0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    bnez s2, .LBB3_3
+; RV32I-NEXT:    bnez s1, .LBB3_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a0, 32
-; RV32I-NEXT:    beqz s0, .LBB3_4
+; RV32I-NEXT:    li a1, 32
+; RV32I-NEXT:    lbu a0, 0(s3)
+; RV32I-NEXT:    beqz s0, .LBB3_3
+; RV32I-NEXT:    j .LBB3_4
 ; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    srli s1, s1, 27
-; RV32I-NEXT:    add s1, s4, s1
-; RV32I-NEXT:    lbu a0, 0(s1)
-; RV32I-NEXT:    j .LBB3_5
-; RV32I-NEXT:  .LBB3_3:
 ; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    add a0, s4, a0
-; RV32I-NEXT:    lbu a0, 0(a0)
-; RV32I-NEXT:    bnez s0, .LBB3_2
+; RV32I-NEXT:    lbu a1, 0(a0)
+; RV32I-NEXT:    lbu a0, 0(s3)
+; RV32I-NEXT:    bnez s0, .LBB3_4
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    addi a0, a1, 32
 ; RV32I-NEXT:  .LBB3_4:
-; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:  .LBB3_5:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 5f9ca503bcb053c..ef23d8a16f2f413 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -212,38 +212,37 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    sw s4, 8(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    mv s2, a1
+; RV32I-NEXT:    mv s1, a1
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    neg a0, a0
 ; RV32I-NEXT:    and a0, s0, a0
 ; RV32I-NEXT:    lui a1, 30667
-; RV32I-NEXT:    addi s3, a1, 1329
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    addi s2, a1, 1329
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    mv s1, a0
-; RV32I-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32I-NEXT:    addi s4, a0, %lo(.LCPI3_0)
-; RV32I-NEXT:    neg a0, s2
-; RV32I-NEXT:    and a0, s2, a0
-; RV32I-NEXT:    mv a1, s3
+; RV32I-NEXT:    srli a0, a0, 27
+; RV32I-NEXT:    lui a1, %hi(.LCPI3_0)
+; RV32I-NEXT:    addi s4, a1, %lo(.LCPI3_0)
+; RV32I-NEXT:    add s3, s4, a0
+; RV32I-NEXT:    neg a0, s1
+; RV32I-NEXT:    and a0, s1, a0
+; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __mulsi3@plt
-; RV32I-NEXT:    bnez s2, .LBB3_3
+; RV32I-NEXT:    bnez s1, .LBB3_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    li a0, 32
-; RV32I-NEXT:    beqz s0, .LBB3_4
+; RV32I-NEXT:    li a1, 32
+; RV32I-NEXT:    lbu a0, 0(s3)
+; RV32I-NEXT:    beqz s0, .LBB3_3
+; RV32I-NEXT:    j .LBB3_4
 ; RV32I-NEXT:  .LBB3_2:
-; RV32I-NEXT:    srli s1, s1, 27
-; RV32I-NEXT:    add s1, s4, s1
-; RV32I-NEXT:    lbu a0, 0(s1)
-; RV32I-NEXT:    j .LBB3_5
-; RV32I-NEXT:  .LBB3_3:
 ; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    add a0, s4, a0
-; RV32I-NEXT:    lbu a0, 0(a0)
-; RV32I-NEXT:    bnez s0, .LBB3_2
+; RV32I-NEXT:    lbu a1, 0(a0)
+; RV32I-NEXT:    lbu a0, 0(s3)
+; RV32I-NEXT:    bnez s0, .LBB3_4
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    addi a0, a1, 32
 ; RV32I-NEXT:  .LBB3_4:
-; RV32I-NEXT:    addi a0, a0, 32
-; RV32I-NEXT:  .LBB3_5:
 ; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
index 705ec2df126b01f..376b8c1b34a717c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll
@@ -20,24 +20,23 @@ define signext i32 @foo(i32 signext %aa) #0 {
 ; CHECK-NEXT:    sub sp, sp, a1
 ; CHECK-NEXT:    andi sp, sp, -16
 ; CHECK-NEXT:    mv s1, sp
-; CHECK-NEXT:    lw t0, 44(s1)
+; CHECK-NEXT:    lw a1, 16(s1)
+; CHECK-NEXT:    lw a2, 12(s1)
+; CHECK-NEXT:    lw a3, 8(s1)
+; CHECK-NEXT:    sw a0, 52(s1)
+; CHECK-NEXT:    sw a0, 48(s1)
+; CHECK-NEXT:    addi sp, sp, -32
+; CHECK-NEXT:    sd a3, 16(sp)
+; CHECK-NEXT:    sd a2, 8(sp)
+; CHECK-NEXT:    sd a1, 0(sp)
+; CHECK-NEXT:    lw a0, 44(s1)
 ; CHECK-NEXT:    lw a2, 40(s1)
 ; CHECK-NEXT:    lw a3, 36(s1)
 ; CHECK-NEXT:    lw a4, 32(s1)
 ; CHECK-NEXT:    lw a5, 28(s1)
 ; CHECK-NEXT:    lw a6, 24(s1)
 ; CHECK-NEXT:    lw a7, 20(s1)
-; CHECK-NEXT:    lw t1, 16(s1)
-; CHECK-NEXT:    lw a1, 12(s1)
-; CHECK-NEXT:    lw t2, 8(s1)
-; CHECK-NEXT:    sw a0, 52(s1)
-; CHECK-NEXT:    sw a0, 48(s1)
-; CHECK-NEXT:    addi sp, sp, -32
-; CHECK-NEXT:    sd t2, 16(sp)
-; CHECK-NEXT:    sd a1, 8(sp)
 ; CHECK-NEXT:    addi a1, s1, 48
-; CHECK-NEXT:    sd t1, 0(sp)
-; CHECK-NEXT:    mv a0, t0
 ; CHECK-NEXT:    call gfunc at plt
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    li a0, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
index 5a74108c90da719..d5f19c931878009 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
@@ -97,36 +97,36 @@ define dso_local signext i32 @main() #0 {
 ; CHECK-NEXT:    sw a0, -104(s0)
 ; CHECK-NEXT:    sw a0, -108(s0)
 ; CHECK-NEXT:    sw a0, -112(s0)
+; CHECK-NEXT:    vl8re32.v v8, (s1)
+; CHECK-NEXT:    lw a0, -108(s0)
+; CHECK-NEXT:    lw a1, -112(s0)
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sd a1, 8(sp)
+; CHECK-NEXT:    sd a0, 0(sp)
 ; CHECK-NEXT:    lw a0, -76(s0)
 ; CHECK-NEXT:    lw a1, -80(s0)
-; CHECK-NEXT:    vl8re32.v v8, (s1)
 ; CHECK-NEXT:    lw a2, -84(s0)
 ; CHECK-NEXT:    lw a3, -88(s0)
 ; CHECK-NEXT:    lw a4, -92(s0)
 ; CHECK-NEXT:    lw a5, -96(s0)
 ; CHECK-NEXT:    lw a6, -100(s0)
 ; CHECK-NEXT:    lw a7, -104(s0)
-; CHECK-NEXT:    lw t0, -108(s0)
-; CHECK-NEXT:    lw t1, -112(s0)
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sd t1, 8(sp)
-; CHECK-NEXT:    sd t0, 0(sp)
 ; CHECK-NEXT:    call lots_args
 ; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vl8re32.v v8, (s1)
+; CHECK-NEXT:    lw a0, -108(s0)
+; CHECK-NEXT:    lw a1, -112(s0)
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    sd a1, 8(sp)
+; CHECK-NEXT:    sd a0, 0(sp)
 ; CHECK-NEXT:    lw a0, -76(s0)
 ; CHECK-NEXT:    lw a1, -80(s0)
-; CHECK-NEXT:    vl8re32.v v8, (s1)
 ; CHECK-NEXT:    lw a2, -84(s0)
 ; CHECK-NEXT:    lw a3, -88(s0)
 ; CHECK-NEXT:    lw a4, -92(s0)
 ; CHECK-NEXT:    lw a5, -96(s0)
 ; CHECK-NEXT:    lw a6, -100(s0)
 ; CHECK-NEXT:    lw a7, -104(s0)
-; CHECK-NEXT:    lw t0, -108(s0)
-; CHECK-NEXT:    lw t1, -112(s0)
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sd t1, 8(sp)
-; CHECK-NEXT:    sd t0, 0(sp)
 ; CHECK-NEXT:    call lots_args
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    li a0, 0
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index 122388c1b73ec3e..8f846bc1b4586d3 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -324,10 +324,10 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32-NEXT:    srai s3, a0, 31
 ; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    slli a1, a1, 31
-; RV32-NEXT:    lw a0, 0(s0)
 ; RV32-NEXT:    srai s4, a1, 31
 ; RV32-NEXT:    slli a1, a3, 31
 ; RV32-NEXT:    srai a1, a1, 31
+; RV32-NEXT:    lw a0, 0(s0)
 ; RV32-NEXT:    li a2, 6
 ; RV32-NEXT:    li a3, 0
; RV32-NEXT:    call __moddi3@plt
@@ -476,10 +476,10 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32M-NEXT:    srai s3, a0, 31
 ; RV32M-NEXT:    srli a1, a1, 1
 ; RV32M-NEXT:    slli a1, a1, 31
-; RV32M-NEXT:    lw a0, 0(s0)
 ; RV32M-NEXT:    srai s4, a1, 31
 ; RV32M-NEXT:    slli a1, a3, 31
 ; RV32M-NEXT:    srai a1, a1, 31
+; RV32M-NEXT:    lw a0, 0(s0)
 ; RV32M-NEXT:    li a2, 6
 ; RV32M-NEXT:    li a3, 0
; RV32M-NEXT:    call __moddi3@plt
@@ -626,10 +626,10 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV32MV-NEXT:    srai s3, a0, 31
 ; RV32MV-NEXT:    srli a1, a1, 1
 ; RV32MV-NEXT:    slli a1, a1, 31
-; RV32MV-NEXT:    lw a0, 0(s0)
 ; RV32MV-NEXT:    srai s4, a1, 31
 ; RV32MV-NEXT:    slli a1, a3, 31
 ; RV32MV-NEXT:    srai a1, a1, 31
+; RV32MV-NEXT:    lw a0, 0(s0)
 ; RV32MV-NEXT:    li a2, 6
 ; RV32MV-NEXT:    li a3, 0
; RV32MV-NEXT:    call __moddi3@plt
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
index b0d435368e92bd4..5c7d299ca47631a 100644
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -1317,20 +1317,20 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    lbu s10, 22(a0)
 ; RV64I-NEXT:    lbu s11, 23(a0)
 ; RV64I-NEXT:    lbu ra, 24(a0)
-; RV64I-NEXT:    lbu t0, 25(a0)
-; RV64I-NEXT:    lbu a7, 26(a0)
-; RV64I-NEXT:    lbu a6, 27(a0)
-; RV64I-NEXT:    lbu a5, 28(a0)
-; RV64I-NEXT:    lbu a3, 31(a0)
-; RV64I-NEXT:    lbu a4, 30(a0)
+; RV64I-NEXT:    lbu a7, 25(a0)
+; RV64I-NEXT:    lbu a6, 26(a0)
+; RV64I-NEXT:    lbu a5, 27(a0)
+; RV64I-NEXT:    lbu a4, 28(a0)
+; RV64I-NEXT:    lbu t0, 31(a0)
+; RV64I-NEXT:    lbu a3, 30(a0)
 ; RV64I-NEXT:    lbu a0, 29(a0)
 ; RV64I-NEXT:    lbu a1, 0(a1)
-; RV64I-NEXT:    sb a3, 87(sp)
-; RV64I-NEXT:    sb a4, 86(sp)
+; RV64I-NEXT:    sb t0, 87(sp)
+; RV64I-NEXT:    sb a3, 86(sp)
 ; RV64I-NEXT:    sb a0, 85(sp)
-; RV64I-NEXT:    sb a5, 84(sp)
-; RV64I-NEXT:    sb a6, 83(sp)
-; RV64I-NEXT:    sb a7, 82(sp)
+; RV64I-NEXT:    sb a4, 84(sp)
+; RV64I-NEXT:    sb a5, 83(sp)
+; RV64I-NEXT:    sb a6, 82(sp)
 ; RV64I-NEXT:    sb zero, 119(sp)
 ; RV64I-NEXT:    sb zero, 118(sp)
 ; RV64I-NEXT:    sb zero, 117(sp)
@@ -1363,7 +1363,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    sb zero, 90(sp)
 ; RV64I-NEXT:    sb zero, 89(sp)
 ; RV64I-NEXT:    sb zero, 88(sp)
-; RV64I-NEXT:    sb t0, 81(sp)
+; RV64I-NEXT:    sb a7, 81(sp)
 ; RV64I-NEXT:    sb ra, 80(sp)
 ; RV64I-NEXT:    sb s11, 79(sp)
 ; RV64I-NEXT:    sb s10, 78(sp)
@@ -1429,18 +1429,18 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    lbu s10, 31(a6)
 ; RV64I-NEXT:    lbu s11, 16(a6)
 ; RV64I-NEXT:    lbu ra, 17(a6)
-; RV64I-NEXT:    lbu a5, 18(a6)
-; RV64I-NEXT:    lbu a4, 19(a6)
-; RV64I-NEXT:    lbu a0, 23(a6)
-; RV64I-NEXT:    lbu a1, 22(a6)
-; RV64I-NEXT:    lbu a3, 21(a6)
+; RV64I-NEXT:    lbu a4, 18(a6)
+; RV64I-NEXT:    lbu a3, 19(a6)
+; RV64I-NEXT:    lbu a5, 23(a6)
+; RV64I-NEXT:    lbu a0, 22(a6)
+; RV64I-NEXT:    lbu a1, 21(a6)
 ; RV64I-NEXT:    lbu a6, 20(a6)
-; RV64I-NEXT:    sb a0, 23(a2)
-; RV64I-NEXT:    sb a1, 22(a2)
-; RV64I-NEXT:    sb a3, 21(a2)
+; RV64I-NEXT:    sb a5, 23(a2)
+; RV64I-NEXT:    sb a0, 22(a2)
+; RV64I-NEXT:    sb a1, 21(a2)
 ; RV64I-NEXT:    sb a6, 20(a2)
-; RV64I-NEXT:    sb a4, 19(a2)
-; RV64I-NEXT:    sb a5, 18(a2)
+; RV64I-NEXT:    sb a3, 19(a2)
+; RV64I-NEXT:    sb a4, 18(a2)
 ; RV64I-NEXT:    sb ra, 17(a2)
 ; RV64I-NEXT:    sb s11, 16(a2)
 ; RV64I-NEXT:    sb s10, 31(a2)
@@ -1535,20 +1535,20 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    lbu s10, 22(a0)
 ; RV32I-NEXT:    lbu s11, 23(a0)
 ; RV32I-NEXT:    lbu ra, 24(a0)
-; RV32I-NEXT:    lbu t0, 25(a0)
-; RV32I-NEXT:    lbu a7, 26(a0)
-; RV32I-NEXT:    lbu a6, 27(a0)
-; RV32I-NEXT:    lbu a5, 28(a0)
-; RV32I-NEXT:    lbu a3, 31(a0)
-; RV32I-NEXT:    lbu a4, 30(a0)
+; RV32I-NEXT:    lbu a7, 25(a0)
+; RV32I-NEXT:    lbu a6, 26(a0)
+; RV32I-NEXT:    lbu a5, 27(a0)
+; RV32I-NEXT:    lbu a4, 28(a0)
+; RV32I-NEXT:    lbu t0, 31(a0)
+; RV32I-NEXT:    lbu a3, 30(a0)
 ; RV32I-NEXT:    lbu a0, 29(a0)
 ; RV32I-NEXT:    lbu a1, 0(a1)
-; RV32I-NEXT:    sb a3, 59(sp)
-; RV32I-NEXT:    sb a4, 58(sp)
+; RV32I-NEXT:    sb t0, 59(sp)
+; RV32I-NEXT:    sb a3, 58(sp)
 ; RV32I-NEXT:    sb a0, 57(sp)
-; RV32I-NEXT:    sb a5, 56(sp)
-; RV32I-NEXT:    sb a6, 55(sp)
-; RV32I-NEXT:    sb a7, 54(sp)
+; RV32I-NEXT:    sb a4, 56(sp)
+; RV32I-NEXT:    sb a5, 55(sp)
+; RV32I-NEXT:    sb a6, 54(sp)
 ; RV32I-NEXT:    sb zero, 91(sp)
 ; RV32I-NEXT:    sb zero, 90(sp)
 ; RV32I-NEXT:    sb zero, 89(sp)
@@ -1581,7 +1581,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    sb zero, 62(sp)
 ; RV32I-NEXT:    sb zero, 61(sp)
 ; RV32I-NEXT:    sb zero, 60(sp)
-; RV32I-NEXT:    sb t0, 53(sp)
+; RV32I-NEXT:    sb a7, 53(sp)
 ; RV32I-NEXT:    sb ra, 52(sp)
 ; RV32I-NEXT:    sb s11, 51(sp)
 ; RV32I-NEXT:    sb s10, 50(sp)
@@ -1647,18 +1647,18 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    lbu s10, 17(a6)
 ; RV32I-NEXT:    lbu s11, 30(a6)
 ; RV32I-NEXT:    lbu ra, 31(a6)
-; RV32I-NEXT:    lbu a5, 28(a6)
-; RV32I-NEXT:    lbu a4, 29(a6)
-; RV32I-NEXT:    lbu a0, 25(a6)
-; RV32I-NEXT:    lbu a1, 24(a6)
-; RV32I-NEXT:    lbu a3, 27(a6)
+; RV32I-NEXT:    lbu a4, 28(a6)
+; RV32I-NEXT:    lbu a3, 29(a6)
+; RV32I-NEXT:    lbu a5, 25(a6)
+; RV32I-NEXT:    lbu a0, 24(a6)
+; RV32I-NEXT:    lbu a1, 27(a6)
 ; RV32I-NEXT:    lbu a6, 26(a6)
-; RV32I-NEXT:    sb a0, 25(a2)
-; RV32I-NEXT:    sb a1, 24(a2)
-; RV32I-NEXT:    sb a3, 27(a2)
+; RV32I-NEXT:    sb a5, 25(a2)
+; RV32I-NEXT:    sb a0, 24(a2)
+; RV32I-NEXT:    sb a1, 27(a2)
 ; RV32I-NEXT:    sb a6, 26(a2)
-; RV32I-NEXT:    sb a4, 29(a2)
-; RV32I-NEXT:    sb a5, 28(a2)
+; RV32I-NEXT:    sb a3, 29(a2)
+; RV32I-NEXT:    sb a4, 28(a2)
 ; RV32I-NEXT:    sb ra, 31(a2)
 ; RV32I-NEXT:    sb s11, 30(a2)
 ; RV32I-NEXT:    sb s10, 17(a2)
@@ -1760,20 +1760,20 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    lbu s10, 22(a0)
 ; RV64I-NEXT:    lbu s11, 23(a0)
 ; RV64I-NEXT:    lbu ra, 24(a0)
-; RV64I-NEXT:    lbu t0, 25(a0)
-; RV64I-NEXT:    lbu a7, 26(a0)
-; RV64I-NEXT:    lbu a6, 27(a0)
-; RV64I-NEXT:    lbu a5, 28(a0)
-; RV64I-NEXT:    lbu a3, 31(a0)
-; RV64I-NEXT:    lbu a4, 30(a0)
+; RV64I-NEXT:    lbu a7, 25(a0)
+; RV64I-NEXT:    lbu a6, 26(a0)
+; RV64I-NEXT:    lbu a5, 27(a0)
+; RV64I-NEXT:    lbu a4, 28(a0)
+; RV64I-NEXT:    lbu t0, 31(a0)
+; RV64I-NEXT:    lbu a3, 30(a0)
 ; RV64I-NEXT:    lbu a0, 29(a0)
 ; RV64I-NEXT:    lbu a1, 0(a1)
-; RV64I-NEXT:    sb a3, 119(sp)
-; RV64I-NEXT:    sb a4, 118(sp)
+; RV64I-NEXT:    sb t0, 119(sp)
+; RV64I-NEXT:    sb a3, 118(sp)
 ; RV64I-NEXT:    sb a0, 117(sp)
-; RV64I-NEXT:    sb a5, 116(sp)
-; RV64I-NEXT:    sb a6, 115(sp)
-; RV64I-NEXT:    sb a7, 114(sp)
+; RV64I-NEXT:    sb a4, 116(sp)
+; RV64I-NEXT:    sb a5, 115(sp)
+; RV64I-NEXT:    sb a6, 114(sp)
 ; RV64I-NEXT:    sb zero, 87(sp)
 ; RV64I-NEXT:    sb zero, 86(sp)
 ; RV64I-NEXT:    sb zero, 85(sp)
@@ -1806,7 +1806,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    sb zero, 58(sp)
 ; RV64I-NEXT:    sb zero, 57(sp)
 ; RV64I-NEXT:    sb zero, 56(sp)
-; RV64I-NEXT:    sb t0, 113(sp)
+; RV64I-NEXT:    sb a7, 113(sp)
 ; RV64I-NEXT:    sb ra, 112(sp)
 ; RV64I-NEXT:    sb s11, 111(sp)
 ; RV64I-NEXT:    sb s10, 110(sp)
@@ -1872,18 +1872,18 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    lbu s10, 31(a6)
 ; RV64I-NEXT:    lbu s11, 16(a6)
 ; RV64I-NEXT:    lbu ra, 17(a6)
-; RV64I-NEXT:    lbu a5, 18(a6)
-; RV64I-NEXT:    lbu a4, 19(a6)
-; RV64I-NEXT:    lbu a0, 23(a6)
-; RV64I-NEXT:    lbu a1, 22(a6)
-; RV64I-NEXT:    lbu a3, 21(a6)
+; RV64I-NEXT:    lbu a4, 18(a6)
+; RV64I-NEXT:    lbu a3, 19(a6)
+; RV64I-NEXT:    lbu a5, 23(a6)
+; RV64I-NEXT:    lbu a0, 22(a6)
+; RV64I-NEXT:    lbu a1, 21(a6)
 ; RV64I-NEXT:    lbu a6, 20(a6)
-; RV64I-NEXT:    sb a0, 23(a2)
-; RV64I-NEXT:    sb a1, 22(a2)
-; RV64I-NEXT:    sb a3, 21(a2)
+; RV64I-NEXT:    sb a5, 23(a2)
+; RV64I-NEXT:    sb a0, 22(a2)
+; RV64I-NEXT:    sb a1, 21(a2)
 ; RV64I-NEXT:    sb a6, 20(a2)
-; RV64I-NEXT:    sb a4, 19(a2)
-; RV64I-NEXT:    sb a5, 18(a2)
+; RV64I-NEXT:    sb a3, 19(a2)
+; RV64I-NEXT:    sb a4, 18(a2)
 ; RV64I-NEXT:    sb ra, 17(a2)
 ; RV64I-NEXT:    sb s11, 16(a2)
 ; RV64I-NEXT:    sb s10, 31(a2)
@@ -1978,20 +1978,20 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    lbu s10, 22(a0)
 ; RV32I-NEXT:    lbu s11, 23(a0)
 ; RV32I-NEXT:    lbu ra, 24(a0)
-; RV32I-NEXT:    lbu t0, 25(a0)
-; RV32I-NEXT:    lbu a7, 26(a0)
-; RV32I-NEXT:    lbu a6, 27(a0)
-; RV32I-NEXT:    lbu a5, 28(a0)
-; RV32I-NEXT:    lbu a3, 31(a0)
-; RV32I-NEXT:    lbu a4, 30(a0)
+; RV32I-NEXT:    lbu a7, 25(a0)
+; RV32I-NEXT:    lbu a6, 26(a0)
+; RV32I-NEXT:    lbu a5, 27(a0)
+; RV32I-NEXT:    lbu a4, 28(a0)
+; RV32I-NEXT:    lbu t0, 31(a0)
+; RV32I-NEXT:    lbu a3, 30(a0)
 ; RV32I-NEXT:    lbu a0, 29(a0)
 ; RV32I-NEXT:    lbu a1, 0(a1)
-; RV32I-NEXT:    sb a3, 91(sp)
-; RV32I-NEXT:    sb a4, 90(sp)
+; RV32I-NEXT:    sb t0, 91(sp)
+; RV32I-NEXT:    sb a3, 90(sp)
 ; RV32I-NEXT:    sb a0, 89(sp)
-; RV32I-NEXT:    sb a5, 88(sp)
-; RV32I-NEXT:    sb a6, 87(sp)
-; RV32I-NEXT:    sb a7, 86(sp)
+; RV32I-NEXT:    sb a4, 88(sp)
+; RV32I-NEXT:    sb a5, 87(sp)
+; RV32I-NEXT:    sb a6, 86(sp)
 ; RV32I-NEXT:    sb zero, 59(sp)
 ; RV32I-NEXT:    sb zero, 58(sp)
 ; RV32I-NEXT:    sb zero, 57(sp)
@@ -2024,7 +2024,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    sb zero, 30(sp)
 ; RV32I-NEXT:    sb zero, 29(sp)
 ; RV32I-NEXT:    sb zero, 28(sp)
-; RV32I-NEXT:    sb t0, 85(sp)
+; RV32I-NEXT:    sb a7, 85(sp)
 ; RV32I-NEXT:    sb ra, 84(sp)
 ; RV32I-NEXT:    sb s11, 83(sp)
 ; RV32I-NEXT:    sb s10, 82(sp)
@@ -2090,18 +2090,18 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    lbu s10, 17(a6)
 ; RV32I-NEXT:    lbu s11, 30(a6)
 ; RV32I-NEXT:    lbu ra, 31(a6)
-; RV32I-NEXT:    lbu a5, 28(a6)
-; RV32I-NEXT:    lbu a4, 29(a6)
-; RV32I-NEXT:    lbu a0, 25(a6)
-; RV32I-NEXT:    lbu a1, 24(a6)
-; RV32I-NEXT:    lbu a3, 27(a6)
+; RV32I-NEXT:    lbu a4, 28(a6)
+; RV32I-NEXT:    lbu a3, 29(a6)
+; RV32I-NEXT:    lbu a5, 25(a6)
+; RV32I-NEXT:    lbu a0, 24(a6)
+; RV32I-NEXT:    lbu a1, 27(a6)
 ; RV32I-NEXT:    lbu a6, 26(a6)
-; RV32I-NEXT:    sb a0, 25(a2)
-; RV32I-NEXT:    sb a1, 24(a2)
-; RV32I-NEXT:    sb a3, 27(a2)
+; RV32I-NEXT:    sb a5, 25(a2)
+; RV32I-NEXT:    sb a0, 24(a2)
+; RV32I-NEXT:    sb a1, 27(a2)
 ; RV32I-NEXT:    sb a6, 26(a2)
-; RV32I-NEXT:    sb a4, 29(a2)
-; RV32I-NEXT:    sb a5, 28(a2)
+; RV32I-NEXT:    sb a3, 29(a2)
+; RV32I-NEXT:    sb a4, 28(a2)
 ; RV32I-NEXT:    sb ra, 31(a2)
 ; RV32I-NEXT:    sb s11, 30(a2)
 ; RV32I-NEXT:    sb s10, 17(a2)
@@ -2204,23 +2204,23 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    lbu s10, 21(a0)
 ; RV64I-NEXT:    lbu s11, 22(a0)
 ; RV64I-NEXT:    lbu ra, 23(a0)
-; RV64I-NEXT:    lbu a7, 24(a0)
-; RV64I-NEXT:    lbu a6, 25(a0)
-; RV64I-NEXT:    lbu a5, 26(a0)
-; RV64I-NEXT:    lbu a4, 27(a0)
-; RV64I-NEXT:    lbu a1, 30(a0)
-; RV64I-NEXT:    lbu a3, 29(a0)
+; RV64I-NEXT:    lbu a6, 24(a0)
+; RV64I-NEXT:    lbu a5, 25(a0)
+; RV64I-NEXT:    lbu a4, 26(a0)
+; RV64I-NEXT:    lbu a3, 27(a0)
+; RV64I-NEXT:    lbu a7, 30(a0)
+; RV64I-NEXT:    lbu a1, 29(a0)
 ; RV64I-NEXT:    lbu a0, 28(a0)
 ; RV64I-NEXT:    lbu t0, 0(t0)
-; RV64I-NEXT:    sb a1, 86(sp)
-; RV64I-NEXT:    sb a3, 85(sp)
+; RV64I-NEXT:    sb a7, 86(sp)
+; RV64I-NEXT:    sb a1, 85(sp)
 ; RV64I-NEXT:    sb a0, 84(sp)
-; RV64I-NEXT:    sb a4, 83(sp)
-; RV64I-NEXT:    sb a5, 82(sp)
-; RV64I-NEXT:    sb a6, 81(sp)
+; RV64I-NEXT:    sb a3, 83(sp)
+; RV64I-NEXT:    sb a4, 82(sp)
+; RV64I-NEXT:    sb a5, 81(sp)
 ; RV64I-NEXT:    sb t1, 87(sp)
 ; RV64I-NEXT:    slli t1, t1, 56
-; RV64I-NEXT:    sb a7, 80(sp)
+; RV64I-NEXT:    sb a6, 80(sp)
 ; RV64I-NEXT:    sb ra, 79(sp)
 ; RV64I-NEXT:    sb s11, 78(sp)
 ; RV64I-NEXT:    sb s10, 77(sp)
@@ -2325,18 +2325,18 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    lbu s10, 31(a6)
 ; RV64I-NEXT:    lbu s11, 16(a6)
 ; RV64I-NEXT:    lbu ra, 17(a6)
-; RV64I-NEXT:    lbu a5, 18(a6)
-; RV64I-NEXT:    lbu a4, 19(a6)
-; RV64I-NEXT:    lbu a0, 23(a6)
-; RV64I-NEXT:    lbu a1, 22(a6)
-; RV64I-NEXT:    lbu a3, 21(a6)
+; RV64I-NEXT:    lbu a4, 18(a6)
+; RV64I-NEXT:    lbu a3, 19(a6)
+; RV64I-NEXT:    lbu a5, 23(a6)
+; RV64I-NEXT:    lbu a0, 22(a6)
+; RV64I-NEXT:    lbu a1, 21(a6)
 ; RV64I-NEXT:    lbu a6, 20(a6)
-; RV64I-NEXT:    sb a0, 23(a2)
-; RV64I-NEXT:    sb a1, 22(a2)
-; RV64I-NEXT:    sb a3, 21(a2)
+; RV64I-NEXT:    sb a5, 23(a2)
+; RV64I-NEXT:    sb a0, 22(a2)
+; RV64I-NEXT:    sb a1, 21(a2)
 ; RV64I-NEXT:    sb a6, 20(a2)
-; RV64I-NEXT:    sb a4, 19(a2)
-; RV64I-NEXT:    sb a5, 18(a2)
+; RV64I-NEXT:    sb a3, 19(a2)
+; RV64I-NEXT:    sb a4, 18(a2)
 ; RV64I-NEXT:    sb ra, 17(a2)
 ; RV64I-NEXT:    sb s11, 16(a2)
 ; RV64I-NEXT:    sb s10, 31(a2)
@@ -2432,23 +2432,23 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    lbu s10, 21(a0)
 ; RV32I-NEXT:    lbu s11, 22(a0)
 ; RV32I-NEXT:    lbu ra, 23(a0)
-; RV32I-NEXT:    lbu a7, 24(a0)
-; RV32I-NEXT:    lbu a6, 25(a0)
-; RV32I-NEXT:    lbu a5, 26(a0)
-; RV32I-NEXT:    lbu a4, 27(a0)
-; RV32I-NEXT:    lbu a1, 30(a0)
-; RV32I-NEXT:    lbu a3, 29(a0)
+; RV32I-NEXT:    lbu a6, 24(a0)
+; RV32I-NEXT:    lbu a5, 25(a0)
+; RV32I-NEXT:    lbu a4, 26(a0)
+; RV32I-NEXT:    lbu a3, 27(a0)
+; RV32I-NEXT:    lbu a7, 30(a0)
+; RV32I-NEXT:    lbu a1, 29(a0)
 ; RV32I-NEXT:    lbu a0, 28(a0)
 ; RV32I-NEXT:    lbu t0, 0(t0)
-; RV32I-NEXT:    sb a1, 58(sp)
-; RV32I-NEXT:    sb a3, 57(sp)
+; RV32I-NEXT:    sb a7, 58(sp)
+; RV32I-NEXT:    sb a1, 57(sp)
 ; RV32I-NEXT:    sb a0, 56(sp)
-; RV32I-NEXT:    sb a4, 55(sp)
-; RV32I-NEXT:    sb a5, 54(sp)
-; RV32I-NEXT:    sb a6, 53(sp)
+; RV32I-NEXT:    sb a3, 55(sp)
+; RV32I-NEXT:    sb a4, 54(sp)
+; RV32I-NEXT:    sb a5, 53(sp)
 ; RV32I-NEXT:    sb t1, 59(sp)
 ; RV32I-NEXT:    slli t1, t1, 24
-; RV32I-NEXT:    sb a7, 52(sp)
+; RV32I-NEXT:    sb a6, 52(sp)
 ; RV32I-NEXT:    sb ra, 51(sp)
 ; RV32I-NEXT:    sb s11, 50(sp)
 ; RV32I-NEXT:    sb s10, 49(sp)
@@ -2549,18 +2549,18 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    lbu s10, 17(a6)
 ; RV32I-NEXT:    lbu s11, 30(a6)
 ; RV32I-NEXT:    lbu ra, 31(a6)
-; RV32I-NEXT:    lbu a5, 28(a6)
-; RV32I-NEXT:    lbu a4, 29(a6)
-; RV32I-NEXT:    lbu a0, 25(a6)
-; RV32I-NEXT:    lbu a1, 24(a6)
-; RV32I-NEXT:    lbu a3, 27(a6)
+; RV32I-NEXT:    lbu a4, 28(a6)
+; RV32I-NEXT:    lbu a3, 29(a6)
+; RV32I-NEXT:    lbu a5, 25(a6)
+; RV32I-NEXT:    lbu a0, 24(a6)
+; RV32I-NEXT:    lbu a1, 27(a6)
 ; RV32I-NEXT:    lbu a6, 26(a6)
-; RV32I-NEXT:    sb a0, 25(a2)
-; RV32I-NEXT:    sb a1, 24(a2)
-; RV32I-NEXT:    sb a3, 27(a2)
+; RV32I-NEXT:    sb a5, 25(a2)
+; RV32I-NEXT:    sb a0, 24(a2)
+; RV32I-NEXT:    sb a1, 27(a2)
 ; RV32I-NEXT:    sb a6, 26(a2)
-; RV32I-NEXT:    sb a4, 29(a2)
-; RV32I-NEXT:    sb a5, 28(a2)
+; RV32I-NEXT:    sb a3, 29(a2)
+; RV32I-NEXT:    sb a4, 28(a2)
 ; RV32I-NEXT:    sb ra, 31(a2)
 ; RV32I-NEXT:    sb s11, 30(a2)
 ; RV32I-NEXT:    sb s10, 17(a2)
diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
index a601256bc2afaa6..2164dbb464d2542 100644
--- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll
@@ -1511,22 +1511,22 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    or a1, a1, s10
 ; RV64I-NEXT:    lbu s10, 23(a0)
 ; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    or t0, a1, s11
+; RV64I-NEXT:    or a7, a1, s11
 ; RV64I-NEXT:    lbu s11, 24(a0)
-; RV64I-NEXT:    lbu a7, 25(a0)
-; RV64I-NEXT:    lbu a6, 26(a0)
-; RV64I-NEXT:    lbu a5, 27(a0)
-; RV64I-NEXT:    lbu a1, 31(a0)
-; RV64I-NEXT:    lbu a3, 30(a0)
-; RV64I-NEXT:    lbu a4, 29(a0)
+; RV64I-NEXT:    lbu a6, 25(a0)
+; RV64I-NEXT:    lbu a5, 26(a0)
+; RV64I-NEXT:    lbu a4, 27(a0)
+; RV64I-NEXT:    lbu t0, 31(a0)
+; RV64I-NEXT:    lbu a1, 30(a0)
+; RV64I-NEXT:    lbu a3, 29(a0)
 ; RV64I-NEXT:    lbu a0, 28(a0)
-; RV64I-NEXT:    sb a1, 87(sp)
-; RV64I-NEXT:    sb a3, 86(sp)
-; RV64I-NEXT:    sb a4, 85(sp)
+; RV64I-NEXT:    sb t0, 87(sp)
+; RV64I-NEXT:    sb a1, 86(sp)
+; RV64I-NEXT:    sb a3, 85(sp)
 ; RV64I-NEXT:    sb a0, 84(sp)
-; RV64I-NEXT:    sb a5, 83(sp)
-; RV64I-NEXT:    sb a6, 82(sp)
-; RV64I-NEXT:    sb a7, 81(sp)
+; RV64I-NEXT:    sb a4, 83(sp)
+; RV64I-NEXT:    sb a5, 82(sp)
+; RV64I-NEXT:    sb a6, 81(sp)
 ; RV64I-NEXT:    sb s11, 80(sp)
 ; RV64I-NEXT:    sb s10, 79(sp)
 ; RV64I-NEXT:    sb ra, 78(sp)
@@ -1590,7 +1590,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    sb a0, 57(sp)
 ; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    sb a0, 56(sp)
-; RV64I-NEXT:    slli a0, t0, 56
+; RV64I-NEXT:    slli a0, a7, 56
 ; RV64I-NEXT:    srli a0, a0, 59
 ; RV64I-NEXT:    addi a3, sp, 56
 ; RV64I-NEXT:    add a3, a3, a0
@@ -1616,7 +1616,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    or a1, a4, a1
 ; RV64I-NEXT:    slli a1, a1, 32
 ; RV64I-NEXT:    or a4, a1, a0
-; RV64I-NEXT:    andi a1, t0, 7
+; RV64I-NEXT:    andi a1, a7, 7
 ; RV64I-NEXT:    lbu a0, 17(a3)
 ; RV64I-NEXT:    lbu a5, 16(a3)
 ; RV64I-NEXT:    lbu a6, 18(a3)
@@ -1829,22 +1829,22 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    or a1, a1, ra
 ; RV32I-NEXT:    lbu ra, 23(a0)
-; RV32I-NEXT:    or t0, a1, s10
+; RV32I-NEXT:    or a7, a1, s10
 ; RV32I-NEXT:    lbu s10, 24(a0)
-; RV32I-NEXT:    lbu a7, 25(a0)
-; RV32I-NEXT:    lbu a6, 26(a0)
-; RV32I-NEXT:    lbu a5, 27(a0)
-; RV32I-NEXT:    lbu a1, 31(a0)
-; RV32I-NEXT:    lbu a3, 30(a0)
-; RV32I-NEXT:    lbu a4, 29(a0)
+; RV32I-NEXT:    lbu a6, 25(a0)
+; RV32I-NEXT:    lbu a5, 26(a0)
+; RV32I-NEXT:    lbu a4, 27(a0)
+; RV32I-NEXT:    lbu t0, 31(a0)
+; RV32I-NEXT:    lbu a1, 30(a0)
+; RV32I-NEXT:    lbu a3, 29(a0)
 ; RV32I-NEXT:    lbu a0, 28(a0)
-; RV32I-NEXT:    sb a1, 59(sp)
-; RV32I-NEXT:    sb a3, 58(sp)
-; RV32I-NEXT:    sb a4, 57(sp)
+; RV32I-NEXT:    sb t0, 59(sp)
+; RV32I-NEXT:    sb a1, 58(sp)
+; RV32I-NEXT:    sb a3, 57(sp)
 ; RV32I-NEXT:    sb a0, 56(sp)
-; RV32I-NEXT:    sb a5, 55(sp)
-; RV32I-NEXT:    sb a6, 54(sp)
-; RV32I-NEXT:    sb a7, 53(sp)
+; RV32I-NEXT:    sb a4, 55(sp)
+; RV32I-NEXT:    sb a5, 54(sp)
+; RV32I-NEXT:    sb a6, 53(sp)
 ; RV32I-NEXT:    sb s10, 52(sp)
 ; RV32I-NEXT:    sb ra, 51(sp)
 ; RV32I-NEXT:    sb s11, 50(sp)
@@ -1908,7 +1908,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    sb a0, 29(sp)
 ; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    sb a0, 28(sp)
-; RV32I-NEXT:    slli a0, t0, 24
+; RV32I-NEXT:    slli a0, a7, 24
 ; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    addi a4, sp, 28
 ; RV32I-NEXT:    add a4, a4, a0
@@ -1922,7 +1922,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    slli a5, a5, 24
 ; RV32I-NEXT:    or a3, a5, a3
 ; RV32I-NEXT:    or t5, a3, a0
-; RV32I-NEXT:    andi a3, t0, 7
+; RV32I-NEXT:    andi a3, a7, 7
 ; RV32I-NEXT:    lbu a0, 9(a4)
 ; RV32I-NEXT:    lbu a1, 8(a4)
 ; RV32I-NEXT:    lbu a5, 10(a4)
@@ -2169,22 +2169,22 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    or a1, a1, s10
 ; RV64I-NEXT:    lbu s10, 23(a0)
 ; RV64I-NEXT:    slli a1, a1, 32
-; RV64I-NEXT:    or t0, a1, s11
+; RV64I-NEXT:    or a7, a1, s11
 ; RV64I-NEXT:    lbu s11, 24(a0)
-; RV64I-NEXT:    lbu a7, 25(a0)
-; RV64I-NEXT:    lbu a6, 26(a0)
-; RV64I-NEXT:    lbu a5, 27(a0)
-; RV64I-NEXT:    lbu a1, 31(a0)
-; RV64I-NEXT:    lbu a3, 30(a0)
-; RV64I-NEXT:    lbu a4, 29(a0)
+; RV64I-NEXT:    lbu a6, 25(a0)
+; RV64I-NEXT:    lbu a5, 26(a0)
+; RV64I-NEXT:    lbu a4, 27(a0)
+; RV64I-NEXT:    lbu t0, 31(a0)
+; RV64I-NEXT:    lbu a1, 30(a0)
+; RV64I-NEXT:    lbu a3, 29(a0)
 ; RV64I-NEXT:    lbu a0, 28(a0)
-; RV64I-NEXT:    sb a1, 119(sp)
-; RV64I-NEXT:    sb a3, 118(sp)
-; RV64I-NEXT:    sb a4, 117(sp)
+; RV64I-NEXT:    sb t0, 119(sp)
+; RV64I-NEXT:    sb a1, 118(sp)
+; RV64I-NEXT:    sb a3, 117(sp)
 ; RV64I-NEXT:    sb a0, 116(sp)
-; RV64I-NEXT:    sb a5, 115(sp)
-; RV64I-NEXT:    sb a6, 114(sp)
-; RV64I-NEXT:    sb a7, 113(sp)
+; RV64I-NEXT:    sb a4, 115(sp)
+; RV64I-NEXT:    sb a5, 114(sp)
+; RV64I-NEXT:    sb a6, 113(sp)
 ; RV64I-NEXT:    sb s11, 112(sp)
 ; RV64I-NEXT:    sb s10, 111(sp)
 ; RV64I-NEXT:    sb ra, 110(sp)
@@ -2248,7 +2248,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    sb a0, 89(sp)
 ; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    sb a0, 88(sp)
-; RV64I-NEXT:    slli a0, t0, 56
+; RV64I-NEXT:    slli a0, a7, 56
 ; RV64I-NEXT:    srli a0, a0, 59
 ; RV64I-NEXT:    addi a1, sp, 88
 ; RV64I-NEXT:    sub a0, a1, a0
@@ -2274,7 +2274,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    or a3, a4, a3
 ; RV64I-NEXT:    slli a3, a3, 32
 ; RV64I-NEXT:    or a3, a3, a1
-; RV64I-NEXT:    andi a1, t0, 7
+; RV64I-NEXT:    andi a1, a7, 7
 ; RV64I-NEXT:    lbu a4, 1(a0)
 ; RV64I-NEXT:    lbu a5, 0(a0)
 ; RV64I-NEXT:    lbu a6, 2(a0)
@@ -2487,22 +2487,22 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    slli a1, a1, 24
 ; RV32I-NEXT:    or a1, a1, ra
 ; RV32I-NEXT:    lbu ra, 23(a0)
-; RV32I-NEXT:    or t0, a1, s10
+; RV32I-NEXT:    or a7, a1, s10
 ; RV32I-NEXT:    lbu s10, 24(a0)
-; RV32I-NEXT:    lbu a7, 25(a0)
-; RV32I-NEXT:    lbu a6, 26(a0)
-; RV32I-NEXT:    lbu a5, 27(a0)
-; RV32I-NEXT:    lbu a1, 31(a0)
-; RV32I-NEXT:    lbu a3, 30(a0)
-; RV32I-NEXT:    lbu a4, 29(a0)
+; RV32I-NEXT:    lbu a6, 25(a0)
+; RV32I-NEXT:    lbu a5, 26(a0)
+; RV32I-NEXT:    lbu a4, 27(a0)
+; RV32I-NEXT:    lbu t0, 31(a0)
+; RV32I-NEXT:    lbu a1, 30(a0)
+; RV32I-NEXT:    lbu a3, 29(a0)
 ; RV32I-NEXT:    lbu a0, 28(a0)
-; RV32I-NEXT:    sb a1, 91(sp)
-; RV32I-NEXT:    sb a3, 90(sp)
-; RV32I-NEXT:    sb a4, 89(sp)
+; RV32I-NEXT:    sb t0, 91(sp)
+; RV32I-NEXT:    sb a1, 90(sp)
+; RV32I-NEXT:    sb a3, 89(sp)
 ; RV32I-NEXT:    sb a0, 88(sp)
-; RV32I-NEXT:    sb a5, 87(sp)
-; RV32I-NEXT:    sb a6, 86(sp)
-; RV32I-NEXT:    sb a7, 85(sp)
+; RV32I-NEXT:    sb a4, 87(sp)
+; RV32I-NEXT:    sb a5, 86(sp)
+; RV32I-NEXT:    sb a6, 85(sp)
 ; RV32I-NEXT:    sb s10, 84(sp)
 ; RV32I-NEXT:    sb ra, 83(sp)
 ; RV32I-NEXT:    sb s11, 82(sp)
@@ -2566,7 +2566,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    sb a0, 61(sp)
 ; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    sb a0, 60(sp)
-; RV32I-NEXT:    slli a0, t0, 24
+; RV32I-NEXT:    slli a0, a7, 24
 ; RV32I-NEXT:    srli a0, a0, 27
 ; RV32I-NEXT:    addi a4, sp, 60
 ; RV32I-NEXT:    sub a4, a4, a0
@@ -2580,7 +2580,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    slli a5, a5, 24
 ; RV32I-NEXT:    or a3, a5, a3
 ; RV32I-NEXT:    or t5, a3, a0
-; RV32I-NEXT:    andi a1, t0, 7
+; RV32I-NEXT:    andi a1, a7, 7
 ; RV32I-NEXT:    lbu a0, 1(a4)
 ; RV32I-NEXT:    lbu a3, 0(a4)
 ; RV32I-NEXT:    lbu a5, 2(a4)
@@ -2828,22 +2828,22 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV64I-NEXT:    lbu s10, 22(a0)
 ; RV64I-NEXT:    slli a1, a1, 32
 ; RV64I-NEXT:    or t2, a1, a3
-; RV64I-NEXT:    lbu t0, 23(a0)
-; RV64I-NEXT:    lbu a7, 24(a0)
-; RV64I-NEXT:    lbu a6, 25(a0)
-; RV64I-NEXT:    lbu a5, 26(a0)
-; RV64I-NEXT:    lbu a1, 30(a0)
-; RV64I-NEXT:    lbu a3, 29(a0)
-; RV64I-NEXT:    lbu a4, 28(a0)
+; RV64I-NEXT:    lbu a7, 23(a0)
+; RV64I-NEXT:    lbu a6, 24(a0)
+; RV64I-NEXT:    lbu a5, 25(a0)
+; RV64I-NEXT:    lbu a4, 26(a0)
+; RV64I-NEXT:    lbu t0, 30(a0)
+; RV64I-NEXT:    lbu a1, 29(a0)
+; RV64I-NEXT:    lbu a3, 28(a0)
 ; RV64I-NEXT:    lbu a0, 27(a0)
-; RV64I-NEXT:    sb a1, 86(sp)
-; RV64I-NEXT:    sb a3, 85(sp)
-; RV64I-NEXT:    sb a4, 84(sp)
+; RV64I-NEXT:    sb t0, 86(sp)
+; RV64I-NEXT:    sb a1, 85(sp)
+; RV64I-NEXT:    sb a3, 84(sp)
 ; RV64I-NEXT:    sb a0, 83(sp)
-; RV64I-NEXT:    sb a5, 82(sp)
-; RV64I-NEXT:    sb a6, 81(sp)
-; RV64I-NEXT:    sb a7, 80(sp)
-; RV64I-NEXT:    sb t0, 79(sp)
+; RV64I-NEXT:    sb a4, 82(sp)
+; RV64I-NEXT:    sb a5, 81(sp)
+; RV64I-NEXT:    sb a6, 80(sp)
+; RV64I-NEXT:    sb a7, 79(sp)
 ; RV64I-NEXT:    sb s10, 78(sp)
 ; RV64I-NEXT:    sb ra, 77(sp)
 ; RV64I-NEXT:    sb s11, 76(sp)
@@ -3155,22 +3155,22 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
 ; RV32I-NEXT:    or a1, a1, ra
 ; RV32I-NEXT:    lbu ra, 22(a0)
 ; RV32I-NEXT:    or t1, a1, a3
-; RV32I-NEXT:    lbu t0, 23(a0)
-; RV32I-NEXT:    lbu a7, 24(a0)
-; RV32I-NEXT:    lbu a6, 25(a0)
-; RV32I-NEXT:    lbu a5, 26(a0)
-; RV32I-NEXT:    lbu a1, 30(a0)
-; RV32I-NEXT:    lbu a3, 29(a0)
-; RV32I-NEXT:    lbu a4, 28(a0)
+; RV32I-NEXT:    lbu a7, 23(a0)
+; RV32I-NEXT:    lbu a6, 24(a0)
+; RV32I-NEXT:    lbu a5, 25(a0)
+; RV32I-NEXT:    lbu a4, 26(a0)
+; RV32I-NEXT:    lbu t0, 30(a0)
+; RV32I-NEXT:    lbu a1, 29(a0)
+; RV32I-NEXT:    lbu a3, 28(a0)
 ; RV32I-NEXT:    lbu a0, 27(a0)
-; RV32I-NEXT:    sb a1, 58(sp)
-; RV32I-NEXT:    sb a3, 57(sp)
-; RV32I-NEXT:    sb a4, 56(sp)
+; RV32I-NEXT:    sb t0, 58(sp)
+; RV32I-NEXT:    sb a1, 57(sp)
+; RV32I-NEXT:    sb a3, 56(sp)
 ; RV32I-NEXT:    sb a0, 55(sp)
-; RV32I-NEXT:    sb a5, 54(sp)
-; RV32I-NEXT:    sb a6, 53(sp)
-; RV32I-NEXT:    sb a7, 52(sp)
-; RV32I-NEXT:    sb t0, 51(sp)
+; RV32I-NEXT:    sb a4, 54(sp)
+; RV32I-NEXT:    sb a5, 53(sp)
+; RV32I-NEXT:    sb a6, 52(sp)
+; RV32I-NEXT:    sb a7, 51(sp)
 ; RV32I-NEXT:    sb ra, 50(sp)
 ; RV32I-NEXT:    sb s11, 49(sp)
 ; RV32I-NEXT:    sb s10, 48(sp)



More information about the llvm-commits mailing list