[llvm] 137d347 - [RISCV] Reverse the order of loading/storing callee-saved registers.

Hsiangkai Wang via llvm-commits <llvm-commits@lists.llvm.org>
Mon Nov 22 07:02:50 PST 2021


Author: Hsiangkai Wang
Date: 2021-11-22T23:02:11+08:00
New Revision: 137d3474ca39a9af6130519a41b62dd58672a5c0

URL: https://github.com/llvm/llvm-project/commit/137d3474ca39a9af6130519a41b62dd58672a5c0
DIFF: https://github.com/llvm/llvm-project/commit/137d3474ca39a9af6130519a41b62dd58672a5c0.diff

LOG: [RISCV] Reverse the order of loading/storing callee-saved registers.

Currently, the return address register is restored by the last restore
instruction in the epilogue. The next instruction is usually `ret`,
which uses the return address register. Some microarchitectures have a
load-to-use data hazard; to avoid it, we can separate a load from its
use as far as possible. This patch reverses the order in which
callee-saved registers are restored, increasing the distance between
the load of `ra` and the `ret` in the epilogue.
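
For illustration, here is a typical epilogue before and after this
change (schematic; taken from the alloca.ll test update below):

  # Before: ra is restored last, so only the stack-pointer adjustment
  # separates the load of ra from its use by ret.
  lw s1, 4(sp)
  lw s0, 8(sp)
  lw ra, 12(sp)
  addi sp, sp, 16
  ret

  # After: ra is restored first, so the reloads of s0 and s1 also sit
  # between the load of ra and ret.
  lw ra, 12(sp)
  lw s0, 8(sp)
  lw s1, 4(sp)
  addi sp, sp, 16
  ret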

Differential Revision: https://reviews.llvm.org/D113967

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
    llvm/test/CodeGen/RISCV/aext-to-sext.ll
    llvm/test/CodeGen/RISCV/alloca.ll
    llvm/test/CodeGen/RISCV/atomic-rmw.ll
    llvm/test/CodeGen/RISCV/atomic-signext.ll
    llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
    llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
    llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
    llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
    llvm/test/CodeGen/RISCV/calling-conv-half.ll
    llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll
    llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
    llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll
    llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-common.ll
    llvm/test/CodeGen/RISCV/calling-conv-lp64.ll
    llvm/test/CodeGen/RISCV/calls.ll
    llvm/test/CodeGen/RISCV/copysign-casts.ll
    llvm/test/CodeGen/RISCV/double-arith.ll
    llvm/test/CodeGen/RISCV/double-convert.ll
    llvm/test/CodeGen/RISCV/double-fcmp.ll
    llvm/test/CodeGen/RISCV/double-intrinsics.ll
    llvm/test/CodeGen/RISCV/exception-pointer-register.ll
    llvm/test/CodeGen/RISCV/fastcc-int.ll
    llvm/test/CodeGen/RISCV/float-arith.ll
    llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll
    llvm/test/CodeGen/RISCV/float-br-fcmp.ll
    llvm/test/CodeGen/RISCV/float-convert.ll
    llvm/test/CodeGen/RISCV/float-fcmp.ll
    llvm/test/CodeGen/RISCV/float-intrinsics.ll
    llvm/test/CodeGen/RISCV/fp16-promote.ll
    llvm/test/CodeGen/RISCV/frame-info.ll
    llvm/test/CodeGen/RISCV/frame.ll
    llvm/test/CodeGen/RISCV/frameaddr-returnaddr.ll
    llvm/test/CodeGen/RISCV/half-arith.ll
    llvm/test/CodeGen/RISCV/half-convert.ll
    llvm/test/CodeGen/RISCV/half-intrinsics.ll
    llvm/test/CodeGen/RISCV/half-mem.ll
    llvm/test/CodeGen/RISCV/interrupt-attr-callee.ll
    llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll
    llvm/test/CodeGen/RISCV/interrupt-attr.ll
    llvm/test/CodeGen/RISCV/large-stack.ll
    llvm/test/CodeGen/RISCV/mul.ll
    llvm/test/CodeGen/RISCV/out-of-reach-emergency-slot.mir
    llvm/test/CodeGen/RISCV/remat.ll
    llvm/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll
    llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll
    llvm/test/CodeGen/RISCV/rv32zbb.ll
    llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
    llvm/test/CodeGen/RISCV/rv64zbb.ll
    llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
    llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
    llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
    llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
    llvm/test/CodeGen/RISCV/rvv/localvar.ll
    llvm/test/CodeGen/RISCV/rvv/memory-args.ll
    llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
    llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
    llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
    llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
    llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
    llvm/test/CodeGen/RISCV/select-cc.ll
    llvm/test/CodeGen/RISCV/shadowcallstack.ll
    llvm/test/CodeGen/RISCV/shifts.ll
    llvm/test/CodeGen/RISCV/shrinkwrap.ll
    llvm/test/CodeGen/RISCV/srem-lkk.ll
    llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
    llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
    llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
    llvm/test/CodeGen/RISCV/stack-realignment.ll
    llvm/test/CodeGen/RISCV/stack-store-check.ll
    llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
    llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
    llvm/test/CodeGen/RISCV/urem-lkk.ll
    llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
    llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
    llvm/test/CodeGen/RISCV/vararg.ll
    llvm/test/CodeGen/RISCV/xaluo.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 595c3cdfbb1d5..697f177723581 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1065,10 +1065,14 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
   if (MI != MBB.end() && !MI->isDebugInstr())
     DL = MI->getDebugLoc();
 
-  // Manually restore values not restored by libcall. Insert in reverse order.
+  // Manually restore values not restored by libcall.
+  // Restore in the same order as the prologue saves them; there is no need
+  // to reverse the order in the epilogue. This restores the return address
+  // first, which increases the opportunity to avoid the load-to-use data
+  // hazard between the load of RA and the `ret` that uses it.
   // loadRegFromStackSlot can insert multiple instructions.
   const auto &NonLibcallCSI = getNonLibcallCSI(*MF, CSI);
-  for (auto &CS : reverse(NonLibcallCSI)) {
+  for (auto &CS : NonLibcallCSI) {
     Register Reg = CS.getReg();
     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
     TII.loadRegFromStackSlot(MBB, MI, Reg, CS.getFrameIdx(), RC, TRI);
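
With this change the epilogue reload sequence mirrors the prologue
spill sequence instead of reversing it. Schematically (a hand-written
sketch, not part of the patch):

  # Prologue: ra is spilled first, at the highest offset.
  addi sp, sp, -16
  sw ra, 12(sp)
  sw s0, 8(sp)
  sw s1, 4(sp)
  ...
  # Epilogue: reload in the same order, so ra comes back first.
  lw ra, 12(sp)
  lw s0, 8(sp)
  lw s1, 4(sp)
  addi sp, sp, 16
  ret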

diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
index 5265a085073a7..df90981508a9b 100644
--- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll
+++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll
@@ -25,9 +25,9 @@ define void @quux(i32 signext %arg, i32 signext %arg1) nounwind {
 ; RV64I-NEXT:    addiw s1, s1, 1
 ; RV64I-NEXT:    bne s1, s0, .LBB0_2
 ; RV64I-NEXT:  .LBB0_3: # %bb6
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 bb:

diff --git a/llvm/test/CodeGen/RISCV/alloca.ll b/llvm/test/CodeGen/RISCV/alloca.ll
index 95ef6aae2e6b6..9726e1f25a71c 100644
--- a/llvm/test/CodeGen/RISCV/alloca.ll
+++ b/llvm/test/CodeGen/RISCV/alloca.ll
@@ -20,8 +20,8 @@ define void @simple_alloca(i32 %n) nounwind {
 ; RV32I-NEXT:    mv sp, a0
 ; RV32I-NEXT:    call notdead@plt
 ; RV32I-NEXT:    addi sp, s0, -16
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
   %1 = alloca i8, i32 %n
@@ -48,9 +48,9 @@ define void @scoped_alloca(i32 %n) nounwind {
 ; RV32I-NEXT:    call notdead@plt
 ; RV32I-NEXT:    mv sp, s1
 ; RV32I-NEXT:    addi sp, s0, -16
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
   %sp = call i8* @llvm.stacksave()
@@ -94,8 +94,8 @@ define void @alloca_callframe(i32 %n) nounwind {
 ; RV32I-NEXT:    call func@plt
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    addi sp, s0, -16
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
   %1 = alloca i8, i32 %n

diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index 6f64d8a2f10ef..131b3abf0fdfd 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -2034,10 +2034,10 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB35_1
 ; RV32I-NEXT:  .LBB35_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2106,10 +2106,10 @@ define i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB35_1
 ; RV64I-NEXT:  .LBB35_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2182,10 +2182,10 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB36_1
 ; RV32I-NEXT:  .LBB36_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2254,10 +2254,10 @@ define i8 @atomicrmw_max_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB36_1
 ; RV64I-NEXT:  .LBB36_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2330,10 +2330,10 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB37_1
 ; RV32I-NEXT:  .LBB37_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2402,10 +2402,10 @@ define i8 @atomicrmw_max_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB37_1
 ; RV64I-NEXT:  .LBB37_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2478,10 +2478,10 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB38_1
 ; RV32I-NEXT:  .LBB38_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2550,10 +2550,10 @@ define i8 @atomicrmw_max_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB38_1
 ; RV64I-NEXT:  .LBB38_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2626,10 +2626,10 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB39_1
 ; RV32I-NEXT:  .LBB39_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2698,10 +2698,10 @@ define i8 @atomicrmw_max_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB39_1
 ; RV64I-NEXT:  .LBB39_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2774,10 +2774,10 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB40_1
 ; RV32I-NEXT:  .LBB40_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2846,10 +2846,10 @@ define i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB40_1
 ; RV64I-NEXT:  .LBB40_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2922,10 +2922,10 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB41_1
 ; RV32I-NEXT:  .LBB41_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2994,10 +2994,10 @@ define i8 @atomicrmw_min_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB41_1
 ; RV64I-NEXT:  .LBB41_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -3070,10 +3070,10 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB42_1
 ; RV32I-NEXT:  .LBB42_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3142,10 +3142,10 @@ define i8 @atomicrmw_min_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB42_1
 ; RV64I-NEXT:  .LBB42_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -3218,10 +3218,10 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB43_1
 ; RV32I-NEXT:  .LBB43_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3290,10 +3290,10 @@ define i8 @atomicrmw_min_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB43_1
 ; RV64I-NEXT:  .LBB43_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -3366,10 +3366,10 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB44_1
 ; RV32I-NEXT:  .LBB44_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3438,10 +3438,10 @@ define i8 @atomicrmw_min_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB44_1
 ; RV64I-NEXT:  .LBB44_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -3512,10 +3512,10 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB45_1
 ; RV32I-NEXT:  .LBB45_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3576,10 +3576,10 @@ define i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB45_1
 ; RV64I-NEXT:  .LBB45_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -3644,10 +3644,10 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB46_1
 ; RV32I-NEXT:  .LBB46_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3708,10 +3708,10 @@ define i8 @atomicrmw_umax_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB46_1
 ; RV64I-NEXT:  .LBB46_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -3776,10 +3776,10 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB47_1
 ; RV32I-NEXT:  .LBB47_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3840,10 +3840,10 @@ define i8 @atomicrmw_umax_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB47_1
 ; RV64I-NEXT:  .LBB47_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -3908,10 +3908,10 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB48_1
 ; RV32I-NEXT:  .LBB48_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3972,10 +3972,10 @@ define i8 @atomicrmw_umax_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB48_1
 ; RV64I-NEXT:  .LBB48_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -4040,10 +4040,10 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB49_1
 ; RV32I-NEXT:  .LBB49_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -4104,10 +4104,10 @@ define i8 @atomicrmw_umax_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB49_1
 ; RV64I-NEXT:  .LBB49_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -4172,10 +4172,10 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB50_1
 ; RV32I-NEXT:  .LBB50_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -4236,10 +4236,10 @@ define i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB50_1
 ; RV64I-NEXT:  .LBB50_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -4304,10 +4304,10 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB51_1
 ; RV32I-NEXT:  .LBB51_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -4368,10 +4368,10 @@ define i8 @atomicrmw_umin_i8_acquire(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB51_1
 ; RV64I-NEXT:  .LBB51_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -4436,10 +4436,10 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB52_1
 ; RV32I-NEXT:  .LBB52_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -4500,10 +4500,10 @@ define i8 @atomicrmw_umin_i8_release(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB52_1
 ; RV64I-NEXT:  .LBB52_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -4568,10 +4568,10 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB53_1
 ; RV32I-NEXT:  .LBB53_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -4632,10 +4632,10 @@ define i8 @atomicrmw_umin_i8_acq_rel(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB53_1
 ; RV64I-NEXT:  .LBB53_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -4700,10 +4700,10 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:    j .LBB54_1
 ; RV32I-NEXT:  .LBB54_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -4764,10 +4764,10 @@ define i8 @atomicrmw_umin_i8_seq_cst(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:    j .LBB54_1
 ; RV64I-NEXT:  .LBB54_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -6914,10 +6914,10 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB90_1
 ; RV32I-NEXT:  .LBB90_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -6987,10 +6987,10 @@ define i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB90_1
 ; RV64I-NEXT:  .LBB90_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -7064,11 +7064,11 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB91_1
 ; RV32I-NEXT:  .LBB91_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
 ; RV32IA-LABEL: atomicrmw_max_i16_acquire:
@@ -7137,10 +7137,10 @@ define i16 @atomicrmw_max_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB91_1
 ; RV64I-NEXT:  .LBB91_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -7214,10 +7214,10 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB92_1
 ; RV32I-NEXT:  .LBB92_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -7287,10 +7287,10 @@ define i16 @atomicrmw_max_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB92_1
 ; RV64I-NEXT:  .LBB92_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -7364,10 +7364,10 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB93_1
 ; RV32I-NEXT:  .LBB93_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -7437,10 +7437,10 @@ define i16 @atomicrmw_max_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB93_1
 ; RV64I-NEXT:  .LBB93_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -7514,10 +7514,10 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB94_1
 ; RV32I-NEXT:  .LBB94_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -7587,10 +7587,10 @@ define i16 @atomicrmw_max_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB94_1
 ; RV64I-NEXT:  .LBB94_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -7664,10 +7664,10 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB95_1
 ; RV32I-NEXT:  .LBB95_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -7737,10 +7737,10 @@ define i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB95_1
 ; RV64I-NEXT:  .LBB95_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -7814,10 +7814,10 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB96_1
 ; RV32I-NEXT:  .LBB96_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -7887,10 +7887,10 @@ define i16 @atomicrmw_min_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB96_1
 ; RV64I-NEXT:  .LBB96_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -7964,10 +7964,10 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB97_1
 ; RV32I-NEXT:  .LBB97_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -8037,10 +8037,10 @@ define i16 @atomicrmw_min_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB97_1
 ; RV64I-NEXT:  .LBB97_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -8114,10 +8114,10 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB98_1
 ; RV32I-NEXT:  .LBB98_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -8187,10 +8187,10 @@ define i16 @atomicrmw_min_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB98_1
 ; RV64I-NEXT:  .LBB98_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -8264,10 +8264,10 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB99_1
 ; RV32I-NEXT:  .LBB99_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -8337,10 +8337,10 @@ define i16 @atomicrmw_min_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB99_1
 ; RV64I-NEXT:  .LBB99_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -8415,11 +8415,11 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB100_1
 ; RV32I-NEXT:  .LBB100_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -8484,11 +8484,11 @@ define i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB100_1
 ; RV64I-NEXT:  .LBB100_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -8557,11 +8557,11 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB101_1
 ; RV32I-NEXT:  .LBB101_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -8626,11 +8626,11 @@ define i16 @atomicrmw_umax_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB101_1
 ; RV64I-NEXT:  .LBB101_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -8699,11 +8699,11 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB102_1
 ; RV32I-NEXT:  .LBB102_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -8768,11 +8768,11 @@ define i16 @atomicrmw_umax_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB102_1
 ; RV64I-NEXT:  .LBB102_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -8841,11 +8841,11 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB103_1
 ; RV32I-NEXT:  .LBB103_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -8910,11 +8910,11 @@ define i16 @atomicrmw_umax_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB103_1
 ; RV64I-NEXT:  .LBB103_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -8983,11 +8983,11 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB104_1
 ; RV32I-NEXT:  .LBB104_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -9052,11 +9052,11 @@ define i16 @atomicrmw_umax_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB104_1
 ; RV64I-NEXT:  .LBB104_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -9125,11 +9125,11 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB105_1
 ; RV32I-NEXT:  .LBB105_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -9194,11 +9194,11 @@ define i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB105_1
 ; RV64I-NEXT:  .LBB105_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -9267,11 +9267,11 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB106_1
 ; RV32I-NEXT:  .LBB106_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -9336,11 +9336,11 @@ define i16 @atomicrmw_umin_i16_acquire(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB106_1
 ; RV64I-NEXT:  .LBB106_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -9409,11 +9409,11 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB107_1
 ; RV32I-NEXT:  .LBB107_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -9478,11 +9478,11 @@ define i16 @atomicrmw_umin_i16_release(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB107_1
 ; RV64I-NEXT:  .LBB107_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -9551,11 +9551,11 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB108_1
 ; RV32I-NEXT:  .LBB108_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -9620,11 +9620,11 @@ define i16 @atomicrmw_umin_i16_acq_rel(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB108_1
 ; RV64I-NEXT:  .LBB108_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -9693,13 +9693,13 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:    j .LBB109_1
 ; RV32I-NEXT:  .LBB109_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    addi sp, sp, 32
-; RV32I-NEXT:    ret
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 32
+; RV32I-NEXT:    ret
 ;
 ; RV32IA-LABEL: atomicrmw_umin_i16_seq_cst:
 ; RV32IA:       # %bb.0:
@@ -9762,11 +9762,11 @@ define i16 @atomicrmw_umin_i16_seq_cst(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:    j .LBB109_1
 ; RV64I-NEXT:  .LBB109_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11099,9 +11099,9 @@ define i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB145_1
 ; RV32I-NEXT:  .LBB145_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -11143,10 +11143,10 @@ define i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB145_1
 ; RV64I-NEXT:  .LBB145_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11189,9 +11189,9 @@ define i32 @atomicrmw_max_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB146_1
 ; RV32I-NEXT:  .LBB146_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -11233,10 +11233,10 @@ define i32 @atomicrmw_max_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB146_1
 ; RV64I-NEXT:  .LBB146_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11279,9 +11279,9 @@ define i32 @atomicrmw_max_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB147_1
 ; RV32I-NEXT:  .LBB147_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -11323,10 +11323,10 @@ define i32 @atomicrmw_max_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB147_1
 ; RV64I-NEXT:  .LBB147_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11369,9 +11369,9 @@ define i32 @atomicrmw_max_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB148_1
 ; RV32I-NEXT:  .LBB148_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -11413,10 +11413,10 @@ define i32 @atomicrmw_max_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB148_1
 ; RV64I-NEXT:  .LBB148_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11459,9 +11459,9 @@ define i32 @atomicrmw_max_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB149_1
 ; RV32I-NEXT:  .LBB149_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -11503,10 +11503,10 @@ define i32 @atomicrmw_max_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB149_1
 ; RV64I-NEXT:  .LBB149_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11549,9 +11549,9 @@ define i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB150_1
 ; RV32I-NEXT:  .LBB150_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -11593,10 +11593,10 @@ define i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB150_1
 ; RV64I-NEXT:  .LBB150_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11639,9 +11639,9 @@ define i32 @atomicrmw_min_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB151_1
 ; RV32I-NEXT:  .LBB151_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -11683,10 +11683,10 @@ define i32 @atomicrmw_min_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB151_1
 ; RV64I-NEXT:  .LBB151_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11729,9 +11729,9 @@ define i32 @atomicrmw_min_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB152_1
 ; RV32I-NEXT:  .LBB152_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -11773,10 +11773,10 @@ define i32 @atomicrmw_min_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB152_1
 ; RV64I-NEXT:  .LBB152_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11819,9 +11819,9 @@ define i32 @atomicrmw_min_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB153_1
 ; RV32I-NEXT:  .LBB153_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -11863,10 +11863,10 @@ define i32 @atomicrmw_min_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB153_1
 ; RV64I-NEXT:  .LBB153_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11909,9 +11909,9 @@ define i32 @atomicrmw_min_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB154_1
 ; RV32I-NEXT:  .LBB154_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -11953,10 +11953,10 @@ define i32 @atomicrmw_min_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB154_1
 ; RV64I-NEXT:  .LBB154_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -11999,9 +11999,9 @@ define i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB155_1
 ; RV32I-NEXT:  .LBB155_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -12043,10 +12043,10 @@ define i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB155_1
 ; RV64I-NEXT:  .LBB155_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -12089,9 +12089,9 @@ define i32 @atomicrmw_umax_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB156_1
 ; RV32I-NEXT:  .LBB156_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -12133,10 +12133,10 @@ define i32 @atomicrmw_umax_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB156_1
 ; RV64I-NEXT:  .LBB156_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -12179,9 +12179,9 @@ define i32 @atomicrmw_umax_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB157_1
 ; RV32I-NEXT:  .LBB157_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -12223,10 +12223,10 @@ define i32 @atomicrmw_umax_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB157_1
 ; RV64I-NEXT:  .LBB157_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -12269,9 +12269,9 @@ define i32 @atomicrmw_umax_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB158_1
 ; RV32I-NEXT:  .LBB158_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -12313,10 +12313,10 @@ define i32 @atomicrmw_umax_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB158_1
 ; RV64I-NEXT:  .LBB158_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -12359,9 +12359,9 @@ define i32 @atomicrmw_umax_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB159_1
 ; RV32I-NEXT:  .LBB159_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -12403,10 +12403,10 @@ define i32 @atomicrmw_umax_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB159_1
 ; RV64I-NEXT:  .LBB159_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -12449,9 +12449,9 @@ define i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB160_1
 ; RV32I-NEXT:  .LBB160_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -12493,10 +12493,10 @@ define i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB160_1
 ; RV64I-NEXT:  .LBB160_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -12539,9 +12539,9 @@ define i32 @atomicrmw_umin_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB161_1
 ; RV32I-NEXT:  .LBB161_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -12583,10 +12583,10 @@ define i32 @atomicrmw_umin_i32_acquire(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB161_1
 ; RV64I-NEXT:  .LBB161_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -12629,9 +12629,9 @@ define i32 @atomicrmw_umin_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB162_1
 ; RV32I-NEXT:  .LBB162_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -12673,10 +12673,10 @@ define i32 @atomicrmw_umin_i32_release(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB162_1
 ; RV64I-NEXT:  .LBB162_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -12719,9 +12719,9 @@ define i32 @atomicrmw_umin_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB163_1
 ; RV32I-NEXT:  .LBB163_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -12763,10 +12763,10 @@ define i32 @atomicrmw_umin_i32_acq_rel(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB163_1
 ; RV64I-NEXT:  .LBB163_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -12809,9 +12809,9 @@ define i32 @atomicrmw_umin_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB164_1
 ; RV32I-NEXT:  .LBB164_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -12853,10 +12853,10 @@ define i32 @atomicrmw_umin_i32_seq_cst(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB164_1
 ; RV64I-NEXT:  .LBB164_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -14321,10 +14321,10 @@ define i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB200_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -14375,10 +14375,10 @@ define i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB200_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -14412,9 +14412,9 @@ define i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB200_1
 ; RV64I-NEXT:  .LBB200_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -14474,10 +14474,10 @@ define i64 @atomicrmw_max_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB201_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -14528,10 +14528,10 @@ define i64 @atomicrmw_max_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB201_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -14565,9 +14565,9 @@ define i64 @atomicrmw_max_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB201_1
 ; RV64I-NEXT:  .LBB201_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -14627,10 +14627,10 @@ define i64 @atomicrmw_max_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB202_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -14681,11 +14681,11 @@ define i64 @atomicrmw_max_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB202_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    addi sp, sp, 32
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
 ; RV64I-LABEL: atomicrmw_max_i64_release:
@@ -14718,9 +14718,9 @@ define i64 @atomicrmw_max_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB202_1
 ; RV64I-NEXT:  .LBB202_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -14780,10 +14780,10 @@ define i64 @atomicrmw_max_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB203_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -14834,10 +14834,10 @@ define i64 @atomicrmw_max_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB203_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -14871,9 +14871,9 @@ define i64 @atomicrmw_max_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB203_1
 ; RV64I-NEXT:  .LBB203_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -14933,10 +14933,10 @@ define i64 @atomicrmw_max_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB204_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -14987,10 +14987,10 @@ define i64 @atomicrmw_max_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB204_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -15024,9 +15024,9 @@ define i64 @atomicrmw_max_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB204_1
 ; RV64I-NEXT:  .LBB204_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -15087,10 +15087,10 @@ define i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB205_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -15142,10 +15142,10 @@ define i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB205_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -15179,9 +15179,9 @@ define i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB205_1
 ; RV64I-NEXT:  .LBB205_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -15242,10 +15242,10 @@ define i64 @atomicrmw_min_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB206_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -15297,10 +15297,10 @@ define i64 @atomicrmw_min_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB206_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -15334,9 +15334,9 @@ define i64 @atomicrmw_min_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB206_1
 ; RV64I-NEXT:  .LBB206_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -15397,10 +15397,10 @@ define i64 @atomicrmw_min_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB207_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -15452,10 +15452,10 @@ define i64 @atomicrmw_min_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB207_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -15489,9 +15489,9 @@ define i64 @atomicrmw_min_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB207_1
 ; RV64I-NEXT:  .LBB207_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -15552,10 +15552,10 @@ define i64 @atomicrmw_min_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB208_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -15607,10 +15607,10 @@ define i64 @atomicrmw_min_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB208_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -15644,9 +15644,9 @@ define i64 @atomicrmw_min_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB208_1
 ; RV64I-NEXT:  .LBB208_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -15707,10 +15707,10 @@ define i64 @atomicrmw_min_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB209_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -15762,10 +15762,10 @@ define i64 @atomicrmw_min_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB209_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -15799,9 +15799,9 @@ define i64 @atomicrmw_min_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB209_1
 ; RV64I-NEXT:  .LBB209_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -15861,10 +15861,10 @@ define i64 @atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB210_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -15915,10 +15915,10 @@ define i64 @atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB210_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -15952,9 +15952,9 @@ define i64 @atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB210_1
 ; RV64I-NEXT:  .LBB210_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -16014,10 +16014,10 @@ define i64 @atomicrmw_umax_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB211_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -16068,10 +16068,10 @@ define i64 @atomicrmw_umax_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB211_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -16105,9 +16105,9 @@ define i64 @atomicrmw_umax_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB211_1
 ; RV64I-NEXT:  .LBB211_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -16167,10 +16167,10 @@ define i64 @atomicrmw_umax_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB212_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -16221,10 +16221,10 @@ define i64 @atomicrmw_umax_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB212_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -16258,9 +16258,9 @@ define i64 @atomicrmw_umax_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB212_1
 ; RV64I-NEXT:  .LBB212_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -16320,10 +16320,10 @@ define i64 @atomicrmw_umax_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB213_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -16374,10 +16374,10 @@ define i64 @atomicrmw_umax_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB213_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -16411,9 +16411,9 @@ define i64 @atomicrmw_umax_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB213_1
 ; RV64I-NEXT:  .LBB213_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -16473,10 +16473,10 @@ define i64 @atomicrmw_umax_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB214_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -16527,10 +16527,10 @@ define i64 @atomicrmw_umax_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB214_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -16564,9 +16564,9 @@ define i64 @atomicrmw_umax_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB214_1
 ; RV64I-NEXT:  .LBB214_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -16627,10 +16627,10 @@ define i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB215_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -16682,10 +16682,10 @@ define i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB215_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -16719,9 +16719,9 @@ define i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB215_1
 ; RV64I-NEXT:  .LBB215_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -16782,10 +16782,10 @@ define i64 @atomicrmw_umin_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB216_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -16837,10 +16837,10 @@ define i64 @atomicrmw_umin_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB216_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -16874,9 +16874,9 @@ define i64 @atomicrmw_umin_i64_acquire(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB216_1
 ; RV64I-NEXT:  .LBB216_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -16937,10 +16937,10 @@ define i64 @atomicrmw_umin_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB217_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -16992,10 +16992,10 @@ define i64 @atomicrmw_umin_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB217_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -17029,9 +17029,9 @@ define i64 @atomicrmw_umin_i64_release(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB217_1
 ; RV64I-NEXT:  .LBB217_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -17092,10 +17092,10 @@ define i64 @atomicrmw_umin_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB218_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -17147,10 +17147,10 @@ define i64 @atomicrmw_umin_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB218_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -17184,9 +17184,9 @@ define i64 @atomicrmw_umin_i64_acq_rel(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB218_1
 ; RV64I-NEXT:  .LBB218_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -17247,10 +17247,10 @@ define i64 @atomicrmw_umin_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB219_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -17302,10 +17302,10 @@ define i64 @atomicrmw_umin_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB219_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -17339,9 +17339,9 @@ define i64 @atomicrmw_umin_i64_seq_cst(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB219_1
 ; RV64I-NEXT:  .LBB219_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll
index 2fb815ab2dab4..0347efeafbe5f 100644
--- a/llvm/test/CodeGen/RISCV/atomic-signext.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll
@@ -611,10 +611,10 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:  .LBB10_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -686,10 +686,10 @@ define signext i8 @atomicrmw_max_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:  .LBB10_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -765,10 +765,10 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:  .LBB11_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -840,10 +840,10 @@ define signext i8 @atomicrmw_min_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:  .LBB11_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -917,10 +917,10 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:  .LBB12_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -984,10 +984,10 @@ define signext i8 @atomicrmw_umax_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:  .LBB12_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -1055,10 +1055,10 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV32I-NEXT:  .LBB13_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 24
 ; RV32I-NEXT:    srai a0, a0, 24
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1122,10 +1122,10 @@ define signext i8 @atomicrmw_umin_i8_monotonic(i8 *%a, i8 %b) nounwind {
 ; RV64I-NEXT:  .LBB13_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 56
 ; RV64I-NEXT:    srai a0, a0, 56
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -1667,10 +1667,10 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:  .LBB21_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1743,10 +1743,10 @@ define signext i16 @atomicrmw_max_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:  .LBB21_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -1823,10 +1823,10 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:  .LBB22_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a3, 16
 ; RV32I-NEXT:    srai a0, a0, 16
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1899,10 +1899,10 @@ define signext i16 @atomicrmw_min_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:  .LBB22_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a3, 48
 ; RV64I-NEXT:    srai a0, a0, 48
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -1980,11 +1980,11 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:  .LBB23_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a1, 16
 ; RV32I-NEXT:    srai a0, a0, 16
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2052,11 +2052,11 @@ define signext i16 @atomicrmw_umax_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:  .LBB23_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a1, 48
 ; RV64I-NEXT:    srai a0, a0, 48
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2128,11 +2128,11 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV32I-NEXT:  .LBB24_4: # %atomicrmw.end
 ; RV32I-NEXT:    slli a0, a1, 16
 ; RV32I-NEXT:    srai a0, a0, 16
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2200,11 +2200,11 @@ define signext i16 @atomicrmw_umin_i16_monotonic(i16 *%a, i16 %b) nounwind {
 ; RV64I-NEXT:  .LBB24_4: # %atomicrmw.end
 ; RV64I-NEXT:    slli a0, a1, 48
 ; RV64I-NEXT:    srai a0, a0, 48
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2530,9 +2530,9 @@ define signext i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB32_1
 ; RV32I-NEXT:  .LBB32_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -2574,10 +2574,10 @@ define signext i32 @atomicrmw_max_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB32_1
 ; RV64I-NEXT:  .LBB32_4: # %atomicrmw.end
 ; RV64I-NEXT:    sext.w a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2620,9 +2620,9 @@ define signext i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB33_1
 ; RV32I-NEXT:  .LBB33_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -2664,10 +2664,10 @@ define signext i32 @atomicrmw_min_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB33_1
 ; RV64I-NEXT:  .LBB33_4: # %atomicrmw.end
 ; RV64I-NEXT:    sext.w a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2710,9 +2710,9 @@ define signext i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB34_1
 ; RV32I-NEXT:  .LBB34_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -2754,10 +2754,10 @@ define signext i32 @atomicrmw_umax_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB34_1
 ; RV64I-NEXT:  .LBB34_4: # %atomicrmw.end
 ; RV64I-NEXT:    sext.w a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -2800,9 +2800,9 @@ define signext i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV32I-NEXT:    j .LBB35_1
 ; RV32I-NEXT:  .LBB35_4: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a3
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -2844,10 +2844,10 @@ define signext i32 @atomicrmw_umin_i32_monotonic(i32 *%a, i32 %b) nounwind {
 ; RV64I-NEXT:    j .LBB35_1
 ; RV64I-NEXT:  .LBB35_4: # %atomicrmw.end
 ; RV64I-NEXT:    sext.w a0, a3
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -3188,10 +3188,10 @@ define signext i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB43_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3242,10 +3242,10 @@ define signext i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB43_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -3279,9 +3279,9 @@ define signext i64 @atomicrmw_max_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB43_1
 ; RV64I-NEXT:  .LBB43_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -3342,10 +3342,10 @@ define signext i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB44_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3397,10 +3397,10 @@ define signext i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB44_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -3434,9 +3434,9 @@ define signext i64 @atomicrmw_min_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB44_1
 ; RV64I-NEXT:  .LBB44_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -3496,10 +3496,10 @@ define signext i64 @atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB45_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3550,10 +3550,10 @@ define signext i64 @atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB45_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -3587,9 +3587,9 @@ define signext i64 @atomicrmw_umax_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB45_1
 ; RV64I-NEXT:  .LBB45_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;
@@ -3650,10 +3650,10 @@ define signext i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32I-NEXT:  .LBB46_7: # %atomicrmw.end
 ; RV32I-NEXT:    mv a0, a4
 ; RV32I-NEXT:    mv a1, a5
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -3705,10 +3705,10 @@ define signext i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV32IA-NEXT:  .LBB46_7: # %atomicrmw.end
 ; RV32IA-NEXT:    mv a0, a4
 ; RV32IA-NEXT:    mv a1, a5
-; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32IA-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
 ; RV32IA-NEXT:    addi sp, sp, 32
 ; RV32IA-NEXT:    ret
 ;
@@ -3742,9 +3742,9 @@ define signext i64 @atomicrmw_umin_i64_monotonic(i64 *%a, i64 %b) nounwind {
 ; RV64I-NEXT:    j .LBB46_1
 ; RV64I-NEXT:  .LBB46_4: # %atomicrmw.end
 ; RV64I-NEXT:    mv a0, a3
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
index 1ca0b2e133841..a09b4d94558e1 100644
--- a/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/bswap-ctlz-cttz-ctpop.ll
@@ -493,14 +493,14 @@ define i64 @test_cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    srli a0, s2, 24
 ; RV32I-NEXT:  .LBB7_3:
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -793,14 +793,14 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind {
 ; RV32I-NEXT:    srli a0, s2, 24
 ; RV32I-NEXT:  .LBB11_3:
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -968,13 +968,13 @@ define i64 @test_ctpop_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    add a0, a0, s5
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
index d8d904ac85c6c..db43fdda057ea 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll
@@ -242,18 +242,18 @@ define void @callee() nounwind {
 ; ILP32F-NEXT:    fsw ft2, %lo(var+8)(a0)
 ; ILP32F-NEXT:    fsw ft1, %lo(var+4)(a0)
 ; ILP32F-NEXT:    fsw ft0, %lo(var)(a0)
-; ILP32F-NEXT:    flw fs11, 0(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs10, 4(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs9, 8(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs8, 12(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs7, 16(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs6, 20(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs5, 24(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs4, 28(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs3, 32(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
 ; ILP32F-NEXT:    flw fs0, 44(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs3, 32(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs4, 28(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs5, 24(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs6, 20(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs7, 16(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs8, 12(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs9, 8(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs10, 4(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs11, 0(sp) # 4-byte Folded Reload
 ; ILP32F-NEXT:    addi sp, sp, 48
 ; ILP32F-NEXT:    ret
 ;
@@ -338,18 +338,18 @@ define void @callee() nounwind {
 ; LP64F-NEXT:    fsw ft2, %lo(var+8)(a0)
 ; LP64F-NEXT:    fsw ft1, %lo(var+4)(a0)
 ; LP64F-NEXT:    fsw ft0, %lo(var)(a0)
-; LP64F-NEXT:    flw fs11, 0(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs10, 4(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs9, 8(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs8, 12(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs7, 16(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs6, 20(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs5, 24(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs4, 28(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs3, 32(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
 ; LP64F-NEXT:    flw fs0, 44(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs3, 32(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs4, 28(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs5, 24(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs6, 20(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs7, 16(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs8, 12(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs9, 8(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs10, 4(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs11, 0(sp) # 4-byte Folded Reload
 ; LP64F-NEXT:    addi sp, sp, 48
 ; LP64F-NEXT:    ret
 ;
@@ -434,18 +434,18 @@ define void @callee() nounwind {
 ; ILP32D-NEXT:    fsw ft2, %lo(var+8)(a0)
 ; ILP32D-NEXT:    fsw ft1, %lo(var+4)(a0)
 ; ILP32D-NEXT:    fsw ft0, %lo(var)(a0)
-; ILP32D-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    addi sp, sp, 96
 ; ILP32D-NEXT:    ret
 ;
@@ -530,18 +530,18 @@ define void @callee() nounwind {
 ; LP64D-NEXT:    fsw ft2, %lo(var+8)(a0)
 ; LP64D-NEXT:    fsw ft1, %lo(var+4)(a0)
 ; LP64D-NEXT:    fsw ft0, %lo(var)(a0)
-; LP64D-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    addi sp, sp, 96
 ; LP64D-NEXT:    ret
   %val = load [32 x float], [32 x float]* @var
@@ -694,9 +694,9 @@ define void @caller() nounwind {
 ; ILP32-NEXT:    fsw ft0, %lo(var+4)(s0)
 ; ILP32-NEXT:    flw ft0, 128(sp) # 4-byte Folded Reload
 ; ILP32-NEXT:    fsw ft0, %lo(var)(s0)
-; ILP32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
 ; ILP32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; ILP32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; ILP32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
 ; ILP32-NEXT:    addi sp, sp, 144
 ; ILP32-NEXT:    ret
 ;
@@ -837,9 +837,9 @@ define void @caller() nounwind {
 ; LP64-NEXT:    fsw ft0, %lo(var+4)(s0)
 ; LP64-NEXT:    flw ft0, 132(sp) # 4-byte Folded Reload
 ; LP64-NEXT:    fsw ft0, %lo(var)(s0)
-; LP64-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
-; LP64-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
 ; LP64-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
+; LP64-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
+; LP64-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
 ; LP64-NEXT:    addi sp, sp, 160
 ; LP64-NEXT:    ret
 ;
@@ -968,21 +968,21 @@ define void @caller() nounwind {
 ; ILP32F-NEXT:    fsw ft0, %lo(var+4)(s0)
 ; ILP32F-NEXT:    flw ft0, 80(sp) # 4-byte Folded Reload
 ; ILP32F-NEXT:    fsw ft0, %lo(var)(s0)
-; ILP32F-NEXT:    flw fs11, 84(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs10, 88(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs9, 92(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs8, 96(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs7, 100(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs6, 104(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs5, 108(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs4, 112(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs3, 116(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs2, 120(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs1, 124(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    flw fs0, 128(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ILP32F-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
 ; ILP32F-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs0, 128(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs1, 124(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs2, 120(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs3, 116(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs4, 112(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs5, 108(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs6, 104(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs7, 100(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs8, 96(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs9, 92(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs10, 88(sp) # 4-byte Folded Reload
+; ILP32F-NEXT:    flw fs11, 84(sp) # 4-byte Folded Reload
 ; ILP32F-NEXT:    addi sp, sp, 144
 ; ILP32F-NEXT:    ret
 ;
@@ -1111,21 +1111,21 @@ define void @caller() nounwind {
 ; LP64F-NEXT:    fsw ft0, %lo(var+4)(s0)
 ; LP64F-NEXT:    flw ft0, 84(sp) # 4-byte Folded Reload
 ; LP64F-NEXT:    fsw ft0, %lo(var)(s0)
-; LP64F-NEXT:    flw fs11, 88(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs10, 92(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs9, 96(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs8, 100(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs7, 104(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs6, 108(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs5, 112(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs4, 116(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs3, 120(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs2, 124(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs1, 128(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    flw fs0, 132(sp) # 4-byte Folded Reload
-; LP64F-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
-; LP64F-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
 ; LP64F-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
+; LP64F-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
+; LP64F-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
+; LP64F-NEXT:    flw fs0, 132(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs1, 128(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs2, 124(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs3, 120(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs4, 116(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs5, 112(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs6, 108(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs7, 104(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs8, 100(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs9, 96(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs10, 92(sp) # 4-byte Folded Reload
+; LP64F-NEXT:    flw fs11, 88(sp) # 4-byte Folded Reload
 ; LP64F-NEXT:    addi sp, sp, 160
 ; LP64F-NEXT:    ret
 ;
@@ -1254,21 +1254,21 @@ define void @caller() nounwind {
 ; ILP32D-NEXT:    fsw ft0, %lo(var+4)(s0)
 ; ILP32D-NEXT:    flw ft0, 76(sp) # 4-byte Folded Reload
 ; ILP32D-NEXT:    fsw ft0, %lo(var)(s0)
-; ILP32D-NEXT:    fld fs11, 80(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs10, 88(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs9, 96(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs8, 104(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs7, 112(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs6, 120(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs5, 128(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs4, 136(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs3, 144(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs2, 152(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs1, 160(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs0, 168(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    lw s1, 180(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    lw s0, 184(sp) # 4-byte Folded Reload
 ; ILP32D-NEXT:    lw ra, 188(sp) # 4-byte Folded Reload
+; ILP32D-NEXT:    lw s0, 184(sp) # 4-byte Folded Reload
+; ILP32D-NEXT:    lw s1, 180(sp) # 4-byte Folded Reload
+; ILP32D-NEXT:    fld fs0, 168(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs1, 160(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs2, 152(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs3, 144(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs4, 136(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs5, 128(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs6, 120(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs7, 112(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs8, 104(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs9, 96(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs10, 88(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs11, 80(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    addi sp, sp, 192
 ; ILP32D-NEXT:    ret
 ;
@@ -1397,21 +1397,21 @@ define void @caller() nounwind {
 ; LP64D-NEXT:    fsw ft0, %lo(var+4)(s0)
 ; LP64D-NEXT:    flw ft0, 84(sp) # 4-byte Folded Reload
 ; LP64D-NEXT:    fsw ft0, %lo(var)(s0)
-; LP64D-NEXT:    fld fs11, 88(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs10, 96(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs9, 104(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs8, 112(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs7, 120(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs6, 128(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs5, 136(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs4, 144(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs3, 152(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs2, 160(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs1, 168(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs0, 176(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    ld s1, 184(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    ld s0, 192(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    ld ra, 200(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    ld s0, 192(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    ld s1, 184(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs0, 176(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs1, 168(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs2, 160(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs3, 152(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs4, 144(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs5, 136(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs6, 128(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs7, 120(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs8, 112(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs9, 104(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs10, 96(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs11, 88(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    addi sp, sp, 208
 ; LP64D-NEXT:    ret
   %val = load [32 x float], [32 x float]* @var

diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
index e18cb59534924..acea232db2103 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll
@@ -238,18 +238,18 @@ define void @callee() nounwind {
 ; ILP32D-NEXT:    fsd ft2, 16(a1)
 ; ILP32D-NEXT:    fsd ft1, %lo(var+8)(a0)
 ; ILP32D-NEXT:    fsd ft0, %lo(var)(a0)
-; ILP32D-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    addi sp, sp, 96
 ; ILP32D-NEXT:    ret
 ;
@@ -334,18 +334,18 @@ define void @callee() nounwind {
 ; LP64D-NEXT:    fsd ft2, 16(a1)
 ; LP64D-NEXT:    fsd ft1, %lo(var+8)(a0)
 ; LP64D-NEXT:    fsd ft0, %lo(var)(a0)
-; LP64D-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    addi sp, sp, 96
 ; LP64D-NEXT:    ret
   %val = load [32 x double], [32 x double]* @var
@@ -498,9 +498,9 @@ define void @caller() nounwind {
 ; ILP32-NEXT:    fsd ft0, %lo(var+8)(s0)
 ; ILP32-NEXT:    fld ft0, 248(sp) # 8-byte Folded Reload
 ; ILP32-NEXT:    fsd ft0, %lo(var)(s0)
-; ILP32-NEXT:    lw s1, 260(sp) # 4-byte Folded Reload
-; ILP32-NEXT:    lw s0, 264(sp) # 4-byte Folded Reload
 ; ILP32-NEXT:    lw ra, 268(sp) # 4-byte Folded Reload
+; ILP32-NEXT:    lw s0, 264(sp) # 4-byte Folded Reload
+; ILP32-NEXT:    lw s1, 260(sp) # 4-byte Folded Reload
 ; ILP32-NEXT:    addi sp, sp, 272
 ; ILP32-NEXT:    ret
 ;
@@ -641,9 +641,9 @@ define void @caller() nounwind {
 ; LP64-NEXT:    fsd ft0, %lo(var+8)(s0)
 ; LP64-NEXT:    fld ft0, 256(sp) # 8-byte Folded Reload
 ; LP64-NEXT:    fsd ft0, %lo(var)(s0)
-; LP64-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
-; LP64-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
 ; LP64-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
+; LP64-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
+; LP64-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
 ; LP64-NEXT:    addi sp, sp, 288
 ; LP64-NEXT:    ret
 ;
@@ -772,21 +772,21 @@ define void @caller() nounwind {
 ; ILP32D-NEXT:    fsd ft0, %lo(var+8)(s0)
 ; ILP32D-NEXT:    fld ft0, 152(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    fsd ft0, %lo(var)(s0)
-; ILP32D-NEXT:    fld fs11, 160(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs10, 168(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs9, 176(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs8, 184(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs7, 192(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs6, 200(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs5, 208(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs4, 216(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs3, 224(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs2, 232(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs1, 240(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    fld fs0, 248(sp) # 8-byte Folded Reload
-; ILP32D-NEXT:    lw s1, 260(sp) # 4-byte Folded Reload
-; ILP32D-NEXT:    lw s0, 264(sp) # 4-byte Folded Reload
 ; ILP32D-NEXT:    lw ra, 268(sp) # 4-byte Folded Reload
+; ILP32D-NEXT:    lw s0, 264(sp) # 4-byte Folded Reload
+; ILP32D-NEXT:    lw s1, 260(sp) # 4-byte Folded Reload
+; ILP32D-NEXT:    fld fs0, 248(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs1, 240(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs2, 232(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs3, 224(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs4, 216(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs5, 208(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs6, 200(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs7, 192(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs8, 184(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs9, 176(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs10, 168(sp) # 8-byte Folded Reload
+; ILP32D-NEXT:    fld fs11, 160(sp) # 8-byte Folded Reload
 ; ILP32D-NEXT:    addi sp, sp, 272
 ; ILP32D-NEXT:    ret
 ;
@@ -915,21 +915,21 @@ define void @caller() nounwind {
 ; LP64D-NEXT:    fsd ft0, %lo(var+8)(s0)
 ; LP64D-NEXT:    fld ft0, 160(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    fsd ft0, %lo(var)(s0)
-; LP64D-NEXT:    fld fs11, 168(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs10, 176(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs9, 184(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs8, 192(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs7, 200(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs6, 208(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs5, 216(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs4, 224(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs3, 232(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs2, 240(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs1, 248(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    fld fs0, 256(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
-; LP64D-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs0, 256(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs1, 248(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs2, 240(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs3, 232(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs4, 224(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs5, 216(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs6, 208(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs7, 200(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs8, 192(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs9, 184(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs10, 176(sp) # 8-byte Folded Reload
+; LP64D-NEXT:    fld fs11, 168(sp) # 8-byte Folded Reload
 ; LP64D-NEXT:    addi sp, sp, 288
 ; LP64D-NEXT:    ret
   %val = load [32 x double], [32 x double]* @var

diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
index 024d015e4f549..1387a646ce994 100644
--- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
+++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll
@@ -120,19 +120,19 @@ define void @callee() nounwind {
 ; RV32I-NEXT:    sw a0, %lo(var+4)(a7)
 ; RV32I-NEXT:    lw a0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    sw a0, %lo(var)(a7)
-; RV32I-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 80
 ; RV32I-NEXT:    ret
 ;
@@ -233,19 +233,19 @@ define void @callee() nounwind {
 ; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+4)(a7)
 ; RV32I-WITH-FP-NEXT:    lw a0, -56(s0) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    sw a0, %lo(var)(a7)
-; RV32I-WITH-FP-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s0, 72(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s1, 68(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s2, 64(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s3, 60(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s4, 56(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s5, 52(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s6, 48(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s7, 44(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s8, 40(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s9, 36(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s10, 32(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s11, 28(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    addi sp, sp, 80
 ; RV32I-WITH-FP-NEXT:    ret
 ;
@@ -343,19 +343,19 @@ define void @callee() nounwind {
 ; RV64I-NEXT:    sw a0, %lo(var+4)(a7)
 ; RV64I-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    sw a0, %lo(var)(a7)
-; RV64I-NEXT:    ld s11, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s10, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s9, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s8, 80(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s7, 88(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s6, 96(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 112(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 120(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 128(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 128(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 112(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 96(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s10, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s11, 56(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 160
 ; RV64I-NEXT:    ret
 ;
@@ -456,19 +456,19 @@ define void @callee() nounwind {
 ; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+4)(a7)
 ; RV64I-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    sw a0, %lo(var)(a7)
-; RV64I-WITH-FP-NEXT:    ld s11, 56(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s10, 64(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s9, 72(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s8, 80(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s7, 88(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s6, 96(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s4, 112(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s3, 120(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s2, 128(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s2, 128(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s3, 120(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s4, 112(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s5, 104(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s6, 96(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s7, 88(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s8, 80(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s9, 72(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s10, 64(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s11, 56(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    addi sp, sp, 160
 ; RV64I-WITH-FP-NEXT:    ret
   %val = load [32 x i32], [32 x i32]* @var
@@ -607,19 +607,19 @@ define void @caller() nounwind {
 ; RV32I-NEXT:    sw a0, %lo(var+4)(s0)
 ; RV32I-NEXT:    lw a0, 88(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    sw a0, %lo(var)(s0)
-; RV32I-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 144
 ; RV32I-NEXT:    ret
 ;
@@ -753,19 +753,19 @@ define void @caller() nounwind {
 ; RV32I-WITH-FP-NEXT:    sw a0, %lo(var+4)(s6)
 ; RV32I-WITH-FP-NEXT:    lw a0, -56(s0) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    sw a0, %lo(var)(s6)
-; RV32I-WITH-FP-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; RV32I-WITH-FP-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; RV32I-WITH-FP-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
 ; RV32I-WITH-FP-NEXT:    addi sp, sp, 144
 ; RV32I-WITH-FP-NEXT:    ret
 ;
@@ -896,19 +896,19 @@ define void @caller() nounwind {
 ; RV64I-NEXT:    sw a0, %lo(var+4)(s0)
 ; RV64I-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    sw a0, %lo(var)(s0)
-; RV64I-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s10, 192(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s9, 200(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s8, 208(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s7, 216(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s6, 224(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 232(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 248(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 256(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 256(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 232(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 224(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 216(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 208(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 200(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s10, 192(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 288
 ; RV64I-NEXT:    ret
 ;
@@ -1042,19 +1042,19 @@ define void @caller() nounwind {
 ; RV64I-WITH-FP-NEXT:    sw a0, %lo(var+4)(s6)
 ; RV64I-WITH-FP-NEXT:    ld a0, -112(s0) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    sw a0, %lo(var)(s6)
-; RV64I-WITH-FP-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s10, 192(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s9, 200(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s8, 208(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s7, 216(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s6, 224(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s5, 232(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s3, 248(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s2, 256(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
-; RV64I-WITH-FP-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s2, 256(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s3, 248(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s5, 232(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s6, 224(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s7, 216(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s8, 208(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s9, 200(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s10, 192(sp) # 8-byte Folded Reload
+; RV64I-WITH-FP-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
 ; RV64I-WITH-FP-NEXT:    addi sp, sp, 288
 ; RV64I-WITH-FP-NEXT:    ret
 

diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
index e637a952aaf96..3f7515c9523ee 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll
@@ -23,8 +23,8 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    call __fixsfsi@plt
 ; RV32I-NEXT:    add a0, s0, a0
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -40,8 +40,8 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    call __fixsfdi@plt
 ; RV64I-NEXT:    addw a0, s0, a0
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
@@ -56,8 +56,8 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
 ; RV32IF-NEXT:    fmv.w.x ft0, a0
 ; RV32IF-NEXT:    fcvt.w.s a0, ft0, rtz
 ; RV32IF-NEXT:    add a0, s0, a0
-; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
 ;
@@ -72,8 +72,8 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
 ; RV64IF-NEXT:    fmv.w.x ft0, a0
 ; RV64IF-NEXT:    fcvt.l.s a0, ft0, rtz
 ; RV64IF-NEXT:    addw a0, s0, a0
-; RV64IF-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    addi sp, sp, 16
 ; RV64IF-NEXT:    ret
 ;
@@ -87,8 +87,8 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
 ; RV32-ILP32F-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32-ILP32F-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-ILP32F-NEXT:    add a0, s0, a0
-; RV32-ILP32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-ILP32F-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ILP32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-ILP32F-NEXT:    addi sp, sp, 16
 ; RV32-ILP32F-NEXT:    ret
 ;
@@ -102,8 +102,8 @@ define i32 @callee_half_in_regs(i32 %a, half %b) nounwind {
 ; RV64-LP64F-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64-LP64F-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-LP64F-NEXT:    addw a0, s0, a0
-; RV64-LP64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-LP64F-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-LP64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-LP64F-NEXT:    addi sp, sp, 16
 ; RV64-LP64F-NEXT:    ret
   %b_fptosi = fptosi half %b to i32
@@ -196,8 +196,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
 ; RV32I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32I-NEXT:    call __fixsfsi@plt
 ; RV32I-NEXT:    add a0, s0, a0
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -211,8 +211,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
 ; RV64I-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64I-NEXT:    call __fixsfdi@plt
 ; RV64I-NEXT:    addw a0, s0, a0
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
@@ -227,8 +227,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
 ; RV32IF-NEXT:    fmv.w.x ft0, a0
 ; RV32IF-NEXT:    fcvt.w.s a0, ft0, rtz
 ; RV32IF-NEXT:    add a0, s0, a0
-; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
 ;
@@ -243,8 +243,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
 ; RV64IF-NEXT:    fmv.w.x ft0, a0
 ; RV64IF-NEXT:    fcvt.l.s a0, ft0, rtz
 ; RV64IF-NEXT:    addw a0, s0, a0
-; RV64IF-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    addi sp, sp, 16
 ; RV64IF-NEXT:    ret
 ;
@@ -258,8 +258,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
 ; RV32-ILP32F-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32-ILP32F-NEXT:    fcvt.w.s a0, fa0, rtz
 ; RV32-ILP32F-NEXT:    add a0, s0, a0
-; RV32-ILP32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-ILP32F-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-ILP32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-ILP32F-NEXT:    addi sp, sp, 16
 ; RV32-ILP32F-NEXT:    ret
 ;
@@ -273,8 +273,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f,
 ; RV64-LP64F-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64-LP64F-NEXT:    fcvt.l.s a0, fa0, rtz
 ; RV64-LP64F-NEXT:    addw a0, s0, a0
-; RV64-LP64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-LP64F-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-LP64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-LP64F-NEXT:    addi sp, sp, 16
 ; RV64-LP64F-NEXT:    ret
   %1 = fptosi half %i to i32

diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll
index dfd04ebdd763e..ef9fb400bef11 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll
@@ -28,8 +28,8 @@ define i32 @callee_double_in_regs(i32 %a, double %b) nounwind {
 ; RV32I-FPELIM-NEXT:    mv a1, a2
 ; RV32I-FPELIM-NEXT:    call __fixdfsi@plt
 ; RV32I-FPELIM-NEXT:    add a0, s0, a0
-; RV32I-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-FPELIM-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-FPELIM-NEXT:    addi sp, sp, 16
 ; RV32I-FPELIM-NEXT:    ret
 ;
@@ -45,9 +45,9 @@ define i32 @callee_double_in_regs(i32 %a, double %b) nounwind {
 ; RV32I-WITHFP-NEXT:    mv a1, a2
 ; RV32I-WITHFP-NEXT:    call __fixdfsi@plt
 ; RV32I-WITHFP-NEXT:    add a0, s1, a0
-; RV32I-WITHFP-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %b_fptosi = fptosi double %b to i32
@@ -78,8 +78,8 @@ define i32 @caller_double_in_regs() nounwind {
 ; RV32I-WITHFP-NEXT:    lui a2, 262144
 ; RV32I-WITHFP-NEXT:    li a1, 0
 ; RV32I-WITHFP-NEXT:    call callee_double_in_regs@plt
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i32 @callee_double_in_regs(i32 1, double 2.0)
@@ -121,8 +121,8 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %
 ; RV32I-WITHFP-NEXT:    add a0, a0, a3
 ; RV32I-WITHFP-NEXT:    add a0, a0, a4
 ; RV32I-WITHFP-NEXT:    add a0, a0, a1
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = bitcast fp128 %c to i128
@@ -227,8 +227,8 @@ define void @caller_aligned_stack() nounwind {
 ; RV32I-WITHFP-NEXT:    li a7, 14
 ; RV32I-WITHFP-NEXT:    sw t0, -32(s0)
 ; RV32I-WITHFP-NEXT:    call callee_aligned_stack@plt
-; RV32I-WITHFP-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 64
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i32 @callee_aligned_stack(i32 1, i32 11,
@@ -253,8 +253,8 @@ define double @callee_small_scalar_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32I-WITHFP-NEXT:    lui a1, 261888
 ; RV32I-WITHFP-NEXT:    li a0, 0
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   ret double 1.0
@@ -277,8 +277,8 @@ define i64 @caller_small_scalar_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32I-WITHFP-NEXT:    call callee_small_scalar_ret@plt
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call double @callee_small_scalar_ret()

diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 7bf1dee701f9e..d15372c74c636 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -37,8 +37,8 @@ define i32 @callee_i64_in_regs(i32 %a, i64 %b) nounwind {
 ; RV32I-WITHFP-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32I-WITHFP-NEXT:    add a0, a0, a1
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %b_trunc = trunc i64 %b to i32
@@ -69,8 +69,8 @@ define i32 @caller_i64_in_regs() nounwind {
 ; RV32I-WITHFP-NEXT:    li a1, 2
 ; RV32I-WITHFP-NEXT:    li a2, 0
 ; RV32I-WITHFP-NEXT:    call callee_i64_in_regs@plt
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i32 @callee_i64_in_regs(i32 1, i64 2)
@@ -122,8 +122,8 @@ define i32 @callee_many_scalars(i8 %a, i16 %b, i32 %c, i64 %d, i32 %e, i32 %f, i
 ; RV32I-WITHFP-NEXT:    add a0, a0, a5
 ; RV32I-WITHFP-NEXT:    add a0, a0, a6
 ; RV32I-WITHFP-NEXT:    add a0, a0, t0
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %a_ext = zext i8 %a to i32
@@ -178,8 +178,8 @@ define i32 @caller_many_scalars() nounwind {
 ; RV32I-WITHFP-NEXT:    sw zero, 0(sp)
 ; RV32I-WITHFP-NEXT:    li a4, 0
 ; RV32I-WITHFP-NEXT:    call callee_many_scalars@plt
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i32 @callee_many_scalars(i8 1, i16 2, i32 3, i64 4, i32 5, i32 6, i64 7, i32 8)
@@ -232,8 +232,8 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind {
 ; RV32I-WITHFP-NEXT:    or a0, a1, a0
 ; RV32I-WITHFP-NEXT:    or a0, a0, a2
 ; RV32I-WITHFP-NEXT:    seqz a0, a0
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %b_bitcast = bitcast fp128 %b to i128
@@ -283,8 +283,8 @@ define i32 @caller_large_scalars() nounwind {
 ; RV32I-WITHFP-NEXT:    addi a1, s0, -48
 ; RV32I-WITHFP-NEXT:    sw a2, -24(s0)
 ; RV32I-WITHFP-NEXT:    call callee_large_scalars@plt
-; RV32I-WITHFP-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 48
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i32 @callee_large_scalars(i128 1, fp128 0xL00000000000000007FFF000000000000)
@@ -340,8 +340,8 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d,
 ; RV32I-WITHFP-NEXT:    or a0, a2, a0
 ; RV32I-WITHFP-NEXT:    or a0, a0, a1
 ; RV32I-WITHFP-NEXT:    seqz a0, a0
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %j_bitcast = bitcast fp128 %j to i128
@@ -411,8 +411,8 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind {
 ; RV32I-WITHFP-NEXT:    addi a7, s0, -24
 ; RV32I-WITHFP-NEXT:    sw t0, -24(s0)
 ; RV32I-WITHFP-NEXT:    call callee_large_scalars_exhausted_regs@plt
-; RV32I-WITHFP-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 64
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i32 @callee_large_scalars_exhausted_regs(
@@ -448,8 +448,8 @@ define i32 @caller_mixed_scalar_libcalls(i64 %a) nounwind {
 ; RV32I-WITHFP-NEXT:    addi a0, s0, -24
 ; RV32I-WITHFP-NEXT:    call __floatditf@plt
 ; RV32I-WITHFP-NEXT:    lw a0, -24(s0)
-; RV32I-WITHFP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 32
 ; RV32I-WITHFP-NEXT:    ret
   %1 = sitofp i64 %a to fp128
@@ -477,8 +477,8 @@ define i32 @callee_small_coerced_struct([2 x i32] %a.coerce) nounwind {
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32I-WITHFP-NEXT:    xor a0, a0, a1
 ; RV32I-WITHFP-NEXT:    seqz a0, a0
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = extractvalue [2 x i32] %a.coerce, 0
@@ -509,8 +509,8 @@ define i32 @caller_small_coerced_struct() nounwind {
 ; RV32I-WITHFP-NEXT:    li a0, 1
 ; RV32I-WITHFP-NEXT:    li a1, 2
 ; RV32I-WITHFP-NEXT:    call callee_small_coerced_struct@plt
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i32 @callee_small_coerced_struct([2 x i32] [i32 1, i32 2])
@@ -538,8 +538,8 @@ define i32 @callee_large_struct(%struct.large* byval(%struct.large) align 4 %a)
 ; RV32I-WITHFP-NEXT:    lw a1, 0(a0)
 ; RV32I-WITHFP-NEXT:    lw a0, 12(a0)
 ; RV32I-WITHFP-NEXT:    add a0, a1, a0
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = getelementptr inbounds %struct.large, %struct.large* %a, i32 0, i32 0
@@ -593,8 +593,8 @@ define i32 @caller_large_struct() nounwind {
 ; RV32I-WITHFP-NEXT:    sw a3, -28(s0)
 ; RV32I-WITHFP-NEXT:    addi a0, s0, -40
 ; RV32I-WITHFP-NEXT:    call callee_large_struct@plt
-; RV32I-WITHFP-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 48
 ; RV32I-WITHFP-NEXT:    ret
   %ls = alloca %struct.large, align 4
@@ -646,8 +646,8 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 %
 ; RV32I-WITHFP-NEXT:    add a0, a0, a3
 ; RV32I-WITHFP-NEXT:    add a0, a0, a4
 ; RV32I-WITHFP-NEXT:    add a0, a0, a1
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = bitcast fp128 %c to i128
@@ -745,8 +745,8 @@ define void @caller_aligned_stack() nounwind {
 ; RV32I-WITHFP-NEXT:    li a7, 14
 ; RV32I-WITHFP-NEXT:    sw t0, -32(s0)
 ; RV32I-WITHFP-NEXT:    call callee_aligned_stack@plt
-; RV32I-WITHFP-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 64
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i32 @callee_aligned_stack(i32 1, i32 11,
@@ -775,8 +775,8 @@ define i64 @callee_small_scalar_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    lui a0, 466866
 ; RV32I-WITHFP-NEXT:    addi a0, a0, 1677
 ; RV32I-WITHFP-NEXT:    li a1, 287
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   ret i64 1234567898765
@@ -815,8 +815,8 @@ define i32 @caller_small_scalar_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    xor a0, a0, a2
 ; RV32I-WITHFP-NEXT:    or a0, a0, a1
 ; RV32I-WITHFP-NEXT:    seqz a0, a0
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i64 @callee_small_scalar_ret()
@@ -842,8 +842,8 @@ define %struct.small @callee_small_struct_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32I-WITHFP-NEXT:    li a0, 1
 ; RV32I-WITHFP-NEXT:    li a1, 0
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   ret %struct.small { i32 1, i32* null }
@@ -868,8 +868,8 @@ define i32 @caller_small_struct_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32I-WITHFP-NEXT:    call callee_small_struct_ret@plt
 ; RV32I-WITHFP-NEXT:    add a0, a0, a1
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call %struct.small @callee_small_struct_ret()
@@ -903,8 +903,8 @@ define fp128 @callee_large_scalar_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    sw zero, 8(a0)
 ; RV32I-WITHFP-NEXT:    sw zero, 4(a0)
 ; RV32I-WITHFP-NEXT:    sw zero, 0(a0)
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   ret fp128 0xL00000000000000007FFF000000000000
@@ -929,8 +929,8 @@ define void @caller_large_scalar_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 32
 ; RV32I-WITHFP-NEXT:    addi a0, s0, -32
 ; RV32I-WITHFP-NEXT:    call callee_large_scalar_ret@plt
-; RV32I-WITHFP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 32
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call fp128 @callee_large_scalar_ret()
@@ -966,8 +966,8 @@ define void @callee_large_struct_ret(%struct.large* noalias sret(%struct.large)
 ; RV32I-WITHFP-NEXT:    sw a1, 8(a0)
 ; RV32I-WITHFP-NEXT:    li a1, 4
 ; RV32I-WITHFP-NEXT:    sw a1, 12(a0)
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %a = getelementptr inbounds %struct.large, %struct.large* %agg.result, i32 0, i32 0
@@ -1006,8 +1006,8 @@ define i32 @caller_large_struct_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    lw a0, -24(s0)
 ; RV32I-WITHFP-NEXT:    lw a1, -12(s0)
 ; RV32I-WITHFP-NEXT:    add a0, a0, a1
-; RV32I-WITHFP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 32
 ; RV32I-WITHFP-NEXT:    ret
   %1 = alloca %struct.large

diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll
index 8279f4d8abc7d..07acb9fd80a9c 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll
@@ -22,8 +22,8 @@ define i32 @callee_float_in_regs(i32 %a, float %b) nounwind {
 ; RV32I-FPELIM-NEXT:    mv a0, a1
 ; RV32I-FPELIM-NEXT:    call __fixsfsi@plt
 ; RV32I-FPELIM-NEXT:    add a0, s0, a0
-; RV32I-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-FPELIM-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-FPELIM-NEXT:    addi sp, sp, 16
 ; RV32I-FPELIM-NEXT:    ret
 ;
@@ -38,9 +38,9 @@ define i32 @callee_float_in_regs(i32 %a, float %b) nounwind {
 ; RV32I-WITHFP-NEXT:    mv a0, a1
 ; RV32I-WITHFP-NEXT:    call __fixsfsi@plt
 ; RV32I-WITHFP-NEXT:    add a0, s1, a0
-; RV32I-WITHFP-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %b_fptosi = fptosi float %b to i32
@@ -69,8 +69,8 @@ define i32 @caller_float_in_regs() nounwind {
 ; RV32I-WITHFP-NEXT:    li a0, 1
 ; RV32I-WITHFP-NEXT:    lui a1, 262144
 ; RV32I-WITHFP-NEXT:    call callee_float_in_regs@plt
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i32 @callee_float_in_regs(i32 1, float 2.0)
@@ -92,8 +92,8 @@ define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) noun
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32I-WITHFP-NEXT:    lw a0, 0(s0)
 ; RV32I-WITHFP-NEXT:    add a0, a6, a0
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = trunc i64 %d to i32
@@ -139,8 +139,8 @@ define i32 @caller_float_on_stack() nounwind {
 ; RV32I-WITHFP-NEXT:    li a5, 0
 ; RV32I-WITHFP-NEXT:    li a7, 0
 ; RV32I-WITHFP-NEXT:    call callee_float_on_stack@plt
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0)
@@ -160,8 +160,8 @@ define float @callee_tiny_scalar_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32I-WITHFP-NEXT:    lui a0, 260096
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   ret float 1.0
@@ -184,8 +184,8 @@ define i32 @caller_tiny_scalar_ret() nounwind {
 ; RV32I-WITHFP-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32I-WITHFP-NEXT:    call callee_tiny_scalar_ret@plt
-; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32I-WITHFP-NEXT:    ret
   %1 = call float @callee_tiny_scalar_ret()

diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-common.ll
index 05f9cb41eaa0d..f972172bd4952 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-common.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-common.ll
@@ -19,8 +19,8 @@ define i64 @callee_double_in_regs(i64 %a, double %b) nounwind {
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    call __fixdfdi@plt
 ; RV64I-NEXT:    add a0, s0, a0
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
   %b_fptosi = fptosi double %b to i64

diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll
index 95c993ee9495d..bf98412bc315c 100644
--- a/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll
+++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll
@@ -24,8 +24,8 @@ define i64 @callee_float_in_regs(i64 %a, float %b) nounwind {
 ; RV64I-FPELIM-NEXT:    mv a0, a1
 ; RV64I-FPELIM-NEXT:    call __fixsfdi@plt
 ; RV64I-FPELIM-NEXT:    add a0, s0, a0
-; RV64I-FPELIM-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-FPELIM-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-FPELIM-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-FPELIM-NEXT:    addi sp, sp, 16
 ; RV64I-FPELIM-NEXT:    ret
 ;
@@ -40,9 +40,9 @@ define i64 @callee_float_in_regs(i64 %a, float %b) nounwind {
 ; RV64I-WITHFP-NEXT:    mv a0, a1
 ; RV64I-WITHFP-NEXT:    call __fixsfdi@plt
 ; RV64I-WITHFP-NEXT:    add a0, s1, a0
-; RV64I-WITHFP-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    addi sp, sp, 32
 ; RV64I-WITHFP-NEXT:    ret
   %b_fptosi = fptosi float %b to i64
@@ -71,8 +71,8 @@ define i64 @caller_float_in_regs() nounwind {
 ; RV64I-WITHFP-NEXT:    li a0, 1
 ; RV64I-WITHFP-NEXT:    lui a1, 262144
 ; RV64I-WITHFP-NEXT:    call callee_float_in_regs@plt
-; RV64I-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV64I-WITHFP-NEXT:    ret
   %1 = call i64 @callee_float_in_regs(i64 1, float 2.0)
@@ -92,8 +92,8 @@ define i64 @callee_float_on_stack(i128 %a, i128 %b, i128 %c, i128 %d, float %e)
 ; RV64I-WITHFP-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV64I-WITHFP-NEXT:    lw a0, 0(s0)
-; RV64I-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV64I-WITHFP-NEXT:    ret
   %1 = trunc i128 %d to i64
@@ -140,8 +140,8 @@ define i64 @caller_float_on_stack() nounwind {
 ; RV64I-WITHFP-NEXT:    li a5, 0
 ; RV64I-WITHFP-NEXT:    li a7, 0
 ; RV64I-WITHFP-NEXT:    call callee_float_on_stack@plt
-; RV64I-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    addi sp, sp, 32
 ; RV64I-WITHFP-NEXT:    ret
   %1 = call i64 @callee_float_on_stack(i128 1, i128 2, i128 3, i128 4, float 5.0)
@@ -161,8 +161,8 @@ define float @callee_tiny_scalar_ret() nounwind {
 ; RV64I-WITHFP-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV64I-WITHFP-NEXT:    lui a0, 260096
-; RV64I-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV64I-WITHFP-NEXT:    ret
   ret float 1.0
@@ -190,8 +190,8 @@ define i64 @caller_tiny_scalar_ret() nounwind {
 ; RV64I-WITHFP-NEXT:    addi s0, sp, 16
 ; RV64I-WITHFP-NEXT:    call callee_tiny_scalar_ret@plt
 ; RV64I-WITHFP-NEXT:    sext.w a0, a0
-; RV64I-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-WITHFP-NEXT:    addi sp, sp, 16
 ; RV64I-WITHFP-NEXT:    ret
   %1 = call float @callee_tiny_scalar_ret()

diff --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll
index 7a964ebb42221..dd719b4a33a21 100644
--- a/llvm/test/CodeGen/RISCV/calls.ll
+++ b/llvm/test/CodeGen/RISCV/calls.ll
@@ -180,8 +180,8 @@ define i32 @test_call_fastcc(i32 %a, i32 %b) nounwind {
 ; RV32I-NEXT:    mv s0, a0
 ; RV32I-NEXT:    call fastcc_function@plt
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -193,8 +193,8 @@ define i32 @test_call_fastcc(i32 %a, i32 %b) nounwind {
 ; RV32I-PIC-NEXT:    mv s0, a0
 ; RV32I-PIC-NEXT:    call fastcc_function@plt
 ; RV32I-PIC-NEXT:    mv a0, s0
-; RV32I-PIC-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-PIC-NEXT:    addi sp, sp, 16
 ; RV32I-PIC-NEXT:    ret
   %1 = call fastcc i32 @fastcc_function(i32 %a, i32 %b)
@@ -221,8 +221,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind {
 ; RV32I-NEXT:    mv a7, a0
 ; RV32I-NEXT:    call external_many_args@plt
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -243,8 +243,8 @@ define i32 @test_call_external_many_args(i32 %a) nounwind {
 ; RV32I-PIC-NEXT:    mv a7, a0
 ; RV32I-PIC-NEXT:    call external_many_args@plt
 ; RV32I-PIC-NEXT:    mv a0, s0
-; RV32I-PIC-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-PIC-NEXT:    addi sp, sp, 16
 ; RV32I-PIC-NEXT:    ret
   %1 = call i32 @external_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a,

diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll
index 054c4820f534e..6ea6f224bc04c 100644
--- a/llvm/test/CodeGen/RISCV/copysign-casts.ll
+++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll
@@ -140,8 +140,8 @@ define double @fold_promote_d_h(double %a, half %b) nounwind {
 ; RV32IFD-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32IFD-NEXT:    fcvt.d.s ft0, fa0
 ; RV32IFD-NEXT:    fsgnj.d fa0, fs0, ft0
-; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
@@ -155,8 +155,8 @@ define double @fold_promote_d_h(double %a, half %b) nounwind {
 ; RV64IFD-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64IFD-NEXT:    fcvt.d.s ft0, fa0
 ; RV64IFD-NEXT:    fsgnj.d fa0, fs0, ft0
-; RV64IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV64IFD-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV64IFD-NEXT:    addi sp, sp, 16
 ; RV64IFD-NEXT:    ret
 ;
@@ -220,8 +220,8 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
 ; RV32IF-NEXT:    fmv.x.w a0, fa1
 ; RV32IF-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32IF-NEXT:    fsgnj.s fa0, fs0, fa0
-; RV32IF-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
 ;
@@ -234,8 +234,8 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
 ; RV32IFD-NEXT:    fmv.x.w a0, fa1
 ; RV32IFD-NEXT:    call __gnu_h2f_ieee@plt
 ; RV32IFD-NEXT:    fsgnj.s fa0, fs0, fa0
-; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV32IFD-NEXT:    addi sp, sp, 16
 ; RV32IFD-NEXT:    ret
 ;
@@ -248,8 +248,8 @@ define float @fold_promote_f_h(float %a, half %b) nounwind {
 ; RV64IFD-NEXT:    fmv.x.w a0, fa1
 ; RV64IFD-NEXT:    call __gnu_h2f_ieee@plt
 ; RV64IFD-NEXT:    fsgnj.s fa0, fs0, fa0
-; RV64IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV64IFD-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV64IFD-NEXT:    addi sp, sp, 16
 ; RV64IFD-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/double-arith.ll b/llvm/test/CodeGen/RISCV/double-arith.ll
index 1189011cf69c8..d3959af83d0da 100644
--- a/llvm/test/CodeGen/RISCV/double-arith.ll
+++ b/llvm/test/CodeGen/RISCV/double-arith.ll
@@ -813,11 +813,11 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a2, s3
 ; RV32I-NEXT:    mv a3, s2
 ; RV32I-NEXT:    call fma@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -838,9 +838,9 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call fma@plt
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %c_ = fadd double 0.0, %c ; avoid negation using xor
@@ -917,13 +917,13 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a2, s3
 ; RV32I-NEXT:    mv a3, s2
 ; RV32I-NEXT:    call fma@plt
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -949,10 +949,10 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    call fma@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %a_ = fadd double 0.0, %a
@@ -1032,13 +1032,13 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    mv a2, s5
 ; RV32I-NEXT:    call fma@plt
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1064,10 +1064,10 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    xor a2, a0, a2
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call fma@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %b_ = fadd double 0.0, %b
@@ -1133,11 +1133,11 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a4, s3
 ; RV32I-NEXT:    mv a5, s2
 ; RV32I-NEXT:    call fma@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1157,9 +1157,9 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    call fma@plt
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %a_ = fadd double 0.0, %a
@@ -1226,11 +1226,11 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a4, s3
 ; RV32I-NEXT:    mv a5, s2
 ; RV32I-NEXT:    call fma@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1251,9 +1251,9 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    call fma@plt
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %b_ = fadd double 0.0, %b
@@ -1303,9 +1303,9 @@ define double @fmadd_d_contract(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __adddf3@plt
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -1318,8 +1318,8 @@ define double @fmadd_d_contract(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    call __muldf3@plt
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __adddf3@plt
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
   %1 = fmul contract double %a, %b
@@ -1389,13 +1389,13 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a2, s4
 ; RV32I-NEXT:    mv a3, s5
 ; RV32I-NEXT:    call __subdf3@plt
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1417,10 +1417,10 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    call __muldf3@plt
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __subdf3@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %c_ = fadd double 0.0, %c ; avoid negation using xor
@@ -1509,13 +1509,13 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a2, s2
 ; RV32I-NEXT:    mv a3, s3
 ; RV32I-NEXT:    call __subdf3@plt
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1548,11 +1548,11 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __subdf3@plt
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %a_ = fadd double 0.0, %a ; avoid negation using xor
@@ -1633,13 +1633,13 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __subdf3@plt
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1664,10 +1664,10 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __subdf3@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %a_ = fadd double 0.0, %a ; avoid negation using xor

diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll
index 1c913c82ddd63..55a4e983037f1 100644
--- a/llvm/test/CodeGen/RISCV/double-convert.ll
+++ b/llvm/test/CodeGen/RISCV/double-convert.ll
@@ -204,13 +204,13 @@ define i32 @fcvt_w_d_sat(double %a) nounwind {
 ; RV32I-NEXT:    mv s0, s4
 ; RV32I-NEXT:  .LBB3_6: # %start
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -256,12 +256,12 @@ define i32 @fcvt_w_d_sat(double %a) nounwind {
 ; RV64I-NEXT:    mv s1, s3
 ; RV64I-NEXT:  .LBB3_6: # %start
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 start:
@@ -444,11 +444,11 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind {
 ; RV32I-NEXT:  # %bb.3: # %start
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:  .LBB6_4: # %start
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -482,10 +482,10 @@ define i32 @fcvt_wu_d_sat(double %a) nounwind {
 ; RV64I-NEXT:    srli s1, a0, 32
 ; RV64I-NEXT:  .LBB6_4: # %start
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 start:
@@ -846,15 +846,15 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV32I-NEXT:  .LBB12_12: # %start
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s0
-; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;
@@ -901,12 +901,12 @@ define i64 @fcvt_l_d_sat(double %a) nounwind {
 ; RV64I-NEXT:    mv s1, s2
 ; RV64I-NEXT:  .LBB12_7: # %start
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 start:
@@ -1070,14 +1070,14 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
 ; RV32I-NEXT:  .LBB14_8: # %start
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s2
-; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1110,10 +1110,10 @@ define i64 @fcvt_lu_d_sat(double %a) nounwind {
 ; RV64I-NEXT:  # %bb.3: # %start
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:  .LBB14_4: # %start
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 start:
@@ -1477,9 +1477,9 @@ define signext i32 @fcvt_d_w_demanded_bits(i32 signext %0, double* %1) {
 ; RV32I-NEXT:    sw a1, 4(s0)
 ; RV32I-NEXT:    sw a0, 0(s0)
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -1499,9 +1499,9 @@ define signext i32 @fcvt_d_w_demanded_bits(i32 signext %0, double* %1) {
 ; RV64I-NEXT:    call __floatsidf@plt
 ; RV64I-NEXT:    sd a0, 0(s0)
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %3 = add i32 %0, 1
@@ -1543,9 +1543,9 @@ define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, double* %1) {
 ; RV32I-NEXT:    sw a1, 4(s0)
 ; RV32I-NEXT:    sw a0, 0(s0)
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -1565,9 +1565,9 @@ define signext i32 @fcvt_d_wu_demanded_bits(i32 signext %0, double* %1) {
 ; RV64I-NEXT:    call __floatunsidf@plt
 ; RV64I-NEXT:    sd a0, 0(s0)
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %3 = add i32 %0, 1

diff --git a/llvm/test/CodeGen/RISCV/double-fcmp.ll b/llvm/test/CodeGen/RISCV/double-fcmp.ll
index e1e0bba34da6a..55c2d6b410e00 100644
--- a/llvm/test/CodeGen/RISCV/double-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/double-fcmp.ll
@@ -307,12 +307,12 @@ define i32 @fcmp_one(double %a, double %b) nounwind {
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    seqz a0, a0
 ; RV32I-NEXT:    and a0, a0, s4
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -332,10 +332,10 @@ define i32 @fcmp_one(double %a, double %b) nounwind {
 ; RV64I-NEXT:    call __unorddf2@plt
 ; RV64I-NEXT:    seqz a0, a0
 ; RV64I-NEXT:    and a0, a0, s2
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fcmp one double %a, %b
@@ -441,12 +441,12 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind {
 ; RV32I-NEXT:    call __unorddf2@plt
 ; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    or a0, a0, s4
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -466,10 +466,10 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind {
 ; RV64I-NEXT:    call __unorddf2@plt
 ; RV64I-NEXT:    snez a0, a0
 ; RV64I-NEXT:    or a0, a0, s2
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fcmp ueq double %a, %b

diff --git a/llvm/test/CodeGen/RISCV/double-intrinsics.ll b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
index 5ba90b7c53322..553e8fbeec379 100644
--- a/llvm/test/CodeGen/RISCV/double-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/double-intrinsics.ll
@@ -206,9 +206,9 @@ define double @sincos_f64(double %a) nounwind {
 ; RV32IFD-NEXT:    fsd ft0, 8(sp)
 ; RV32IFD-NEXT:    lw a0, 8(sp)
 ; RV32IFD-NEXT:    lw a1, 12(sp)
-; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32IFD-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; RV32IFD-NEXT:    addi sp, sp, 32
 ; RV32IFD-NEXT:    ret
 ;
@@ -227,8 +227,8 @@ define double @sincos_f64(double %a) nounwind {
 ; RV64IFD-NEXT:    fld ft1, 8(sp) # 8-byte Folded Reload
 ; RV64IFD-NEXT:    fadd.d ft0, ft1, ft0
 ; RV64IFD-NEXT:    fmv.x.d a0, ft0
-; RV64IFD-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IFD-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IFD-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IFD-NEXT:    addi sp, sp, 32
 ; RV64IFD-NEXT:    ret
 ;
@@ -253,11 +253,11 @@ define double @sincos_f64(double %a) nounwind {
 ; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    mv a1, s3
 ; RV32I-NEXT:    call __adddf3@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -275,9 +275,9 @@ define double @sincos_f64(double %a) nounwind {
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __adddf3@plt
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = call double @llvm.sin.f64(double %a)
@@ -633,9 +633,9 @@ define double @fmuladd_f64(double %a, double %b, double %c) nounwind {
 ; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    mv a3, s0
 ; RV32I-NEXT:    call __adddf3@plt
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -648,8 +648,8 @@ define double @fmuladd_f64(double %a, double %b, double %c) nounwind {
 ; RV64I-NEXT:    call __muldf3@plt
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __adddf3@plt
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
   %1 = call double @llvm.fmuladd.f64(double %a, double %b, double %c)

diff --git a/llvm/test/CodeGen/RISCV/exception-pointer-register.ll b/llvm/test/CodeGen/RISCV/exception-pointer-register.ll
index 0215295ded1f7..329ff94759e5f 100644
--- a/llvm/test/CodeGen/RISCV/exception-pointer-register.ll
+++ b/llvm/test/CodeGen/RISCV/exception-pointer-register.ll
@@ -37,9 +37,9 @@ define void @caller(i1* %p) personality i8* bitcast (i32 (...)* @__gxx_personali
 ; RV32I-NEXT:    call foo@plt
 ; RV32I-NEXT:  .Ltmp3:
 ; RV32I-NEXT:  .LBB0_3: # %end2
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ; RV32I-NEXT:  .LBB0_4: # %lpad
@@ -74,9 +74,9 @@ define void @caller(i1* %p) personality i8* bitcast (i32 (...)* @__gxx_personali
 ; RV64I-NEXT:    call foo@plt
 ; RV64I-NEXT:  .Ltmp3:
 ; RV64I-NEXT:  .LBB0_3: # %end2
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ; RV64I-NEXT:  .LBB0_4: # %lpad

diff --git a/llvm/test/CodeGen/RISCV/fastcc-int.ll b/llvm/test/CodeGen/RISCV/fastcc-int.ll
index 363fd5d3cc734..0df0ce183c016 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-int.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-int.ll
@@ -45,8 +45,8 @@ define i32 @caller(<16 x i32> %A) nounwind {
 ; RV32-NEXT:    sw t1, 0(sp)
 ; RV32-NEXT:    mv a0, t0
 ; RV32-NEXT:    call callee@plt
-; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
@@ -76,8 +76,8 @@ define i32 @caller(<16 x i32> %A) nounwind {
 ; RV64-NEXT:    sd t1, 0(sp)
 ; RV64-NEXT:    mv a0, t0
 ; RV64-NEXT:    call callee@plt
-; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 48
 ; RV64-NEXT:    ret
 	%C = call fastcc i32 @callee(<16 x i32> %A)

diff --git a/llvm/test/CodeGen/RISCV/float-arith.ll b/llvm/test/CodeGen/RISCV/float-arith.ll
index 96a36c80ea562..94a5ad769d2f3 100644
--- a/llvm/test/CodeGen/RISCV/float-arith.ll
+++ b/llvm/test/CodeGen/RISCV/float-arith.ll
@@ -329,8 +329,8 @@ define float @fsgnjn_s(float %a, float %b) nounwind {
 ; RV32I-NEXT:    and a2, s0, a2
 ; RV32I-NEXT:    and a0, a0, a1
 ; RV32I-NEXT:    or a0, a2, a0
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -347,8 +347,8 @@ define float @fsgnjn_s(float %a, float %b) nounwind {
 ; RV64I-NEXT:    and a2, s0, a2
 ; RV64I-NEXT:    and a0, a0, a1
 ; RV64I-NEXT:    or a0, a2, a0
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
   %1 = fadd float %a, %b
@@ -693,9 +693,9 @@ define float @fmsub_s(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call fmaf@plt
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -715,9 +715,9 @@ define float @fmsub_s(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call fmaf@plt
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %c_ = fadd float 0.0, %c ; avoid negation using xor
@@ -772,10 +772,10 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    mv a0, a1
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call fmaf@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -800,10 +800,10 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    mv a0, a1
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    call fmaf@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %a_ = fadd float 0.0, %a
@@ -860,10 +860,10 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    xor a2, a0, a2
 ; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call fmaf@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -888,10 +888,10 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    xor a2, a0, a2
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call fmaf@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %b_ = fadd float 0.0, %b
@@ -940,9 +940,9 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    call fmaf@plt
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -961,9 +961,9 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    call fmaf@plt
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %a_ = fadd float 0.0, %a
@@ -1011,9 +1011,9 @@ define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    mv a2, s0
 ; RV32I-NEXT:    call fmaf@plt
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -1033,9 +1033,9 @@ define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    mv a2, s0
 ; RV64I-NEXT:    call fmaf@plt
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %b_ = fadd float 0.0, %b
@@ -1072,8 +1072,8 @@ define float @fmadd_s_contract(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __addsf3@plt
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -1086,8 +1086,8 @@ define float @fmadd_s_contract(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __addsf3@plt
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
   %1 = fmul contract float %a, %b
@@ -1136,10 +1136,10 @@ define float @fmsub_s_contract(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __subsf3@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -1161,10 +1161,10 @@ define float @fmsub_s_contract(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __subsf3@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %c_ = fadd float 0.0, %c ; avoid negation using xor
@@ -1228,11 +1228,11 @@ define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    xor a0, a0, a1
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __subsf3@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1264,11 +1264,11 @@ define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    xor a0, a0, a1
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __subsf3@plt
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %a_ = fadd float 0.0, %a ; avoid negation using xor
@@ -1326,10 +1326,10 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __subsf3@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -1354,10 +1354,10 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __subsf3@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %a_ = fadd float 0.0, %a ; avoid negation using xor

diff --git a/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll b/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll
index 73f47ffe67f92..dd3678b58f19e 100644
--- a/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll
+++ b/llvm/test/CodeGen/RISCV/float-bit-preserving-dagcombines.ll
@@ -79,9 +79,9 @@ define double @bitcast_double_and(double %a1, double %a2) nounwind {
 ; RV32F-NEXT:    mv a0, s1
 ; RV32F-NEXT:    mv a1, s0
 ; RV32F-NEXT:    call __adddf3@plt
-; RV32F-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32F-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32F-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32F-NEXT:    addi sp, sp, 16
 ; RV32F-NEXT:    ret
 ;
@@ -115,8 +115,8 @@ define double @bitcast_double_and(double %a1, double %a2) nounwind {
 ; RV64F-NEXT:    and a1, a0, a1
 ; RV64F-NEXT:    mv a0, s0
 ; RV64F-NEXT:    call __adddf3@plt
-; RV64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64F-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64F-NEXT:    addi sp, sp, 16
 ; RV64F-NEXT:    ret
 ;
@@ -202,9 +202,9 @@ define double @bitcast_double_xor(double %a1, double %a2) nounwind {
 ; RV32F-NEXT:    mv a0, s1
 ; RV32F-NEXT:    mv a1, s0
 ; RV32F-NEXT:    call __muldf3@plt
-; RV32F-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32F-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32F-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32F-NEXT:    addi sp, sp, 16
 ; RV32F-NEXT:    ret
 ;
@@ -238,8 +238,8 @@ define double @bitcast_double_xor(double %a1, double %a2) nounwind {
 ; RV64F-NEXT:    xor a1, a0, a1
 ; RV64F-NEXT:    mv a0, s0
 ; RV64F-NEXT:    call __muldf3@plt
-; RV64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64F-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64F-NEXT:    addi sp, sp, 16
 ; RV64F-NEXT:    ret
 ;
@@ -328,9 +328,9 @@ define double @bitcast_double_or(double %a1, double %a2) nounwind {
 ; RV32F-NEXT:    mv a0, s1
 ; RV32F-NEXT:    mv a1, s0
 ; RV32F-NEXT:    call __muldf3@plt
-; RV32F-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32F-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32F-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32F-NEXT:    addi sp, sp, 16
 ; RV32F-NEXT:    ret
 ;
@@ -365,8 +365,8 @@ define double @bitcast_double_or(double %a1, double %a2) nounwind {
 ; RV64F-NEXT:    or a1, a0, a1
 ; RV64F-NEXT:    mv a0, s0
 ; RV64F-NEXT:    call __muldf3@plt
-; RV64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64F-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64F-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64F-NEXT:    addi sp, sp, 16
 ; RV64F-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll
index 535190f885445..ea4732f72b9ca 100644
--- a/llvm/test/CodeGen/RISCV/float-br-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-br-fcmp.ll
@@ -668,8 +668,8 @@ define i32 @br_fcmp_store_load_stack_slot(float %a, float %b) nounwind {
 ; RV64IF-NEXT:    beqz a0, .LBB17_3
 ; RV64IF-NEXT:  # %bb.2: # %if.end4
 ; RV64IF-NEXT:    li a0, 0
-; RV64IF-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    addi sp, sp, 32
 ; RV64IF-NEXT:    ret
 ; RV64IF-NEXT:  .LBB17_3: # %if.then

diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll
index 5bbbae704b49f..f79afd41811b1 100644
--- a/llvm/test/CodeGen/RISCV/float-convert.ll
+++ b/llvm/test/CodeGen/RISCV/float-convert.ll
@@ -107,12 +107,12 @@ define i32 @fcvt_w_s_sat(float %a) nounwind {
 ; RV32I-NEXT:    mv s1, s3
 ; RV32I-NEXT:  .LBB1_6: # %start
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -154,12 +154,12 @@ define i32 @fcvt_w_s_sat(float %a) nounwind {
 ; RV64I-NEXT:    mv s1, s3
 ; RV64I-NEXT:  .LBB1_6: # %start
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 start:
@@ -319,10 +319,10 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind {
 ; RV32I-NEXT:  # %bb.3: # %start
 ; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:  .LBB4_4: # %start
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -354,10 +354,10 @@ define i32 @fcvt_wu_s_sat(float %a) nounwind {
 ; RV64I-NEXT:    srli s1, a0, 32
 ; RV64I-NEXT:  .LBB4_4: # %start
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 start:
@@ -669,8 +669,8 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
 ; RV32IF-NEXT:  .LBB12_7: # %start
 ; RV32IF-NEXT:    li a1, 0
 ; RV32IF-NEXT:  .LBB12_8: # %start
-; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
 ; RV32IF-NEXT:  .LBB12_9: # %start
@@ -768,14 +768,14 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
 ; RV32I-NEXT:  .LBB12_12: # %start
 ; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    mv a1, s1
-; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -819,12 +819,12 @@ define i64 @fcvt_l_s_sat(float %a) nounwind {
 ; RV64I-NEXT:    mv s1, s2
 ; RV64I-NEXT:  .LBB12_7: # %start
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 start:
@@ -901,8 +901,8 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
 ; RV32IF-NEXT:    mv a2, a1
 ; RV32IF-NEXT:  .LBB14_6: # %start
 ; RV32IF-NEXT:    mv a1, a2
-; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
 ; RV32IF-NEXT:  .LBB14_7: # %start
@@ -975,13 +975,13 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
 ; RV32I-NEXT:  .LBB14_8: # %start
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1013,10 +1013,10 @@ define i64 @fcvt_lu_s_sat(float %a) nounwind {
 ; RV64I-NEXT:  # %bb.3: # %start
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:  .LBB14_4: # %start
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 start:
@@ -1267,9 +1267,9 @@ define signext i32 @fcvt_s_w_demanded_bits(i32 signext %0, float* %1) {
 ; RV32I-NEXT:    call __floatsisf@plt
 ; RV32I-NEXT:    sw a0, 0(s0)
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -1289,9 +1289,9 @@ define signext i32 @fcvt_s_w_demanded_bits(i32 signext %0, float* %1) {
 ; RV64I-NEXT:    call __floatsisf@plt
 ; RV64I-NEXT:    sw a0, 0(s0)
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %3 = add i32 %0, 1
@@ -1332,9 +1332,9 @@ define signext i32 @fcvt_s_wu_demanded_bits(i32 signext %0, float* %1) {
 ; RV32I-NEXT:    call __floatunsisf@plt
 ; RV32I-NEXT:    sw a0, 0(s0)
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -1354,9 +1354,9 @@ define signext i32 @fcvt_s_wu_demanded_bits(i32 signext %0, float* %1) {
 ; RV64I-NEXT:    call __floatunsisf@plt
 ; RV64I-NEXT:    sw a0, 0(s0)
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %3 = add i32 %0, 1

diff --git a/llvm/test/CodeGen/RISCV/float-fcmp.ll b/llvm/test/CodeGen/RISCV/float-fcmp.ll
index af5e356269bac..c0cd6eda64670 100644
--- a/llvm/test/CodeGen/RISCV/float-fcmp.ll
+++ b/llvm/test/CodeGen/RISCV/float-fcmp.ll
@@ -265,10 +265,10 @@ define i32 @fcmp_one(float %a, float %b) nounwind {
 ; RV32I-NEXT:    call __unordsf2@plt
 ; RV32I-NEXT:    seqz a0, a0
 ; RV32I-NEXT:    and a0, a0, s2
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -288,10 +288,10 @@ define i32 @fcmp_one(float %a, float %b) nounwind {
 ; RV64I-NEXT:    call __unordsf2@plt
 ; RV64I-NEXT:    seqz a0, a0
 ; RV64I-NEXT:    and a0, a0, s2
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fcmp one float %a, %b
@@ -379,10 +379,10 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind {
 ; RV32I-NEXT:    call __unordsf2@plt
 ; RV32I-NEXT:    snez a0, a0
 ; RV32I-NEXT:    or a0, a0, s2
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -402,10 +402,10 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind {
 ; RV64I-NEXT:    call __unordsf2@plt
 ; RV64I-NEXT:    snez a0, a0
 ; RV64I-NEXT:    or a0, a0, s2
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fcmp ueq float %a, %b

diff --git a/llvm/test/CodeGen/RISCV/float-intrinsics.ll b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
index 13cb256d8cef2..bfc35fed1fcfd 100644
--- a/llvm/test/CodeGen/RISCV/float-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/float-intrinsics.ll
@@ -195,8 +195,8 @@ define float @sincos_f32(float %a) nounwind {
 ; RV32IF-NEXT:    flw ft1, 4(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    fadd.s ft0, ft1, ft0
 ; RV32IF-NEXT:    fmv.x.w a0, ft0
-; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
 ;
@@ -215,8 +215,8 @@ define float @sincos_f32(float %a) nounwind {
 ; RV64IF-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
 ; RV64IF-NEXT:    fadd.s ft0, ft1, ft0
 ; RV64IF-NEXT:    fmv.x.w a0, ft0
-; RV64IF-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    addi sp, sp, 32
 ; RV64IF-NEXT:    ret
 ;
@@ -234,9 +234,9 @@ define float @sincos_f32(float %a) nounwind {
 ; RV32I-NEXT:    mv a1, a0
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __addsf3@plt
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -254,9 +254,9 @@ define float @sincos_f32(float %a) nounwind {
 ; RV64I-NEXT:    mv a1, a0
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __addsf3@plt
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = call float @llvm.sin.f32(float %a)
@@ -589,8 +589,8 @@ define float @fmuladd_f32(float %a, float %b, float %c) nounwind {
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __addsf3@plt
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -603,8 +603,8 @@ define float @fmuladd_f32(float %a, float %b, float %c) nounwind {
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __addsf3@plt
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
   %1 = call float @llvm.fmuladd.f32(float %a, float %b, float %c)

diff --git a/llvm/test/CodeGen/RISCV/fp16-promote.ll b/llvm/test/CodeGen/RISCV/fp16-promote.ll
index b2c72505ebd62..6bb39b27db820 100644
--- a/llvm/test/CodeGen/RISCV/fp16-promote.ll
+++ b/llvm/test/CodeGen/RISCV/fp16-promote.ll
@@ -52,8 +52,8 @@ define void @test_fptrunc_float(float %f, half* %p) nounwind {
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    call __gnu_f2h_ieee@plt
 ; CHECK-NEXT:    sh a0, 0(s0)
-; CHECK-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %a = fptrunc float %f to half
@@ -70,8 +70,8 @@ define void @test_fptrunc_double(double %d, half* %p) nounwind {
 ; CHECK-NEXT:    mv s0, a0
 ; CHECK-NEXT:    call __truncdfhf2@plt
 ; CHECK-NEXT:    sh a0, 0(s0)
-; CHECK-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
   %a = fptrunc double %d to half
@@ -97,10 +97,10 @@ define void @test_fadd(half* %p, half* %q) nounwind {
 ; CHECK-NEXT:    fadd.s fa0, fa0, fs0
 ; CHECK-NEXT:    call __gnu_f2h_ieee@plt
 ; CHECK-NEXT:    sh a0, 0(s0)
-; CHECK-NEXT:    fld fs0, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 8(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %a = load half, half* %p
@@ -128,10 +128,10 @@ define void @test_fmul(half* %p, half* %q) nounwind {
 ; CHECK-NEXT:    fmul.s fa0, fa0, fs0
 ; CHECK-NEXT:    call __gnu_f2h_ieee@plt
 ; CHECK-NEXT:    sh a0, 0(s0)
-; CHECK-NEXT:    fld fs0, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    fld fs0, 8(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %a = load half, half* %p

diff --git a/llvm/test/CodeGen/RISCV/frame-info.ll b/llvm/test/CodeGen/RISCV/frame-info.ll
index aa15b2478ffea..0a28df1cf5fb2 100644
--- a/llvm/test/CodeGen/RISCV/frame-info.ll
+++ b/llvm/test/CodeGen/RISCV/frame-info.ll
@@ -27,8 +27,8 @@ define void @trivial() {
 ; RV32-WITHFP-NEXT:    .cfi_offset s0, -8
 ; RV32-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32-WITHFP-NEXT:    .cfi_def_cfa s0, 0
-; RV32-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32-WITHFP-NEXT:    ret
 ;
@@ -42,8 +42,8 @@ define void @trivial() {
 ; RV64-WITHFP-NEXT:    .cfi_offset s0, -16
 ; RV64-WITHFP-NEXT:    addi s0, sp, 16
 ; RV64-WITHFP-NEXT:    .cfi_def_cfa s0, 0
-; RV64-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-WITHFP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-WITHFP-NEXT:    addi sp, sp, 16
 ; RV64-WITHFP-NEXT:    ret
   ret void
@@ -66,8 +66,8 @@ define void @stack_alloc(i32 signext %size) {
 ; RV32-NEXT:    mv sp, a0
 ; RV32-NEXT:    call callee_with_args@plt
 ; RV32-NEXT:    addi sp, s0, -16
-; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -89,8 +89,8 @@ define void @stack_alloc(i32 signext %size) {
 ; RV64-NEXT:    mv sp, a0
 ; RV64-NEXT:    call callee_with_args@plt
 ; RV64-NEXT:    addi sp, s0, -16
-; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
@@ -110,8 +110,8 @@ define void @stack_alloc(i32 signext %size) {
 ; RV32-WITHFP-NEXT:    mv sp, a0
 ; RV32-WITHFP-NEXT:    call callee_with_args@plt
 ; RV32-WITHFP-NEXT:    addi sp, s0, -16
-; RV32-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32-WITHFP-NEXT:    ret
 ;
@@ -133,8 +133,8 @@ define void @stack_alloc(i32 signext %size) {
 ; RV64-WITHFP-NEXT:    mv sp, a0
 ; RV64-WITHFP-NEXT:    call callee_with_args@plt
 ; RV64-WITHFP-NEXT:    addi sp, s0, -16
-; RV64-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-WITHFP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-WITHFP-NEXT:    addi sp, sp, 16
 ; RV64-WITHFP-NEXT:    ret
 entry:
@@ -192,8 +192,8 @@ define void @branch_and_tail_call(i1 %a) {
 ; RV32-WITHFP-NEXT:    addi s0, sp, 16
 ; RV32-WITHFP-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-WITHFP-NEXT:    call callee2@plt
-; RV32-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-WITHFP-NEXT:    addi sp, sp, 16
 ; RV32-WITHFP-NEXT:    ret
 ;
@@ -213,8 +213,8 @@ define void @branch_and_tail_call(i1 %a) {
 ; RV64-WITHFP-NEXT:    addi s0, sp, 16
 ; RV64-WITHFP-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-WITHFP-NEXT:    call callee2@plt
-; RV64-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-WITHFP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64-WITHFP-NEXT:    addi sp, sp, 16
 ; RV64-WITHFP-NEXT:    ret
   br i1 %a, label %blue_pill, label %red_pill

diff --git a/llvm/test/CodeGen/RISCV/frame.ll b/llvm/test/CodeGen/RISCV/frame.ll
index 3cde03b8d33e2..6f24bea9d3523 100644
--- a/llvm/test/CodeGen/RISCV/frame.ll
+++ b/llvm/test/CodeGen/RISCV/frame.ll
@@ -37,8 +37,8 @@ define i32 @test() nounwind {
 ; RV32I-WITHFP-NEXT:    addi a0, s0, -28
 ; RV32I-WITHFP-NEXT:    call test1@plt
 ; RV32I-WITHFP-NEXT:    li a0, 0
-; RV32I-WITHFP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 32
 ; RV32I-WITHFP-NEXT:    ret
   %key = alloca %struct.key_t, align 4

diff --git a/llvm/test/CodeGen/RISCV/frameaddr-returnaddr.ll b/llvm/test/CodeGen/RISCV/frameaddr-returnaddr.ll
index 369cf7be0a771..1914abfe63a43 100644
--- a/llvm/test/CodeGen/RISCV/frameaddr-returnaddr.ll
+++ b/llvm/test/CodeGen/RISCV/frameaddr-returnaddr.ll
@@ -16,8 +16,8 @@ define i8* @test_frameaddress_0() nounwind {
 ; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
 ; RV32I-NEXT:    addi s0, sp, 16
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -28,8 +28,8 @@ define i8* @test_frameaddress_0() nounwind {
 ; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
 ; RV64I-NEXT:    addi s0, sp, 16
 ; RV64I-NEXT:    mv a0, s0
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
   %1 = call i8* @llvm.frameaddress(i32 0)
@@ -45,8 +45,8 @@ define i8* @test_frameaddress_2() nounwind {
 ; RV32I-NEXT:    addi s0, sp, 16
 ; RV32I-NEXT:    lw a0, -8(s0)
 ; RV32I-NEXT:    lw a0, -8(a0)
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -58,8 +58,8 @@ define i8* @test_frameaddress_2() nounwind {
 ; RV64I-NEXT:    addi s0, sp, 16
 ; RV64I-NEXT:    ld a0, -16(s0)
 ; RV64I-NEXT:    ld a0, -16(a0)
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
   %1 = call i8* @llvm.frameaddress(i32 2)
@@ -78,8 +78,8 @@ define i8* @test_frameaddress_3_alloca() nounwind {
 ; RV32I-NEXT:    lw a0, -8(s0)
 ; RV32I-NEXT:    lw a0, -8(a0)
 ; RV32I-NEXT:    lw a0, -8(a0)
-; RV32I-NEXT:    lw s0, 104(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 108(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 104(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 112
 ; RV32I-NEXT:    ret
 ;
@@ -94,8 +94,8 @@ define i8* @test_frameaddress_3_alloca() nounwind {
 ; RV64I-NEXT:    ld a0, -16(s0)
 ; RV64I-NEXT:    ld a0, -16(a0)
 ; RV64I-NEXT:    ld a0, -16(a0)
-; RV64I-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 128
 ; RV64I-NEXT:    ret
   %1 = alloca [100 x i8]
@@ -129,8 +129,8 @@ define i8* @test_returnaddress_2() nounwind {
 ; RV32I-NEXT:    lw a0, -8(s0)
 ; RV32I-NEXT:    lw a0, -8(a0)
 ; RV32I-NEXT:    lw a0, -4(a0)
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -143,8 +143,8 @@ define i8* @test_returnaddress_2() nounwind {
 ; RV64I-NEXT:    ld a0, -16(s0)
 ; RV64I-NEXT:    ld a0, -16(a0)
 ; RV64I-NEXT:    ld a0, -8(a0)
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
   %1 = call i8* @llvm.returnaddress(i32 2)

diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll
index 96dbe1dba0f57..c8cfb4559adbe 100644
--- a/llvm/test/CodeGen/RISCV/half-arith.ll
+++ b/llvm/test/CodeGen/RISCV/half-arith.ll
@@ -42,10 +42,10 @@ define half @fadd_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -68,10 +68,10 @@ define half @fadd_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fadd half %a, %b
@@ -108,10 +108,10 @@ define half @fsub_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __subsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -134,10 +134,10 @@ define half @fsub_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __subsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fsub half %a, %b
@@ -174,10 +174,10 @@ define half @fmul_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __mulsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -200,10 +200,10 @@ define half @fmul_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __mulsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fmul half %a, %b
@@ -240,10 +240,10 @@ define half @fdiv_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __divsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -266,10 +266,10 @@ define half @fdiv_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __divsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fdiv half %a, %b
@@ -398,9 +398,9 @@ define i32 @fneg_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __eqsf2@plt
 ; RV32I-NEXT:    seqz a0, a0
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -429,9 +429,9 @@ define i32 @fneg_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __eqsf2@plt
 ; RV64I-NEXT:    seqz a0, a0
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fadd half %a, %a
@@ -488,11 +488,11 @@ define half @fsgnjn_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    addi a1, a1, -1
 ; RV32I-NEXT:    and a1, s2, a1
 ; RV32I-NEXT:    or a0, a1, a0
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -528,11 +528,11 @@ define half @fsgnjn_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    addiw a1, a1, -1
 ; RV64I-NEXT:    and a1, s2, a1
 ; RV64I-NEXT:    or a0, a1, a0
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = fadd half %a, %b
@@ -591,10 +591,10 @@ define half @fabs_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -629,10 +629,10 @@ define half @fabs_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fadd half %a, %b
@@ -673,10 +673,10 @@ define half @fmin_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call fminf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -699,10 +699,10 @@ define half @fmin_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call fminf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = call half @llvm.minnum.f16(half %a, half %b)
@@ -741,10 +741,10 @@ define half @fmax_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call fmaxf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -767,10 +767,10 @@ define half @fmax_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call fmaxf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = call half @llvm.maxnum.f16(half %a, half %b)
@@ -807,10 +807,10 @@ define i32 @feq_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __eqsf2@plt
 ; RV32I-NEXT:    seqz a0, a0
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -833,10 +833,10 @@ define i32 @feq_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __eqsf2@plt
 ; RV64I-NEXT:    seqz a0, a0
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fcmp oeq half %a, %b
@@ -874,10 +874,10 @@ define i32 @flt_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __ltsf2@plt
 ; RV32I-NEXT:    slti a0, a0, 0
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -900,10 +900,10 @@ define i32 @flt_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __ltsf2@plt
 ; RV64I-NEXT:    slti a0, a0, 0
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fcmp olt half %a, %b
@@ -941,10 +941,10 @@ define i32 @fle_s(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __lesf2@plt
 ; RV32I-NEXT:    slti a0, a0, 1
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -967,10 +967,10 @@ define i32 @fle_s(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __lesf2@plt
 ; RV64I-NEXT:    slti a0, a0, 1
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fcmp ole half %a, %b
@@ -1016,11 +1016,11 @@ define half @fmadd_s(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1049,11 +1049,11 @@ define half @fmadd_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = call half @llvm.fma.f16(half %a, half %b, half %c)
@@ -1112,12 +1112,12 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1158,12 +1158,12 @@ define half @fmsub_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %c_ = fadd half 0.0, %c ; avoid negation using xor
@@ -1238,12 +1238,12 @@ define half @fnmadd_s(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1296,12 +1296,12 @@ define half @fnmadd_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %a_ = fadd half 0.0, %a
@@ -1378,12 +1378,12 @@ define half @fnmadd_s_2(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1436,12 +1436,12 @@ define half @fnmadd_s_2(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %b_ = fadd half 0.0, %b
@@ -1503,12 +1503,12 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1548,12 +1548,12 @@ define half @fnmsub_s(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %a_ = fadd half 0.0, %a
@@ -1614,12 +1614,12 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a2, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1660,12 +1660,12 @@ define half @fnmsub_s_2(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a2, s1
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %b_ = fadd half 0.0, %b
@@ -1715,11 +1715,11 @@ define half @fmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1753,11 +1753,11 @@ define half @fmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = fmul contract half %a, %b
@@ -1816,11 +1816,11 @@ define half @fmsub_s_contract(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call __subsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1860,11 +1860,11 @@ define half @fmsub_s_contract(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call __subsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %c_ = fadd half 0.0, %c ; avoid negation using xor
@@ -1946,12 +1946,12 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __subsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2009,12 +2009,12 @@ define half @fnmadd_s_contract(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    call __subsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %a_ = fadd half 0.0, %a ; avoid negation using xor
@@ -2086,11 +2086,11 @@ define half @fnmsub_s_contract(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:    call __subsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -2137,11 +2137,11 @@ define half @fnmsub_s_contract(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:    call __subsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %a_ = fadd half 0.0, %a ; avoid negation using xor

diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll
index 5c7cd114cff13..43bbecc7f88b3 100644
--- a/llvm/test/CodeGen/RISCV/half-convert.ll
+++ b/llvm/test/CodeGen/RISCV/half-convert.ll
@@ -176,11 +176,11 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV32I-NEXT:    mv s1, s3
 ; RV32I-NEXT:  .LBB1_6: # %start
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -225,11 +225,11 @@ define i16 @fcvt_si_h_sat(half %a) nounwind {
 ; RV64I-NEXT:    mv s1, s3
 ; RV64I-NEXT:  .LBB1_6: # %start
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 start:
@@ -457,11 +457,11 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV32I-NEXT:    mv s2, s3
 ; RV32I-NEXT:  .LBB4_4: # %start
 ; RV32I-NEXT:    mv a0, s2
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -497,11 +497,11 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind {
 ; RV64I-NEXT:    mv s2, s3
 ; RV64I-NEXT:  .LBB4_4: # %start
 ; RV64I-NEXT:    mv a0, s2
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 start:
@@ -647,12 +647,12 @@ define i32 @fcvt_w_h_sat(half %a) nounwind {
 ; RV32I-NEXT:    mv s1, s3
 ; RV32I-NEXT:  .LBB6_6: # %start
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -698,12 +698,12 @@ define i32 @fcvt_w_h_sat(half %a) nounwind {
 ; RV64I-NEXT:    mv s1, s3
 ; RV64I-NEXT:  .LBB6_6: # %start
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 start:
@@ -839,10 +839,10 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; RV32I-NEXT:  # %bb.3: # %start
 ; RV32I-NEXT:    mv a0, s2
 ; RV32I-NEXT:  .LBB8_4: # %start
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -878,10 +878,10 @@ define i32 @fcvt_wu_h_sat(half %a) nounwind {
 ; RV64I-NEXT:    srli s1, a0, 32
 ; RV64I-NEXT:  .LBB8_4: # %start
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 start:
@@ -984,9 +984,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IZFH-NEXT:  .LBB10_7: # %start
 ; RV32IZFH-NEXT:    li a1, 0
 ; RV32IZFH-NEXT:  .LBB10_8: # %start
-; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    addi sp, sp, 16
 ; RV32IZFH-NEXT:    ret
 ; RV32IZFH-NEXT:  .LBB10_9: # %start
@@ -1051,9 +1051,9 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32IDZFH-NEXT:  .LBB10_7: # %start
 ; RV32IDZFH-NEXT:    li a1, 0
 ; RV32IDZFH-NEXT:  .LBB10_8: # %start
-; RV32IDZFH-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IDZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IDZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV32IDZFH-NEXT:    addi sp, sp, 16
 ; RV32IDZFH-NEXT:    ret
 ; RV32IDZFH-NEXT:  .LBB10_9: # %start
@@ -1154,14 +1154,14 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV32I-NEXT:  .LBB10_12: # %start
 ; RV32I-NEXT:    mv a0, s3
 ; RV32I-NEXT:    mv a1, s1
-; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1209,12 +1209,12 @@ define i64 @fcvt_l_h_sat(half %a) nounwind {
 ; RV64I-NEXT:    mv s1, s2
 ; RV64I-NEXT:  .LBB10_7: # %start
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 start:
@@ -1312,9 +1312,9 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32IZFH-NEXT:    mv a2, a1
 ; RV32IZFH-NEXT:  .LBB12_6: # %start
 ; RV32IZFH-NEXT:    mv a1, a2
-; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
-; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    addi sp, sp, 16
 ; RV32IZFH-NEXT:    ret
 ; RV32IZFH-NEXT:  .LBB12_7: # %start
@@ -1366,9 +1366,9 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32IDZFH-NEXT:    mv a2, a1
 ; RV32IDZFH-NEXT:  .LBB12_6: # %start
 ; RV32IDZFH-NEXT:    mv a1, a2
-; RV32IDZFH-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
-; RV32IDZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IDZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IDZFH-NEXT:    fld fs0, 0(sp) # 8-byte Folded Reload
 ; RV32IDZFH-NEXT:    addi sp, sp, 16
 ; RV32IDZFH-NEXT:    ret
 ; RV32IDZFH-NEXT:  .LBB12_7: # %start
@@ -1444,13 +1444,13 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV32I-NEXT:  .LBB12_8: # %start
 ; RV32I-NEXT:    mv a0, s4
 ; RV32I-NEXT:    mv a1, s3
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1486,10 +1486,10 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind {
 ; RV64I-NEXT:  # %bb.3: # %start
 ; RV64I-NEXT:    mv a0, s2
 ; RV64I-NEXT:  .LBB12_4: # %start
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 start:
@@ -2313,9 +2313,9 @@ define signext i32 @fcvt_h_w_demanded_bits(i32 signext %0, half* %1) {
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    sh a0, 0(s0)
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -2336,9 +2336,9 @@ define signext i32 @fcvt_h_w_demanded_bits(i32 signext %0, half* %1) {
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    sh a0, 0(s0)
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %3 = add i32 %0, 1
@@ -2394,9 +2394,9 @@ define signext i32 @fcvt_h_wu_demanded_bits(i32 signext %0, half* %1) {
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV32I-NEXT:    sh a0, 0(s0)
 ; RV32I-NEXT:    mv a0, s1
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -2417,9 +2417,9 @@ define signext i32 @fcvt_h_wu_demanded_bits(i32 signext %0, half* %1) {
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    sh a0, 0(s0)
 ; RV64I-NEXT:    mv a0, s1
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %3 = add i32 %0, 1

diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
index 13e6ea9c8470c..432faf999d890 100644
--- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll
@@ -120,11 +120,11 @@ define half @fma_f16(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call fmaf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -153,11 +153,11 @@ define half @fma_f16(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    call fmaf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee at plt
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = call half @llvm.fma.f16(half %a, half %b, half %c)
@@ -217,11 +217,11 @@ define half @fmuladd_f16(half %a, half %b, half %c) nounwind {
 ; RV32I-NEXT:    mv a1, s2
 ; RV32I-NEXT:    call __addsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -255,11 +255,11 @@ define half @fmuladd_f16(half %a, half %b, half %c) nounwind {
 ; RV64I-NEXT:    mv a1, s2
 ; RV64I-NEXT:    call __addsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
   %1 = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
@@ -348,10 +348,10 @@ define half @minnum_f16(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call fminf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -374,10 +374,10 @@ define half @minnum_f16(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call fminf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = call half @llvm.minnum.f16(half %a, half %b)
@@ -426,10 +426,10 @@ define half @maxnum_f16(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a0, s1
 ; RV32I-NEXT:    call fmaxf@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -452,10 +452,10 @@ define half @maxnum_f16(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a0, s1
 ; RV64I-NEXT:    call fmaxf@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = call half @llvm.maxnum.f16(half %a, half %b)

diff --git a/llvm/test/CodeGen/RISCV/half-mem.ll b/llvm/test/CodeGen/RISCV/half-mem.ll
index 711e68ef18d63..b6cf84976ee8e 100644
--- a/llvm/test/CodeGen/RISCV/half-mem.ll
+++ b/llvm/test/CodeGen/RISCV/half-mem.ll
@@ -124,8 +124,8 @@ define half @flh_stack(half %a) nounwind {
 ; RV32IZFH-NEXT:    call notdead@plt
 ; RV32IZFH-NEXT:    flh ft0, 4(sp)
 ; RV32IZFH-NEXT:    fadd.h fa0, ft0, fs0
-; RV32IZFH-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IZFH-NEXT:    flw fs0, 8(sp) # 4-byte Folded Reload
 ; RV32IZFH-NEXT:    addi sp, sp, 16
 ; RV32IZFH-NEXT:    ret
 ;
@@ -139,8 +139,8 @@ define half @flh_stack(half %a) nounwind {
 ; RV64IZFH-NEXT:    call notdead@plt
 ; RV64IZFH-NEXT:    flh ft0, 0(sp)
 ; RV64IZFH-NEXT:    fadd.h fa0, ft0, fs0
-; RV64IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
 ; RV64IZFH-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IZFH-NEXT:    flw fs0, 4(sp) # 4-byte Folded Reload
 ; RV64IZFH-NEXT:    addi sp, sp, 16
 ; RV64IZFH-NEXT:    ret
   %1 = alloca half, align 4

diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr-callee.ll b/llvm/test/CodeGen/RISCV/interrupt-attr-callee.ll
index 6dc86c54ec7ce..0016f5f39cd54 100644
--- a/llvm/test/CodeGen/RISCV/interrupt-attr-callee.ll
+++ b/llvm/test/CodeGen/RISCV/interrupt-attr-callee.ll
@@ -22,8 +22,8 @@ define dso_local void @handler() nounwind {
 ; CHECK-RV32-NEXT:    mv s0, a0
 ; CHECK-RV32-NEXT:    call callee@plt
 ; CHECK-RV32-NEXT:    mv a0, s0
-; CHECK-RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-RV32-NEXT:    tail write@plt
 ;
@@ -38,8 +38,8 @@ define dso_local void @handler() nounwind {
 ; CHECK-RV32-F-NEXT:    mv s0, a0
 ; CHECK-RV32-F-NEXT:    call callee@plt
 ; CHECK-RV32-F-NEXT:    mv a0, s0
-; CHECK-RV32-F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32-F-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32-F-NEXT:    addi sp, sp, 16
 ; CHECK-RV32-F-NEXT:    tail write@plt
 ;
@@ -54,8 +54,8 @@ define dso_local void @handler() nounwind {
 ; CHECK-RV32-FD-NEXT:    mv s0, a0
 ; CHECK-RV32-FD-NEXT:    call callee@plt
 ; CHECK-RV32-FD-NEXT:    mv a0, s0
-; CHECK-RV32-FD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32-FD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32-FD-NEXT:    addi sp, sp, 16
 ; CHECK-RV32-FD-NEXT:    tail write@plt
 entry:

diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll b/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll
index 32ab8a36832f4..fc475306cdc07 100644
--- a/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll
+++ b/llvm/test/CodeGen/RISCV/interrupt-attr-nocall.ll
@@ -35,8 +35,8 @@ define void @foo_i32() nounwind #0 {
 ; CHECK-RV32-NEXT:    add a0, a1, a0
 ; CHECK-RV32-NEXT:    lui a1, %hi(c)
 ; CHECK-RV32-NEXT:    sw a0, %lo(c)(a1)
-; CHECK-RV32-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-RV32-NEXT:    mret
 ;
@@ -52,8 +52,8 @@ define void @foo_i32() nounwind #0 {
 ; CHECK-RV32IF-NEXT:    add a0, a1, a0
 ; CHECK-RV32IF-NEXT:    lui a1, %hi(c)
 ; CHECK-RV32IF-NEXT:    sw a0, %lo(c)(a1)
-; CHECK-RV32IF-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    addi sp, sp, 16
 ; CHECK-RV32IF-NEXT:    mret
 ;
@@ -69,8 +69,8 @@ define void @foo_i32() nounwind #0 {
 ; CHECK-RV32IFD-NEXT:    add a0, a1, a0
 ; CHECK-RV32IFD-NEXT:    lui a1, %hi(c)
 ; CHECK-RV32IFD-NEXT:    sw a0, %lo(c)(a1)
-; CHECK-RV32IFD-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    lw a1, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    addi sp, sp, 16
 ; CHECK-RV32IFD-NEXT:    mret
   %1 = load i32, i32* @a
@@ -100,10 +100,10 @@ define void @foo_fp_i32() nounwind #1 {
 ; CHECK-RV32-NEXT:    add a0, a1, a0
 ; CHECK-RV32-NEXT:    lui a1, %hi(c)
 ; CHECK-RV32-NEXT:    sw a0, %lo(c)(a1)
-; CHECK-RV32-NEXT:    lw a1, 0(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a1, 0(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    addi sp, sp, 16
 ; CHECK-RV32-NEXT:    mret
 ;
@@ -122,10 +122,10 @@ define void @foo_fp_i32() nounwind #1 {
 ; CHECK-RV32IF-NEXT:    add a0, a1, a0
 ; CHECK-RV32IF-NEXT:    lui a1, %hi(c)
 ; CHECK-RV32IF-NEXT:    sw a0, %lo(c)(a1)
-; CHECK-RV32IF-NEXT:    lw a1, 0(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a1, 0(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    addi sp, sp, 16
 ; CHECK-RV32IF-NEXT:    mret
 ;
@@ -144,10 +144,10 @@ define void @foo_fp_i32() nounwind #1 {
 ; CHECK-RV32IFD-NEXT:    add a0, a1, a0
 ; CHECK-RV32IFD-NEXT:    lui a1, %hi(c)
 ; CHECK-RV32IFD-NEXT:    sw a0, %lo(c)(a1)
-; CHECK-RV32IFD-NEXT:    lw a1, 0(sp) # 4-byte Folded Reload
-; CHECK-RV32IFD-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    lw a0, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    lw a1, 0(sp) # 4-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    addi sp, sp, 16
 ; CHECK-RV32IFD-NEXT:    mret
   %1 = load i32, i32* @a
@@ -188,22 +188,22 @@ define void @foo_float() nounwind #0 {
 ; CHECK-RV32-NEXT:    call __addsf3@plt
 ; CHECK-RV32-NEXT:    lui a1, %hi(d)
 ; CHECK-RV32-NEXT:    sw a0, %lo(d)(a1)
-; CHECK-RV32-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    addi sp, sp, 64
 ; CHECK-RV32-NEXT:    mret
 ;
@@ -220,9 +220,9 @@ define void @foo_float() nounwind #0 {
 ; CHECK-RV32IF-NEXT:    fadd.s ft0, ft0, ft1
 ; CHECK-RV32IF-NEXT:    lui a0, %hi(d)
 ; CHECK-RV32IF-NEXT:    fsw ft0, %lo(d)(a0)
-; CHECK-RV32IF-NEXT:    flw ft1, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft0, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    lw a0, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft0, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft1, 4(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    addi sp, sp, 16
 ; CHECK-RV32IF-NEXT:    mret
 ;
@@ -239,9 +239,9 @@ define void @foo_float() nounwind #0 {
 ; CHECK-RV32IFD-NEXT:    fadd.s ft0, ft0, ft1
 ; CHECK-RV32IFD-NEXT:    lui a0, %hi(d)
 ; CHECK-RV32IFD-NEXT:    fsw ft0, %lo(d)(a0)
-; CHECK-RV32IFD-NEXT:    fld ft1, 8(sp) # 8-byte Folded Reload
-; CHECK-RV32IFD-NEXT:    fld ft0, 16(sp) # 8-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    fld ft0, 16(sp) # 8-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    fld ft1, 8(sp) # 8-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    addi sp, sp, 32
 ; CHECK-RV32IFD-NEXT:    mret
   %1 = load float, float* @e
@@ -283,23 +283,23 @@ define void @foo_fp_float() nounwind #1 {
 ; CHECK-RV32-NEXT:    call __addsf3@plt
 ; CHECK-RV32-NEXT:    lui a1, %hi(d)
 ; CHECK-RV32-NEXT:    sw a0, %lo(d)(a1)
-; CHECK-RV32-NEXT:    lw t6, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t5, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t4, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t3, 24(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a7, 28(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a6, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a5, 36(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a4, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a3, 44(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a2, 48(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a1, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t2, 64(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t1, 68(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t0, 72(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t0, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t1, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t2, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a1, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a2, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a3, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a4, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a5, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a6, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a7, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t3, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t4, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t5, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t6, 12(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    addi sp, sp, 80
 ; CHECK-RV32-NEXT:    mret
 ;
@@ -319,11 +319,11 @@ define void @foo_fp_float() nounwind #1 {
 ; CHECK-RV32IF-NEXT:    fadd.s ft0, ft0, ft1
 ; CHECK-RV32IF-NEXT:    lui a0, %hi(d)
 ; CHECK-RV32IF-NEXT:    fsw ft0, %lo(d)(a0)
-; CHECK-RV32IF-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft0, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft0, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft1, 12(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    addi sp, sp, 32
 ; CHECK-RV32IF-NEXT:    mret
 ;
@@ -343,11 +343,11 @@ define void @foo_fp_float() nounwind #1 {
 ; CHECK-RV32IFD-NEXT:    fadd.s ft0, ft0, ft1
 ; CHECK-RV32IFD-NEXT:    lui a0, %hi(d)
 ; CHECK-RV32IFD-NEXT:    fsw ft0, %lo(d)(a0)
-; CHECK-RV32IFD-NEXT:    fld ft1, 0(sp) # 8-byte Folded Reload
-; CHECK-RV32IFD-NEXT:    fld ft0, 8(sp) # 8-byte Folded Reload
-; CHECK-RV32IFD-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    fld ft0, 8(sp) # 8-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    fld ft1, 0(sp) # 8-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    addi sp, sp, 32
 ; CHECK-RV32IFD-NEXT:    mret
   %1 = load float, float* @e
@@ -391,22 +391,22 @@ define void @foo_double() nounwind #0 {
 ; CHECK-RV32-NEXT:    lui a2, %hi(g)
 ; CHECK-RV32-NEXT:    sw a1, %lo(g+4)(a2)
 ; CHECK-RV32-NEXT:    sw a0, %lo(g)(a2)
-; CHECK-RV32-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    addi sp, sp, 64
 ; CHECK-RV32-NEXT:    mret
 ;
@@ -471,54 +471,54 @@ define void @foo_double() nounwind #0 {
 ; CHECK-RV32IF-NEXT:    lui a2, %hi(g)
 ; CHECK-RV32IF-NEXT:    sw a1, %lo(g+4)(a2)
 ; CHECK-RV32IF-NEXT:    sw a0, %lo(g)(a2)
-; CHECK-RV32IF-NEXT:    flw fs11, 0(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs10, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs9, 8(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs8, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs7, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs6, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs5, 24(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs4, 28(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs3, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs0, 44(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft11, 48(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft10, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft9, 56(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft8, 60(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa7, 64(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa6, 68(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa4, 76(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa3, 80(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa2, 84(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa1, 88(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa0, 92(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft7, 96(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft6, 100(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft5, 104(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft4, 108(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft3, 112(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft2, 116(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft1, 120(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft0, 124(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t6, 128(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t5, 132(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t4, 136(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t3, 140(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a7, 144(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a6, 148(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a5, 152(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a4, 156(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a3, 160(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a2, 164(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a1, 168(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a0, 172(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t2, 176(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t1, 180(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t0, 184(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    lw ra, 188(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t0, 184(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t1, 180(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t2, 176(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a0, 172(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a1, 168(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a2, 164(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a3, 160(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a4, 156(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a5, 152(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a6, 148(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a7, 144(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t3, 140(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t4, 136(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t5, 132(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t6, 128(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft0, 124(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft1, 120(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft2, 116(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft3, 112(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft4, 108(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft5, 104(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft6, 100(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft7, 96(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa0, 92(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa1, 88(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa2, 84(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa3, 80(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa4, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa6, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa7, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft8, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft9, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft10, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft11, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs0, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs3, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs4, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs5, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs6, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs7, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs8, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs9, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs10, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs11, 0(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    addi sp, sp, 192
 ; CHECK-RV32IF-NEXT:    mret
 ;
@@ -535,9 +535,9 @@ define void @foo_double() nounwind #0 {
 ; CHECK-RV32IFD-NEXT:    fadd.d ft0, ft0, ft1
 ; CHECK-RV32IFD-NEXT:    lui a0, %hi(g)
 ; CHECK-RV32IFD-NEXT:    fsd ft0, %lo(g)(a0)
-; CHECK-RV32IFD-NEXT:    fld ft1, 8(sp) # 8-byte Folded Reload
-; CHECK-RV32IFD-NEXT:    fld ft0, 16(sp) # 8-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    lw a0, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    fld ft0, 16(sp) # 8-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    fld ft1, 8(sp) # 8-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    addi sp, sp, 32
 ; CHECK-RV32IFD-NEXT:    mret
   %1 = load double, double* @h
@@ -582,23 +582,23 @@ define void @foo_fp_double() nounwind #1 {
 ; CHECK-RV32-NEXT:    lui a2, %hi(g)
 ; CHECK-RV32-NEXT:    sw a1, %lo(g+4)(a2)
 ; CHECK-RV32-NEXT:    sw a0, %lo(g)(a2)
-; CHECK-RV32-NEXT:    lw t6, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t5, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t4, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t3, 24(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a7, 28(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a6, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a5, 36(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a4, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a3, 44(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a2, 48(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a1, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t2, 64(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t1, 68(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t0, 72(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t0, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t1, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t2, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a1, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a2, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a3, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a4, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a5, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a6, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a7, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t3, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t4, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t5, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t6, 12(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    addi sp, sp, 80
 ; CHECK-RV32-NEXT:    mret
 ;
@@ -665,55 +665,55 @@ define void @foo_fp_double() nounwind #1 {
 ; CHECK-RV32IF-NEXT:    lui a2, %hi(g)
 ; CHECK-RV32IF-NEXT:    sw a1, %lo(g+4)(a2)
 ; CHECK-RV32IF-NEXT:    sw a0, %lo(g)(a2)
-; CHECK-RV32IF-NEXT:    flw fs11, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs10, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs9, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs8, 24(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs7, 28(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs6, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs5, 36(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs4, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs3, 44(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs2, 48(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs1, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fs0, 56(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft11, 60(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft10, 64(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft9, 68(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft8, 72(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa7, 76(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa6, 80(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa4, 88(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa3, 92(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa2, 96(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa1, 100(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw fa0, 104(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft7, 108(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft6, 112(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft5, 116(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft4, 120(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft3, 124(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft2, 128(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft1, 132(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    flw ft0, 136(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t6, 140(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t5, 144(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t4, 148(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t3, 152(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a7, 156(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a6, 160(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a5, 164(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a4, 168(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a3, 172(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a2, 176(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a1, 180(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw a0, 184(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw s0, 188(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t2, 192(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t1, 196(sp) # 4-byte Folded Reload
-; CHECK-RV32IF-NEXT:    lw t0, 200(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    lw ra, 204(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t0, 200(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t1, 196(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t2, 192(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw s0, 188(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a0, 184(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a1, 180(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a2, 176(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a3, 172(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a4, 168(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a5, 164(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a6, 160(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw a7, 156(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t3, 152(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t4, 148(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t5, 144(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    lw t6, 140(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft0, 136(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft1, 132(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft2, 128(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft3, 124(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft4, 120(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft5, 116(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft6, 112(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft7, 108(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa0, 104(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa1, 100(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa2, 96(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa3, 92(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa4, 88(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa6, 80(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fa7, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft8, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft9, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft10, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw ft11, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs0, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs1, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs2, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs3, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs4, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs5, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs6, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs7, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs8, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs9, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs10, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32IF-NEXT:    flw fs11, 12(sp) # 4-byte Folded Reload
 ; CHECK-RV32IF-NEXT:    addi sp, sp, 208
 ; CHECK-RV32IF-NEXT:    mret
 ;
@@ -733,11 +733,11 @@ define void @foo_fp_double() nounwind #1 {
 ; CHECK-RV32IFD-NEXT:    fadd.d ft0, ft0, ft1
 ; CHECK-RV32IFD-NEXT:    lui a0, %hi(g)
 ; CHECK-RV32IFD-NEXT:    fsd ft0, %lo(g)(a0)
-; CHECK-RV32IFD-NEXT:    fld ft1, 0(sp) # 8-byte Folded Reload
-; CHECK-RV32IFD-NEXT:    fld ft0, 8(sp) # 8-byte Folded Reload
-; CHECK-RV32IFD-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    lw a0, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    fld ft0, 8(sp) # 8-byte Folded Reload
+; CHECK-RV32IFD-NEXT:    fld ft1, 0(sp) # 8-byte Folded Reload
 ; CHECK-RV32IFD-NEXT:    addi sp, sp, 32
 ; CHECK-RV32IFD-NEXT:    mret
   %1 = load double, double* @h

diff --git a/llvm/test/CodeGen/RISCV/interrupt-attr.ll b/llvm/test/CodeGen/RISCV/interrupt-attr.ll
index 8300a5b8ca963..ea9235c08aa33 100644
--- a/llvm/test/CodeGen/RISCV/interrupt-attr.ll
+++ b/llvm/test/CodeGen/RISCV/interrupt-attr.ll
@@ -71,22 +71,22 @@ define void @foo_with_call() #2 {
 ; CHECK-RV32-NEXT:    sw t5, 4(sp) # 4-byte Folded Spill
 ; CHECK-RV32-NEXT:    sw t6, 0(sp) # 4-byte Folded Spill
 ; CHECK-RV32-NEXT:    call otherfoo@plt
-; CHECK-RV32-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    addi sp, sp, 64
 ; CHECK-RV32-NEXT:    mret
 ;
@@ -142,54 +142,54 @@ define void @foo_with_call() #2 {
 ; CHECK-RV32-F-NEXT:    fsw fs10, 4(sp) # 4-byte Folded Spill
 ; CHECK-RV32-F-NEXT:    fsw fs11, 0(sp) # 4-byte Folded Spill
 ; CHECK-RV32-F-NEXT:    call otherfoo@plt
-; CHECK-RV32-F-NEXT:    flw fs11, 0(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs10, 4(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs9, 8(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs8, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs7, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs6, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs5, 24(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs4, 28(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs3, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs0, 44(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft11, 48(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft10, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft9, 56(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft8, 60(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa7, 64(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa6, 68(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa4, 76(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa3, 80(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa2, 84(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa1, 88(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa0, 92(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft7, 96(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft6, 100(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft5, 104(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft4, 108(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft3, 112(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft2, 116(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft1, 120(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft0, 124(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t6, 128(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t5, 132(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t4, 136(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t3, 140(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a7, 144(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a6, 148(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a5, 152(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a4, 156(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a3, 160(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a2, 164(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a1, 168(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a0, 172(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t2, 176(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t1, 180(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t0, 184(sp) # 4-byte Folded Reload
 ; CHECK-RV32-F-NEXT:    lw ra, 188(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t0, 184(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t1, 180(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t2, 176(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a0, 172(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a1, 168(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a2, 164(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a3, 160(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a4, 156(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a5, 152(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a6, 148(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a7, 144(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t3, 140(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t4, 136(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t5, 132(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t6, 128(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft0, 124(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft1, 120(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft2, 116(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft3, 112(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft4, 108(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft5, 104(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft6, 100(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft7, 96(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa0, 92(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa1, 88(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa2, 84(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa3, 80(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa4, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa6, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa7, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft8, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft9, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft10, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft11, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs0, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs3, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs4, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs5, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs6, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs7, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs8, 12(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs9, 8(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs10, 4(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs11, 0(sp) # 4-byte Folded Reload
 ; CHECK-RV32-F-NEXT:    addi sp, sp, 192
 ; CHECK-RV32-F-NEXT:    mret
 ;
@@ -245,54 +245,54 @@ define void @foo_with_call() #2 {
 ; CHECK-RV32-FD-NEXT:    fsd fs10, 8(sp) # 8-byte Folded Spill
 ; CHECK-RV32-FD-NEXT:    fsd fs11, 0(sp) # 8-byte Folded Spill
 ; CHECK-RV32-FD-NEXT:    call otherfoo@plt
-; CHECK-RV32-FD-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft11, 96(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft10, 104(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft9, 112(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft8, 120(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa7, 128(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa6, 136(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa5, 144(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa4, 152(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa3, 160(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa2, 168(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa1, 176(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa0, 184(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft7, 192(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft6, 200(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft5, 208(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft4, 216(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft3, 224(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft2, 232(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft1, 240(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft0, 248(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t6, 256(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t5, 260(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t4, 264(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t3, 268(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a7, 272(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a6, 276(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a5, 280(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a4, 284(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a3, 288(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a2, 292(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a1, 296(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a0, 300(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t2, 304(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t1, 308(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t0, 312(sp) # 4-byte Folded Reload
 ; CHECK-RV32-FD-NEXT:    lw ra, 316(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t0, 312(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t1, 308(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t2, 304(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a0, 300(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a1, 296(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a2, 292(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a3, 288(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a4, 284(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a5, 280(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a6, 276(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a7, 272(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t3, 268(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t4, 264(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t5, 260(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t6, 256(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft0, 248(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft1, 240(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft2, 232(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft3, 224(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft4, 216(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft5, 208(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft6, 200(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft7, 192(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa0, 184(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa1, 176(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa2, 168(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa3, 160(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa4, 152(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa5, 144(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa6, 136(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa7, 128(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft8, 120(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft9, 112(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft10, 104(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft11, 96(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
 ; CHECK-RV32-FD-NEXT:    addi sp, sp, 320
 ; CHECK-RV32-FD-NEXT:    mret
 ;
@@ -316,22 +316,22 @@ define void @foo_with_call() #2 {
 ; CHECK-RV64-NEXT:    sd t5, 8(sp) # 8-byte Folded Spill
 ; CHECK-RV64-NEXT:    sd t6, 0(sp) # 8-byte Folded Spill
 ; CHECK-RV64-NEXT:    call otherfoo@plt
-; CHECK-RV64-NEXT:    ld t6, 0(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t5, 8(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t4, 16(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t3, 24(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a7, 32(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a6, 40(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a5, 48(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a4, 56(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a3, 64(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t2, 96(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t1, 104(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t0, 112(sp) # 8-byte Folded Reload
 ; CHECK-RV64-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t0, 112(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t1, 104(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t2, 96(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a1, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a2, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a3, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a4, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a5, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a6, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a7, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t3, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t4, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t5, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t6, 0(sp) # 8-byte Folded Reload
 ; CHECK-RV64-NEXT:    addi sp, sp, 128
 ; CHECK-RV64-NEXT:    mret
 ;
@@ -387,54 +387,54 @@ define void @foo_with_call() #2 {
 ; CHECK-RV64-F-NEXT:    fsw fs10, 4(sp) # 4-byte Folded Spill
 ; CHECK-RV64-F-NEXT:    fsw fs11, 0(sp) # 4-byte Folded Spill
 ; CHECK-RV64-F-NEXT:    call otherfoo@plt
-; CHECK-RV64-F-NEXT:    flw fs11, 0(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs10, 4(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs9, 8(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs8, 12(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs7, 16(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs6, 20(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs5, 24(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs4, 28(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs3, 32(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs0, 44(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft11, 48(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft10, 52(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft9, 56(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft8, 60(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa7, 64(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa6, 68(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa4, 76(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa3, 80(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa2, 84(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa1, 88(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa0, 92(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft7, 96(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft6, 100(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft5, 104(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft4, 108(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft3, 112(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft2, 116(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft1, 120(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft0, 124(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t6, 128(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t5, 136(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t4, 144(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t3, 152(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a7, 160(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a6, 168(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a5, 176(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a3, 192(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a2, 200(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t2, 224(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t1, 232(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t0, 240(sp) # 8-byte Folded Reload
 ; CHECK-RV64-F-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t0, 240(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t1, 232(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t2, 224(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a1, 208(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a2, 200(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a3, 192(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a4, 184(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a5, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a6, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a7, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t3, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t4, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t5, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t6, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft0, 124(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft1, 120(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft2, 116(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft3, 112(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft4, 108(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft5, 104(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft6, 100(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft7, 96(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa0, 92(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa1, 88(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa2, 84(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa3, 80(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa4, 76(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa5, 72(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa6, 68(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa7, 64(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft8, 60(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft9, 56(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft10, 52(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft11, 48(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs0, 44(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs1, 40(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs2, 36(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs3, 32(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs4, 28(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs5, 24(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs6, 20(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs7, 16(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs8, 12(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs9, 8(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs10, 4(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs11, 0(sp) # 4-byte Folded Reload
 ; CHECK-RV64-F-NEXT:    addi sp, sp, 256
 ; CHECK-RV64-F-NEXT:    mret
 ;
@@ -490,54 +490,54 @@ define void @foo_with_call() #2 {
 ; CHECK-RV64-FD-NEXT:    fsd fs10, 8(sp) # 8-byte Folded Spill
 ; CHECK-RV64-FD-NEXT:    fsd fs11, 0(sp) # 8-byte Folded Spill
 ; CHECK-RV64-FD-NEXT:    call otherfoo@plt
-; CHECK-RV64-FD-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft11, 96(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft10, 104(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft9, 112(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft8, 120(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa7, 128(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa6, 136(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa5, 144(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa4, 152(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa3, 160(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa2, 168(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa1, 176(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa0, 184(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft7, 192(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft6, 200(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft5, 208(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft4, 216(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft3, 224(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft2, 232(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft1, 240(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft0, 248(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t6, 256(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t5, 264(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t4, 272(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t3, 280(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a7, 288(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a6, 296(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a5, 304(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a4, 312(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a3, 320(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a1, 336(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a0, 344(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t2, 352(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t1, 360(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t0, 368(sp) # 8-byte Folded Reload
 ; CHECK-RV64-FD-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t0, 368(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t1, 360(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t2, 352(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a0, 344(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a1, 336(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a2, 328(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a3, 320(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a4, 312(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a5, 304(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a6, 296(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a7, 288(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t3, 280(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t4, 272(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t5, 264(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t6, 256(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft0, 248(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft1, 240(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft2, 232(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft3, 224(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft4, 216(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft5, 208(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft6, 200(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft7, 192(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa0, 184(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa1, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa2, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa3, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa4, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa5, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa6, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa7, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft8, 120(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft9, 112(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft10, 104(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft11, 96(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs0, 88(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs1, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs2, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs3, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs4, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs5, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs6, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs7, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs8, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs9, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs10, 8(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs11, 0(sp) # 8-byte Folded Reload
 ; CHECK-RV64-FD-NEXT:    addi sp, sp, 384
 ; CHECK-RV64-FD-NEXT:    mret
   %call = call i32 bitcast (i32 (...)* @otherfoo to i32 ()*)()
@@ -571,23 +571,23 @@ define void @foo_fp_with_call() #3 {
 ; CHECK-RV32-NEXT:    sw t6, 12(sp) # 4-byte Folded Spill
 ; CHECK-RV32-NEXT:    addi s0, sp, 80
 ; CHECK-RV32-NEXT:    call otherfoo@plt
-; CHECK-RV32-NEXT:    lw t6, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t5, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t4, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t3, 24(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a7, 28(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a6, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a5, 36(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a4, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a3, 44(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a2, 48(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a1, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t2, 64(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t1, 68(sp) # 4-byte Folded Reload
-; CHECK-RV32-NEXT:    lw t0, 72(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    lw ra, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t0, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t1, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t2, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw s0, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a0, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a1, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a2, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a3, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a4, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a5, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a6, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw a7, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t3, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t4, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t5, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-NEXT:    lw t6, 12(sp) # 4-byte Folded Reload
 ; CHECK-RV32-NEXT:    addi sp, sp, 80
 ; CHECK-RV32-NEXT:    mret
 ;
@@ -645,55 +645,55 @@ define void @foo_fp_with_call() #3 {
 ; CHECK-RV32-F-NEXT:    fsw fs11, 12(sp) # 4-byte Folded Spill
 ; CHECK-RV32-F-NEXT:    addi s0, sp, 208
 ; CHECK-RV32-F-NEXT:    call otherfoo@plt
-; CHECK-RV32-F-NEXT:    flw fs11, 12(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs10, 16(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs9, 20(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs8, 24(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs7, 28(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs6, 32(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs5, 36(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs4, 40(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs3, 44(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs2, 48(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs1, 52(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fs0, 56(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft11, 60(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft10, 64(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft9, 68(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft8, 72(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa7, 76(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa6, 80(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa4, 88(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa3, 92(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa2, 96(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa1, 100(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw fa0, 104(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft7, 108(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft6, 112(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft5, 116(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft4, 120(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft3, 124(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft2, 128(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft1, 132(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    flw ft0, 136(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t6, 140(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t5, 144(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t4, 148(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t3, 152(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a7, 156(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a6, 160(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a5, 164(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a4, 168(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a3, 172(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a2, 176(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a1, 180(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw a0, 184(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw s0, 188(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t2, 192(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t1, 196(sp) # 4-byte Folded Reload
-; CHECK-RV32-F-NEXT:    lw t0, 200(sp) # 4-byte Folded Reload
 ; CHECK-RV32-F-NEXT:    lw ra, 204(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t0, 200(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t1, 196(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t2, 192(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw s0, 188(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a0, 184(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a1, 180(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a2, 176(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a3, 172(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a4, 168(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a5, 164(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a6, 160(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw a7, 156(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t3, 152(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t4, 148(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t5, 144(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    lw t6, 140(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft0, 136(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft1, 132(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft2, 128(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft3, 124(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft4, 120(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft5, 116(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft6, 112(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft7, 108(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa0, 104(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa1, 100(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa2, 96(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa3, 92(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa4, 88(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa5, 84(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa6, 80(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fa7, 76(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft8, 72(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft9, 68(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft10, 64(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw ft11, 60(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs0, 56(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs1, 52(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs2, 48(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs3, 44(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs4, 40(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs5, 36(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs6, 32(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs7, 28(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs8, 24(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs9, 20(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs10, 16(sp) # 4-byte Folded Reload
+; CHECK-RV32-F-NEXT:    flw fs11, 12(sp) # 4-byte Folded Reload
 ; CHECK-RV32-F-NEXT:    addi sp, sp, 208
 ; CHECK-RV32-F-NEXT:    mret
 ;
@@ -751,55 +751,55 @@ define void @foo_fp_with_call() #3 {
 ; CHECK-RV32-FD-NEXT:    fsd fs11, 8(sp) # 8-byte Folded Spill
 ; CHECK-RV32-FD-NEXT:    addi s0, sp, 336
 ; CHECK-RV32-FD-NEXT:    call otherfoo@plt
-; CHECK-RV32-FD-NEXT:    fld fs11, 8(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs10, 16(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs9, 24(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs8, 32(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs7, 40(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs6, 48(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs5, 56(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs4, 64(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs3, 72(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs2, 80(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs1, 88(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fs0, 96(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft11, 104(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft10, 112(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft9, 120(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft8, 128(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa7, 136(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa6, 144(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa5, 152(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa4, 160(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa3, 168(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa2, 176(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa1, 184(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld fa0, 192(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft7, 200(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft6, 208(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft5, 216(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft4, 224(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft3, 232(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft2, 240(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft1, 248(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    fld ft0, 256(sp) # 8-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t6, 268(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t5, 272(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t4, 276(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t3, 280(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a7, 284(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a6, 288(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a5, 292(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a4, 296(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a3, 300(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a2, 304(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a1, 308(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw a0, 312(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw s0, 316(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t2, 320(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t1, 324(sp) # 4-byte Folded Reload
-; CHECK-RV32-FD-NEXT:    lw t0, 328(sp) # 4-byte Folded Reload
 ; CHECK-RV32-FD-NEXT:    lw ra, 332(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t0, 328(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t1, 324(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t2, 320(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw s0, 316(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a0, 312(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a1, 308(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a2, 304(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a3, 300(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a4, 296(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a5, 292(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a6, 288(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw a7, 284(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t3, 280(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t4, 276(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t5, 272(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    lw t6, 268(sp) # 4-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft0, 256(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft1, 248(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft2, 240(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft3, 232(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft4, 224(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft5, 216(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft6, 208(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft7, 200(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa0, 192(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa1, 184(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa2, 176(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa3, 168(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa4, 160(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa5, 152(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa6, 144(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fa7, 136(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft8, 128(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft9, 120(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft10, 112(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld ft11, 104(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs0, 96(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs1, 88(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs2, 80(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs3, 72(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs4, 64(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs5, 56(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs6, 48(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs7, 40(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs8, 32(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs9, 24(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs10, 16(sp) # 8-byte Folded Reload
+; CHECK-RV32-FD-NEXT:    fld fs11, 8(sp) # 8-byte Folded Reload
 ; CHECK-RV32-FD-NEXT:    addi sp, sp, 336
 ; CHECK-RV32-FD-NEXT:    mret
 ;
@@ -825,23 +825,23 @@ define void @foo_fp_with_call() #3 {
 ; CHECK-RV64-NEXT:    sd t6, 8(sp) # 8-byte Folded Spill
 ; CHECK-RV64-NEXT:    addi s0, sp, 144
 ; CHECK-RV64-NEXT:    call otherfoo@plt
-; CHECK-RV64-NEXT:    ld t6, 8(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t5, 16(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t4, 24(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t3, 32(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a7, 40(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a6, 48(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a5, 56(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a4, 64(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a3, 72(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld s0, 104(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t1, 120(sp) # 8-byte Folded Reload
-; CHECK-RV64-NEXT:    ld t0, 128(sp) # 8-byte Folded Reload
 ; CHECK-RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t0, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t1, 120(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld s0, 104(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a1, 88(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a2, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a3, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a4, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a5, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a6, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld a7, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t3, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t4, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t5, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64-NEXT:    ld t6, 8(sp) # 8-byte Folded Reload
 ; CHECK-RV64-NEXT:    addi sp, sp, 144
 ; CHECK-RV64-NEXT:    mret
 ;
@@ -899,55 +899,55 @@ define void @foo_fp_with_call() #3 {
 ; CHECK-RV64-F-NEXT:    fsw fs11, 8(sp) # 4-byte Folded Spill
 ; CHECK-RV64-F-NEXT:    addi s0, sp, 272
 ; CHECK-RV64-F-NEXT:    call otherfoo@plt
-; CHECK-RV64-F-NEXT:    flw fs11, 8(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs10, 12(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs9, 16(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs8, 20(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs7, 24(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs6, 28(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs5, 32(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs4, 36(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs3, 40(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs2, 44(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs1, 48(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fs0, 52(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft11, 56(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft10, 60(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft9, 64(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft8, 68(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa7, 72(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa6, 76(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa5, 80(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa4, 84(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa3, 88(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa2, 92(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa1, 96(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw fa0, 100(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft7, 104(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft6, 108(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft5, 112(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft4, 116(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft3, 120(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft2, 124(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft1, 128(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    flw ft0, 132(sp) # 4-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t6, 136(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t5, 144(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t4, 152(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a7, 168(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a6, 176(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a5, 184(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a4, 192(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a3, 200(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a2, 208(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld s0, 232(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t2, 240(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
-; CHECK-RV64-F-NEXT:    ld t0, 256(sp) # 8-byte Folded Reload
 ; CHECK-RV64-F-NEXT:    ld ra, 264(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t0, 256(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t1, 248(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t2, 240(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld s0, 232(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a1, 216(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a2, 208(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a3, 200(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a4, 192(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a5, 184(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a6, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld a7, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t3, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t4, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t5, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    ld t6, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft0, 132(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft1, 128(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft2, 124(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft3, 120(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft4, 116(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft5, 112(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft6, 108(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft7, 104(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa0, 100(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa1, 96(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa2, 92(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa3, 88(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa4, 84(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa5, 80(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa6, 76(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fa7, 72(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft8, 68(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft9, 64(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft10, 60(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw ft11, 56(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs0, 52(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs1, 48(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs2, 44(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs3, 40(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs4, 36(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs5, 32(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs6, 28(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs7, 24(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs8, 20(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs9, 16(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs10, 12(sp) # 4-byte Folded Reload
+; CHECK-RV64-F-NEXT:    flw fs11, 8(sp) # 4-byte Folded Reload
 ; CHECK-RV64-F-NEXT:    addi sp, sp, 272
 ; CHECK-RV64-F-NEXT:    mret
 ;
@@ -1005,55 +1005,55 @@ define void @foo_fp_with_call() #3 {
 ; CHECK-RV64-FD-NEXT:    fsd fs11, 8(sp) # 8-byte Folded Spill
 ; CHECK-RV64-FD-NEXT:    addi s0, sp, 400
 ; CHECK-RV64-FD-NEXT:    call otherfoo@plt
-; CHECK-RV64-FD-NEXT:    fld fs11, 8(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs10, 16(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs9, 24(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs8, 32(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs7, 40(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs6, 48(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs5, 56(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs4, 64(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs3, 72(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs2, 80(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs1, 88(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fs0, 96(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft11, 104(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft10, 112(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft9, 120(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft8, 128(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa7, 136(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa6, 144(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa5, 152(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa4, 160(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa3, 168(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa2, 176(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa1, 184(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld fa0, 192(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft7, 200(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft6, 208(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft5, 216(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft4, 224(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft3, 232(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft2, 240(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft1, 248(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    fld ft0, 256(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t6, 264(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t5, 272(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t4, 280(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t3, 288(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a7, 296(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a6, 304(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a5, 312(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a4, 320(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a3, 328(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a2, 336(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a1, 344(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld a0, 352(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld s0, 360(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t2, 368(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t1, 376(sp) # 8-byte Folded Reload
-; CHECK-RV64-FD-NEXT:    ld t0, 384(sp) # 8-byte Folded Reload
 ; CHECK-RV64-FD-NEXT:    ld ra, 392(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t0, 384(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t1, 376(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t2, 368(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld s0, 360(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a0, 352(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a1, 344(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a2, 336(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a3, 328(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a4, 320(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a5, 312(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a6, 304(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld a7, 296(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t3, 288(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t4, 280(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t5, 272(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    ld t6, 264(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft0, 256(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft1, 248(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft2, 240(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft3, 232(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft4, 224(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft5, 216(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft6, 208(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft7, 200(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa0, 192(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa1, 184(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa2, 176(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa3, 168(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa4, 160(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa5, 152(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa6, 144(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fa7, 136(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft8, 128(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft9, 120(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft10, 112(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld ft11, 104(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs0, 96(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs1, 88(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs2, 80(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs3, 72(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs4, 64(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs5, 56(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs6, 48(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs7, 40(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs8, 32(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs9, 24(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs10, 16(sp) # 8-byte Folded Reload
+; CHECK-RV64-FD-NEXT:    fld fs11, 8(sp) # 8-byte Folded Reload
 ; CHECK-RV64-FD-NEXT:    addi sp, sp, 400
 ; CHECK-RV64-FD-NEXT:    mret
   %call = call i32 bitcast (i32 (...)* @otherfoo to i32 ()*)()

diff --git a/llvm/test/CodeGen/RISCV/large-stack.ll b/llvm/test/CodeGen/RISCV/large-stack.ll
index 962d88907ee07..a78c9f4f4b402 100644
--- a/llvm/test/CodeGen/RISCV/large-stack.ll
+++ b/llvm/test/CodeGen/RISCV/large-stack.ll
@@ -34,8 +34,8 @@ define void @test() {
 ; RV32I-WITHFP-NEXT:    lui a0, 74565
 ; RV32I-WITHFP-NEXT:    addi a0, a0, -352
 ; RV32I-WITHFP-NEXT:    add sp, sp, a0
-; RV32I-WITHFP-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 2032
 ; RV32I-WITHFP-NEXT:    ret
   %tmp = alloca [ 305419896 x i8 ] , align 4
@@ -74,8 +74,8 @@ define void @test_emergency_spill_slot(i32 %a) {
 ; RV32I-FPELIM-NEXT:    lui a0, 97
 ; RV32I-FPELIM-NEXT:    addi a0, a0, 672
 ; RV32I-FPELIM-NEXT:    add sp, sp, a0
-; RV32I-FPELIM-NEXT:    lw s1, 2024(sp) # 4-byte Folded Reload
 ; RV32I-FPELIM-NEXT:    lw s0, 2028(sp) # 4-byte Folded Reload
+; RV32I-FPELIM-NEXT:    lw s1, 2024(sp) # 4-byte Folded Reload
 ; RV32I-FPELIM-NEXT:    addi sp, sp, 2032
 ; RV32I-FPELIM-NEXT:    ret
 ;
@@ -114,10 +114,10 @@ define void @test_emergency_spill_slot(i32 %a) {
 ; RV32I-WITHFP-NEXT:    lui a0, 97
 ; RV32I-WITHFP-NEXT:    addi a0, a0, 688
 ; RV32I-WITHFP-NEXT:    add sp, sp, a0
-; RV32I-WITHFP-NEXT:    lw s2, 2016(sp) # 4-byte Folded Reload
-; RV32I-WITHFP-NEXT:    lw s1, 2020(sp) # 4-byte Folded Reload
-; RV32I-WITHFP-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s1, 2020(sp) # 4-byte Folded Reload
+; RV32I-WITHFP-NEXT:    lw s2, 2016(sp) # 4-byte Folded Reload
 ; RV32I-WITHFP-NEXT:    addi sp, sp, 2032
 ; RV32I-WITHFP-NEXT:    ret
   %data = alloca [ 100000 x i32 ] , align 4

diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll
index 43914996a43f8..411369557ff34 100644
--- a/llvm/test/CodeGen/RISCV/mul.ll
+++ b/llvm/test/CodeGen/RISCV/mul.ll
@@ -1239,9 +1239,9 @@ define i128 @muli128_m3840(i128 %a) nounwind {
 ; RV32IM-NEXT:    sw t0, 4(a0)
 ; RV32IM-NEXT:    sw t6, 8(a0)
 ; RV32IM-NEXT:    sw a1, 12(a0)
-; RV32IM-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    addi sp, sp, 16
 ; RV32IM-NEXT:    ret
 ;
@@ -1370,8 +1370,8 @@ define i128 @muli128_m63(i128 %a) nounwind {
 ; RV32IM-NEXT:    sw t0, 4(a0)
 ; RV32IM-NEXT:    sw t6, 8(a0)
 ; RV32IM-NEXT:    sw a1, 12(a0)
-; RV32IM-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    addi sp, sp, 16
 ; RV32IM-NEXT:    ret
 ;
@@ -1476,17 +1476,17 @@ define i64 @mulhsu_i64(i64 %a, i64 %b) nounwind {
 ; RV32I-NEXT:    add a0, a5, a0
 ; RV32I-NEXT:    add a1, a0, a4
 ; RV32I-NEXT:    mv a0, a2
-; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/out-of-reach-emergency-slot.mir b/llvm/test/CodeGen/RISCV/out-of-reach-emergency-slot.mir
index ebad4ef8f73b1..cee38fe804f1f 100644
--- a/llvm/test/CodeGen/RISCV/out-of-reach-emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/out-of-reach-emergency-slot.mir
@@ -41,8 +41,8 @@
   ; CHECK-NEXT:    lui a0, 2
   ; CHECK-NEXT:    addiw a0, a0, -2032
   ; CHECK-NEXT:    add sp, sp, a0
-  ; CHECK-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
   ; CHECK-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+  ; CHECK-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
   ; CHECK-NEXT:    addi sp, sp, 2032
   ; CHECK-NEXT:    ret
   entry:

diff --git a/llvm/test/CodeGen/RISCV/remat.ll b/llvm/test/CodeGen/RISCV/remat.ll
index a70e29a61b715..46f7a4dbda60c 100644
--- a/llvm/test/CodeGen/RISCV/remat.ll
+++ b/llvm/test/CodeGen/RISCV/remat.ll
@@ -113,19 +113,19 @@ define i32 @test() nounwind {
 ; RV32I-NEXT:    j .LBB0_2
 ; RV32I-NEXT:  .LBB0_11: # %for.end
 ; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 48(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s10, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s11, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 64
 ; RV32I-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll b/llvm/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll
index f7b83deb207b8..cfa458732496e 100644
--- a/llvm/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll
+++ b/llvm/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll
@@ -21,8 +21,8 @@ define float @float_test(float %a, float %b) nounwind {
 ; RV32IF-NEXT:    call __addsf3@plt
 ; RV32IF-NEXT:    mv a1, s0
 ; RV32IF-NEXT:    call __divsf3@plt
-; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
 ;
@@ -35,8 +35,8 @@ define float @float_test(float %a, float %b) nounwind {
 ; RV64IF-NEXT:    call __addsf3@plt
 ; RV64IF-NEXT:    mv a1, s0
 ; RV64IF-NEXT:    call __divsf3@plt
-; RV64IF-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    addi sp, sp, 16
 ; RV64IF-NEXT:    ret
   %1 = fadd float %a, %b
@@ -57,9 +57,9 @@ define double @double_test(double %a, double %b) nounwind {
 ; RV32IF-NEXT:    mv a2, s1
 ; RV32IF-NEXT:    mv a3, s0
 ; RV32IF-NEXT:    call __divdf3@plt
-; RV32IF-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32IF-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32IF-NEXT:    addi sp, sp, 16
 ; RV32IF-NEXT:    ret
 ;
@@ -72,8 +72,8 @@ define double @double_test(double %a, double %b) nounwind {
 ; RV64IF-NEXT:    call __adddf3@plt
 ; RV64IF-NEXT:    mv a1, s0
 ; RV64IF-NEXT:    call __divdf3@plt
-; RV64IF-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64IF-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64IF-NEXT:    addi sp, sp, 16
 ; RV64IF-NEXT:    ret
   %1 = fadd double %a, %b

diff --git a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll
index 7f7cf044ec280..d90b0629b139a 100644
--- a/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll
+++ b/llvm/test/CodeGen/RISCV/rv32i-rv64i-half.ll
@@ -35,10 +35,10 @@ define half @half_test(half %a, half %b) nounwind {
 ; RV32I-NEXT:    mv a1, s0
 ; RV32I-NEXT:    call __divsf3@plt
 ; RV32I-NEXT:    call __gnu_f2h_ieee@plt
-; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -67,10 +67,10 @@ define half @half_test(half %a, half %b) nounwind {
 ; RV64I-NEXT:    mv a1, s0
 ; RV64I-NEXT:    call __divsf3@plt
 ; RV64I-NEXT:    call __gnu_f2h_ieee@plt
-; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = fadd half %a, %b

diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 356c4abe1765f..a8e1b1958c96d 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -139,14 +139,14 @@ define i64 @ctlz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    srli a0, s2, 24
 ; RV32I-NEXT:  .LBB1_3:
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -277,14 +277,14 @@ define i64 @cttz_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    srli a0, s2, 24
 ; RV32I-NEXT:  .LBB3_3:
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -393,13 +393,13 @@ define i64 @ctpop_i64(i64 %a) nounwind {
 ; RV32I-NEXT:    srli a0, a0, 24
 ; RV32I-NEXT:    add a0, a0, s5
 ; RV32I-NEXT:    li a1, 0
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
index 1f24d8c097cf6..198aea298be2b 100644
--- a/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
+++ b/llvm/test/CodeGen/RISCV/rv64i-complex-float.ll
@@ -24,10 +24,10 @@ define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind {
 ; CHECK-NEXT:    slli a1, s0, 32
 ; CHECK-NEXT:    srli a1, a1, 32
 ; CHECK-NEXT:    or a0, a0, a1
-; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index a5766e71d8871..6893afc335352 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -162,8 +162,8 @@ define signext i32 @log2_ceil_i32(i32 signext %a) nounwind {
 ; RV64I-NEXT:    srliw a1, a0, 24
 ; RV64I-NEXT:  .LBB2_2: # %cond.end
 ; RV64I-NEXT:    sub a0, s0, a1
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
@@ -224,8 +224,8 @@ define signext i32 @findLastSet_i32(i32 signext %a) nounwind {
 ; RV64I-NEXT:    srliw a0, a1, 24
 ; RV64I-NEXT:    xori a0, a0, 31
 ; RV64I-NEXT:  .LBB3_2:
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
@@ -507,8 +507,8 @@ define signext i32 @findFirstSet_i32(i32 signext %a) nounwind {
 ; RV64I-NEXT:  # %bb.1:
 ; RV64I-NEXT:    srliw a0, a1, 24
 ; RV64I-NEXT:  .LBB8_2:
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;
@@ -563,8 +563,8 @@ define signext i32 @ffs_i32(i32 signext %a) nounwind {
 ; RV64I-NEXT:    srliw a0, a1, 24
 ; RV64I-NEXT:    addi a0, a0, 1
 ; RV64I-NEXT:  .LBB9_2:
-; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 16
 ; RV64I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
index 9528631626810..b9be04a3306d3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir
@@ -50,8 +50,8 @@ body: |
     ; CHECK-NEXT: $x10 = frame-destroy PseudoReadVLENB
     ; CHECK-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
     ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 240
-    ; CHECK-NEXT: $x8 = LD $x2, 2016 :: (load (s64) from %stack.4)
     ; CHECK-NEXT: $x1 = LD $x2, 2024 :: (load (s64) from %stack.3)
+    ; CHECK-NEXT: $x8 = LD $x2, 2016 :: (load (s64) from %stack.4)
     ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 2032
     ; CHECK-NEXT: PseudoRET
     %1:gprnox0 = COPY $x11

diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index 44e11962b5f8c..065e5dee2f930 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -40,8 +40,8 @@ define void @lmul4() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %v = alloca <vscale x 4 x i64>
@@ -60,8 +60,8 @@ define void @lmul8() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    addi sp, s0, -64
-; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
   %v = alloca <vscale x 8 x i64>
@@ -98,8 +98,8 @@ define void @lmul2_and_4() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %v1 = alloca <vscale x 2 x i64>
@@ -120,8 +120,8 @@ define void @lmul1_and_4() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %v1 = alloca <vscale x 1 x i64>
@@ -159,8 +159,8 @@ define void @lmul4_and_1() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %v1 = alloca <vscale x 4 x i64>
@@ -181,8 +181,8 @@ define void @lmul4_and_2() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %v1 = alloca <vscale x 4 x i64>
@@ -203,8 +203,8 @@ define void @lmul4_and_2_x2_0() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %v1 = alloca <vscale x 4 x i64>
@@ -227,8 +227,8 @@ define void @lmul4_and_2_x2_1() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -32
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %v1 = alloca <vscale x 4 x i64>
@@ -277,8 +277,8 @@ define void @gpr_and_lmul1_and_4() nounwind {
 ; CHECK-NEXT:    li a0, 3
 ; CHECK-NEXT:    sd a0, 8(sp)
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %x1 = alloca i64
@@ -301,8 +301,8 @@ define void @lmul_1_2_4_8() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    addi sp, s0, -64
-; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
   %v1 = alloca <vscale x 1 x i64>
@@ -325,8 +325,8 @@ define void @lmul_1_2_4_8_x2_0() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    addi sp, s0, -64
-; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
   %v1 = alloca <vscale x 1 x i64>
@@ -353,8 +353,8 @@ define void @lmul_1_2_4_8_x2_1() nounwind {
 ; CHECK-NEXT:    sub sp, sp, a0
 ; CHECK-NEXT:    andi sp, sp, -64
 ; CHECK-NEXT:    addi sp, s0, -64
-; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    ret
   %v8 = alloca <vscale x 8 x i64>

diff --git a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
index 98032fb998538..4b62b8ead3b55 100644
--- a/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/emergency-slot.mir
@@ -163,19 +163,19 @@ body:             |
   ; CHECK-NEXT:   $x10 = frame-destroy ADDIW killed $x10, -1792
   ; CHECK-NEXT:   $x2 = frame-destroy SUB $x8, killed $x10
   ; CHECK-NEXT:   $x2 = frame-destroy ADDI $x2, 272
-  ; CHECK-NEXT:   $x27 = LD $x2, 1928 :: (load (s64) from %stack.15)
-  ; CHECK-NEXT:   $x26 = LD $x2, 1936 :: (load (s64) from %stack.14)
-  ; CHECK-NEXT:   $x25 = LD $x2, 1944 :: (load (s64) from %stack.13)
-  ; CHECK-NEXT:   $x24 = LD $x2, 1952 :: (load (s64) from %stack.12)
-  ; CHECK-NEXT:   $x23 = LD $x2, 1960 :: (load (s64) from %stack.11)
-  ; CHECK-NEXT:   $x22 = LD $x2, 1968 :: (load (s64) from %stack.10)
-  ; CHECK-NEXT:   $x21 = LD $x2, 1976 :: (load (s64) from %stack.9)
-  ; CHECK-NEXT:   $x20 = LD $x2, 1984 :: (load (s64) from %stack.8)
-  ; CHECK-NEXT:   $x19 = LD $x2, 1992 :: (load (s64) from %stack.7)
-  ; CHECK-NEXT:   $x18 = LD $x2, 2000 :: (load (s64) from %stack.6)
-  ; CHECK-NEXT:   $x9 = LD $x2, 2008 :: (load (s64) from %stack.5)
-  ; CHECK-NEXT:   $x8 = LD $x2, 2016 :: (load (s64) from %stack.4)
   ; CHECK-NEXT:   $x1 = LD $x2, 2024 :: (load (s64) from %stack.3)
+  ; CHECK-NEXT:   $x8 = LD $x2, 2016 :: (load (s64) from %stack.4)
+  ; CHECK-NEXT:   $x9 = LD $x2, 2008 :: (load (s64) from %stack.5)
+  ; CHECK-NEXT:   $x18 = LD $x2, 2000 :: (load (s64) from %stack.6)
+  ; CHECK-NEXT:   $x19 = LD $x2, 1992 :: (load (s64) from %stack.7)
+  ; CHECK-NEXT:   $x20 = LD $x2, 1984 :: (load (s64) from %stack.8)
+  ; CHECK-NEXT:   $x21 = LD $x2, 1976 :: (load (s64) from %stack.9)
+  ; CHECK-NEXT:   $x22 = LD $x2, 1968 :: (load (s64) from %stack.10)
+  ; CHECK-NEXT:   $x23 = LD $x2, 1960 :: (load (s64) from %stack.11)
+  ; CHECK-NEXT:   $x24 = LD $x2, 1952 :: (load (s64) from %stack.12)
+  ; CHECK-NEXT:   $x25 = LD $x2, 1944 :: (load (s64) from %stack.13)
+  ; CHECK-NEXT:   $x26 = LD $x2, 1936 :: (load (s64) from %stack.14)
+  ; CHECK-NEXT:   $x27 = LD $x2, 1928 :: (load (s64) from %stack.15)
   ; CHECK-NEXT:   $x2 = frame-destroy ADDI $x2, 2032
   ; CHECK-NEXT:   PseudoRET
   bb.0:

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
index 8d0f0a3a03af6..6fe4cae6ebcef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
@@ -302,8 +302,8 @@ define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x
 ; LMULMAX8-NEXT:    vmv.v.v v8, v24
 ; LMULMAX8-NEXT:    call ext3@plt
 ; LMULMAX8-NEXT:    addi sp, s0, -384
-; LMULMAX8-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX8-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX8-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX8-NEXT:    addi sp, sp, 384
 ; LMULMAX8-NEXT:    ret
 ;
@@ -332,8 +332,8 @@ define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x
 ; LMULMAX4-NEXT:    vmv.v.v v12, v28
 ; LMULMAX4-NEXT:    call ext3@plt
 ; LMULMAX4-NEXT:    addi sp, s0, -384
-; LMULMAX4-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX4-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX4-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX4-NEXT:    addi sp, sp, 384
 ; LMULMAX4-NEXT:    ret
   %t = call fastcc <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42)
@@ -396,8 +396,8 @@ define fastcc <32 x i32> @pass_vector_arg_indirect_stack(<32 x i32> %x, <32 x i3
 ; LMULMAX8-NEXT:    vmv.v.i v16, 0
 ; LMULMAX8-NEXT:    call vector_arg_indirect_stack@plt
 ; LMULMAX8-NEXT:    addi sp, s0, -384
-; LMULMAX8-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX8-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX8-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX8-NEXT:    addi sp, sp, 384
 ; LMULMAX8-NEXT:    ret
 ;
@@ -433,8 +433,8 @@ define fastcc <32 x i32> @pass_vector_arg_indirect_stack(<32 x i32> %x, <32 x i3
 ; LMULMAX4-NEXT:    vmv.v.i v20, 0
 ; LMULMAX4-NEXT:    call vector_arg_indirect_stack@plt
 ; LMULMAX4-NEXT:    addi sp, s0, -384
-; LMULMAX4-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX4-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX4-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX4-NEXT:    addi sp, sp, 384
 ; LMULMAX4-NEXT:    ret
   %s = call fastcc <32 x i32> @vector_arg_indirect_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 8)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
index 8ad2ce989b3b4..73c8ffa3ae79c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -801,8 +801,8 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; LMULMAX8-NEXT:    vmv.v.v v8, v24
 ; LMULMAX8-NEXT:    call ext3@plt
 ; LMULMAX8-NEXT:    addi sp, s0, -384
-; LMULMAX8-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX8-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX8-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX8-NEXT:    addi sp, sp, 384
 ; LMULMAX8-NEXT:    ret
 ;
@@ -831,8 +831,8 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; LMULMAX4-NEXT:    vmv.v.v v12, v28
 ; LMULMAX4-NEXT:    call ext3@plt
 ; LMULMAX4-NEXT:    addi sp, s0, -384
-; LMULMAX4-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX4-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX4-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX4-NEXT:    addi sp, sp, 384
 ; LMULMAX4-NEXT:    ret
 ;
@@ -871,8 +871,8 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; LMULMAX2-NEXT:    vmv.v.v v14, v30
 ; LMULMAX2-NEXT:    call ext3@plt
 ; LMULMAX2-NEXT:    addi sp, s0, -384
-; LMULMAX2-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX2-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX2-NEXT:    addi sp, sp, 384
 ; LMULMAX2-NEXT:    ret
 ;
@@ -934,8 +934,8 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x
 ; LMULMAX1-NEXT:    vmv.v.v v15, v31
 ; LMULMAX1-NEXT:    call ext3@plt
 ; LMULMAX1-NEXT:    addi sp, s0, -384
-; LMULMAX1-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX1-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX1-NEXT:    addi sp, sp, 384
 ; LMULMAX1-NEXT:    ret
   %t = call <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42)
@@ -1039,8 +1039,8 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
 ; LMULMAX8-NEXT:    vmv1r.v v12, v8
 ; LMULMAX8-NEXT:    call split_vector_args@plt
 ; LMULMAX8-NEXT:    addi sp, s0, -384
-; LMULMAX8-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX8-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX8-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX8-NEXT:    addi sp, sp, 384
 ; LMULMAX8-NEXT:    ret
 ;
@@ -1072,8 +1072,8 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
 ; LMULMAX4-NEXT:    vmv1r.v v12, v8
 ; LMULMAX4-NEXT:    call split_vector_args@plt
 ; LMULMAX4-NEXT:    addi sp, s0, -384
-; LMULMAX4-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX4-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX4-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
 ; LMULMAX4-NEXT:    addi sp, sp, 384
 ; LMULMAX4-NEXT:    ret
 ;
@@ -1112,8 +1112,8 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
 ; LMULMAX2-NEXT:    vmv.v.v v22, v14
 ; LMULMAX2-NEXT:    call split_vector_args@plt
 ; LMULMAX2-NEXT:    addi sp, s0, -256
-; LMULMAX2-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
 ; LMULMAX2-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
 ; LMULMAX2-NEXT:    addi sp, sp, 256
 ; LMULMAX2-NEXT:    ret
 ;
@@ -1166,8 +1166,8 @@ define <32 x i32> @call_split_vector_args(<2 x i32>* %pa, <32 x i32>* %pb) {
 ; LMULMAX1-NEXT:    vmv.v.v v23, v15
 ; LMULMAX1-NEXT:    call split_vector_args@plt
 ; LMULMAX1-NEXT:    addi sp, s0, -256
-; LMULMAX1-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
 ; LMULMAX1-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
 ; LMULMAX1-NEXT:    addi sp, sp, 256
 ; LMULMAX1-NEXT:    ret
   %a = load <2 x i32>, <2 x i32>* %pa

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
index f68b8f9049b0b..3e8e40fc24d77 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll
@@ -176,8 +176,8 @@ define i1 @extractelt_v256i1(<256 x i8>* %x, i64 %idx) nounwind {
 ; RV32-NEXT:    vse8.v v8, (a1)
 ; RV32-NEXT:    lb a0, 0(a0)
 ; RV32-NEXT:    addi sp, s0, -512
-; RV32-NEXT:    lw s0, 504(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw ra, 508(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 504(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 512
 ; RV32-NEXT:    ret
 ;
@@ -208,8 +208,8 @@ define i1 @extractelt_v256i1(<256 x i8>* %x, i64 %idx) nounwind {
 ; RV64-NEXT:    vse8.v v8, (a1)
 ; RV64-NEXT:    lb a0, 0(a0)
 ; RV64-NEXT:    addi sp, s0, -512
-; RV64-NEXT:    ld s0, 496(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld ra, 504(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 496(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 512
 ; RV64-NEXT:    ret
   %a = load <256 x i8>, <256 x i8>* %x

diff --git a/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir b/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
index 6073a080fd424..0a23c11b54a15 100644
--- a/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/large-rvv-stack-size.mir
@@ -36,8 +36,8 @@
   ; CHECK-NEXT:    addiw a0, a0, -1792
   ; CHECK-NEXT:    sub sp, s0, a0
   ; CHECK-NEXT:    addi sp, sp, 272
-  ; CHECK-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
   ; CHECK-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+  ; CHECK-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
   ; CHECK-NEXT:    addi sp, sp, 2032
   ; CHECK-NEXT:    ret
     ret void

diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll
index 741db4f0e4df7..48f9bed5b6b59 100644
--- a/llvm/test/CodeGen/RISCV/rvv/localvar.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll
@@ -105,8 +105,8 @@ define void @local_var_m4() {
 ; RV64IV-NEXT:    addi a0, sp, 16
 ; RV64IV-NEXT:    vl4r.v v8, (a0)
 ; RV64IV-NEXT:    addi sp, s0, -32
-; RV64IV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    addi sp, sp, 32
 ; RV64IV-NEXT:    ret
   %local0 = alloca <vscale x 32 x i8>
@@ -139,8 +139,8 @@ define void @local_var_m8() {
 ; RV64IV-NEXT:    addi a0, sp, 48
 ; RV64IV-NEXT:    vl8r.v v8, (a0)
 ; RV64IV-NEXT:    addi sp, s0, -64
-; RV64IV-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64IV-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    addi sp, sp, 64
 ; RV64IV-NEXT:    ret
   %local0 = alloca <vscale x 64 x i8>
@@ -217,8 +217,8 @@ define void @local_var_m2_with_varsize_object(i64 %n) {
 ; RV64IV-NEXT:    addi a0, a0, -32
 ; RV64IV-NEXT:    vl2r.v v8, (a0)
 ; RV64IV-NEXT:    addi sp, s0, -32
-; RV64IV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64IV-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    addi sp, sp, 32
 ; RV64IV-NEXT:    ret
   %1 = alloca i8, i64 %n
@@ -268,9 +268,9 @@ define void @local_var_m2_with_bp(i64 %n) {
 ; RV64IV-NEXT:    vl2r.v v8, (a0)
 ; RV64IV-NEXT:    lw a0, 120(s1)
 ; RV64IV-NEXT:    addi sp, s0, -256
-; RV64IV-NEXT:    ld s1, 232(sp) # 8-byte Folded Reload
-; RV64IV-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
+; RV64IV-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
+; RV64IV-NEXT:    ld s1, 232(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    addi sp, sp, 256
 ; RV64IV-NEXT:    ret
   %1 = alloca i8, i64 %n

diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
index f325c781b097f..97d1956ce1270 100644
--- a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
@@ -59,8 +59,8 @@ define <vscale x 64 x i8> @caller() {
 ; RV64IV-NEXT:    vs8r.v v24, (a1)
 ; RV64IV-NEXT:    call callee@plt
 ; RV64IV-NEXT:    addi sp, s0, -64
-; RV64IV-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64IV-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64IV-NEXT:    addi sp, sp, 64
 ; RV64IV-NEXT:    ret
   %local0 = alloca <vscale x 64 x i8>

diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
index 2e4feee53adab..b4db000233f34 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -74,8 +74,8 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 1
 ; SPILL-O2-NEXT:    add sp, sp, a0
-; SPILL-O2-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; SPILL-O2-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; SPILL-O2-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; SPILL-O2-NEXT:    addi sp, sp, 16
 ; SPILL-O2-NEXT:    ret
 {

diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
index aa8ffefdf9300..46c263b6c4fad 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -74,8 +74,8 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
 ; SPILL-O2-NEXT:    csrr a0, vlenb
 ; SPILL-O2-NEXT:    slli a0, a0, 1
 ; SPILL-O2-NEXT:    add sp, sp, a0
-; SPILL-O2-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; SPILL-O2-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; SPILL-O2-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; SPILL-O2-NEXT:    addi sp, sp, 32
 ; SPILL-O2-NEXT:    ret
 {

diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
index 192a5d18c4156..03d95ed99db99 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-framelayout.ll
@@ -31,8 +31,8 @@ define void @rvv_vla(i64 %n, i64 %i) nounwind {
 ; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    lw a0, 0(a0)
 ; CHECK-NEXT:    addi sp, s0, -32
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
   %vla.addr = alloca i32, i64 %n
@@ -69,8 +69,8 @@ define void @rvv_overaligned() nounwind {
 ; CHECK-NEXT:    vl2re64.v v8, (a0)
 ; CHECK-NEXT:    lw a0, 64(sp)
 ; CHECK-NEXT:    addi sp, s0, -128
-; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 128
 ; CHECK-NEXT:    ret
   %overaligned = alloca i32, align 64
@@ -116,9 +116,9 @@ define void @rvv_vla_and_overaligned(i64 %n, i64 %i) nounwind {
 ; CHECK-NEXT:    add a0, a0, a1
 ; CHECK-NEXT:    lw a0, 0(a0)
 ; CHECK-NEXT:    addi sp, s0, -128
-; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
-; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s1, 104(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 128
 ; CHECK-NEXT:    ret
   %overaligned = alloca i32, align 64

diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
index e28902dc6e4c9..df561fe6c7fde 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-out-arguments.ll
@@ -32,8 +32,8 @@ define dso_local void @lots_args(i32 signext %x0, i32 signext %x1, <vscale x 16
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
 ; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 64(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 80
 ; CHECK-NEXT:    ret
 entry:
@@ -141,8 +141,8 @@ define dso_local signext i32 @main() #0 {
 ; CHECK-NEXT:    csrr a1, vlenb
 ; CHECK-NEXT:    slli a1, a1, 3
 ; CHECK-NEXT:    add sp, sp, a1
-; CHECK-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 104(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 112
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index b5ac6cba9b482..e506a623b6fe5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -358,8 +358,8 @@ define <vscale x 1 x double> @test8(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 entry:
@@ -418,8 +418,8 @@ define <vscale x 1 x double> @test9(i64 %avl, i8 zeroext %cond, <vscale x 1 x do
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 1
 ; CHECK-NEXT:    add sp, sp, a0
-; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; CHECK-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
 entry:

diff --git a/llvm/test/CodeGen/RISCV/select-cc.ll b/llvm/test/CodeGen/RISCV/select-cc.ll
index 8bc441b7ac1bb..5150f4ed70990 100644
--- a/llvm/test/CodeGen/RISCV/select-cc.ll
+++ b/llvm/test/CodeGen/RISCV/select-cc.ll
@@ -179,8 +179,8 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
 ; RV32I-NEXT:    call bar@plt
 ; RV32I-NEXT:  .LBB1_4:
 ; RV32I-NEXT:    mv a0, s0
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -196,8 +196,8 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2,
 ; RV32IBT-NEXT:    call bar@plt
 ; RV32IBT-NEXT:  .LBB1_2:
 ; RV32IBT-NEXT:    mv a0, s0
-; RV32IBT-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IBT-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IBT-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32IBT-NEXT:    addi sp, sp, 16
 ; RV32IBT-NEXT:    ret
   %5 = icmp eq i16 %0, 0

diff --git a/llvm/test/CodeGen/RISCV/shadowcallstack.ll b/llvm/test/CodeGen/RISCV/shadowcallstack.ll
index 93ea48e3bc1dd..188bcead5d0d4 100644
--- a/llvm/test/CodeGen/RISCV/shadowcallstack.ll
+++ b/llvm/test/CodeGen/RISCV/shadowcallstack.ll
@@ -91,10 +91,10 @@ define i32 @f4() shadowcallstack {
 ; RV32-NEXT:    add a1, s3, s1
 ; RV32-NEXT:    add a0, s0, a0
 ; RV32-NEXT:    add a0, a1, a0
-; RV32-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    lw ra, -4(s2)
 ; RV32-NEXT:    addi s2, s2, -4
@@ -124,10 +124,10 @@ define i32 @f4() shadowcallstack {
 ; RV64-NEXT:    addw a1, s3, s1
 ; RV64-NEXT:    addw a0, s0, a0
 ; RV64-NEXT:    addw a0, a1, a0
-; RV64-NEXT:    ld s3, 0(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 0(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 32
 ; RV64-NEXT:    ld ra, -8(s2)
 ; RV64-NEXT:    addi s2, s2, -8

diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
index d77a76bab40fd..cdee24f11a7b5 100644
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -402,8 +402,8 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind {
 ; RV32I-NEXT:    sw a4, 8(a0)
 ; RV32I-NEXT:    sw s1, 0(a0)
 ; RV32I-NEXT:    sw t0, 4(a0)
-; RV32I-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/shrinkwrap.ll b/llvm/test/CodeGen/RISCV/shrinkwrap.ll
index 178cd03777bc3..e64cc5273e56f 100644
--- a/llvm/test/CodeGen/RISCV/shrinkwrap.ll
+++ b/llvm/test/CodeGen/RISCV/shrinkwrap.ll
@@ -86,8 +86,8 @@ define void @conditional_alloca(i32 %n) nounwind {
 ; RV32I-SW-NO-NEXT:    call notdead@plt
 ; RV32I-SW-NO-NEXT:  .LBB1_2: # %if.end
 ; RV32I-SW-NO-NEXT:    addi sp, s0, -16
-; RV32I-SW-NO-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-SW-NO-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SW-NO-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-SW-NO-NEXT:    addi sp, sp, 16
 ; RV32I-SW-NO-NEXT:    ret
 ;
@@ -106,8 +106,8 @@ define void @conditional_alloca(i32 %n) nounwind {
 ; RV32I-SW-NEXT:    mv sp, a0
 ; RV32I-SW-NEXT:    call notdead@plt
 ; RV32I-SW-NEXT:    addi sp, s0, -16
-; RV32I-SW-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-SW-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-SW-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-SW-NEXT:    addi sp, sp, 16
 ; RV32I-SW-NEXT:  .LBB1_2: # %if.end
 ; RV32I-SW-NEXT:    ret
@@ -147,8 +147,8 @@ define void @conditional_alloca(i32 %n) nounwind {
 ; RV64I-SW-NEXT:    mv sp, a0
 ; RV64I-SW-NEXT:    call notdead@plt
 ; RV64I-SW-NEXT:    addi sp, s0, -16
-; RV64I-SW-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-SW-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-SW-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-SW-NEXT:    addi sp, sp, 16
 ; RV64I-SW-NEXT:  .LBB1_2: # %if.end
 ; RV64I-SW-NEXT:    ret

diff --git a/llvm/test/CodeGen/RISCV/srem-lkk.ll b/llvm/test/CodeGen/RISCV/srem-lkk.ll
index 28da10d45ac65..da5900208a24f 100644
--- a/llvm/test/CodeGen/RISCV/srem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-lkk.ll
@@ -243,9 +243,9 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind {
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __divsi3@plt
 ; RV32I-NEXT:    add a0, s1, a0
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -279,9 +279,9 @@ define i32 @combine_srem_sdiv(i32 %x) nounwind {
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __divdi3@plt
 ; RV64I-NEXT:    addw a0, s1, a0
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index ed663acd0b091..58b27218d6c8e 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -379,14 +379,14 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32-NEXT:    slli a1, a2, 2
 ; RV32-NEXT:    sub a0, a0, a1
 ; RV32-NEXT:    sw a0, 8(s0)
-; RV32-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
@@ -470,13 +470,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV64-NEXT:    slli a1, a2, 33
 ; RV64-NEXT:    or a0, a0, a1
 ; RV64-NEXT:    sd a0, 0(s0)
-; RV64-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 64
 ; RV64-NEXT:    ret
 ;
@@ -551,14 +551,14 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32M-NEXT:    slli a1, a2, 2
 ; RV32M-NEXT:    sub a0, a0, a1
 ; RV32M-NEXT:    sw a0, 8(s0)
-; RV32M-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
-; RV32M-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32M-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32M-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32M-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32M-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32M-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32M-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32M-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32M-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32M-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32M-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32M-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32M-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
+; RV32M-NEXT:    lw s6, 0(sp) # 4-byte Folded Reload
 ; RV32M-NEXT:    addi sp, sp, 32
 ; RV32M-NEXT:    ret
 ;
@@ -751,13 +751,13 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV32MV-NEXT:    or a0, a1, a0
 ; RV32MV-NEXT:    sw a0, 8(s1)
 ; RV32MV-NEXT:    addi sp, s0, -96
-; RV32MV-NEXT:    lw s5, 68(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s4, 72(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s3, 76(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s2, 80(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s1, 84(sp) # 4-byte Folded Reload
-; RV32MV-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
 ; RV32MV-NEXT:    lw ra, 92(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s0, 88(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s1, 84(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s2, 80(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s3, 76(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s4, 72(sp) # 4-byte Folded Reload
+; RV32MV-NEXT:    lw s5, 68(sp) # 4-byte Folded Reload
 ; RV32MV-NEXT:    addi sp, sp, 96
 ; RV32MV-NEXT:    ret
 ;
@@ -862,8 +862,8 @@ define void @test_srem_vec(<3 x i33>* %X) nounwind {
 ; RV64MV-NEXT:    or a1, a1, a2
 ; RV64MV-NEXT:    sd a1, 0(a0)
 ; RV64MV-NEXT:    addi sp, s0, -96
-; RV64MV-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
 ; RV64MV-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
+; RV64MV-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
 ; RV64MV-NEXT:    addi sp, sp, 96
 ; RV64MV-NEXT:    ret
   %ld = load <3 x i33>, <3 x i33>* %X

diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
index 64ab2bf118b47..feb66c5dbe02e 100644
--- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll
@@ -43,13 +43,13 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh s0, 4(s1)
 ; RV32I-NEXT:    sh s5, 2(s1)
 ; RV32I-NEXT:    sh s4, 0(s1)
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -137,13 +137,13 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh s0, 4(s1)
 ; RV64I-NEXT:    sh s5, 2(s1)
 ; RV64I-NEXT:    sh s4, 0(s1)
-; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 64
 ; RV64I-NEXT:    ret
 ;
@@ -258,13 +258,13 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh s0, 4(s1)
 ; RV32I-NEXT:    sh s5, 2(s1)
 ; RV32I-NEXT:    sh s4, 0(s1)
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -345,13 +345,13 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh s0, 4(s1)
 ; RV64I-NEXT:    sh s5, 2(s1)
 ; RV64I-NEXT:    sh s4, 0(s1)
-; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 64
 ; RV64I-NEXT:    ret
 ;
@@ -468,17 +468,17 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh a2, 4(s0)
 ; RV32I-NEXT:    sh a1, 2(s0)
 ; RV32I-NEXT:    sh a0, 0(s0)
-; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;
@@ -587,17 +587,17 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh a2, 4(s0)
 ; RV64I-NEXT:    sh a1, 2(s0)
 ; RV64I-NEXT:    sh a0, 0(s0)
-; RV64I-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 96
 ; RV64I-NEXT:    ret
 ;
@@ -692,11 +692,11 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh s1, 4(s0)
 ; RV32I-NEXT:    sh s3, 2(s0)
 ; RV32I-NEXT:    sh s2, 0(s0)
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -765,11 +765,11 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh s1, 4(s0)
 ; RV64I-NEXT:    sh s3, 2(s0)
 ; RV64I-NEXT:    sh s2, 0(s0)
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -846,11 +846,11 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh a0, 6(s0)
 ; RV32I-NEXT:    sh s1, 4(s0)
 ; RV32I-NEXT:    sh s3, 2(s0)
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -923,11 +923,11 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh a0, 6(s0)
 ; RV64I-NEXT:    sh s1, 4(s0)
 ; RV64I-NEXT:    sh s3, 2(s0)
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -1022,11 +1022,11 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh a0, 6(s0)
 ; RV32I-NEXT:    sh s1, 4(s0)
 ; RV32I-NEXT:    sh s3, 2(s0)
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -1094,11 +1094,11 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh a0, 6(s0)
 ; RV64I-NEXT:    sh s1, 4(s0)
 ; RV64I-NEXT:    sh s3, 2(s0)
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -1212,17 +1212,17 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV32I-NEXT:    sw s6, 8(s0)
 ; RV32I-NEXT:    sw s8, 4(s0)
 ; RV32I-NEXT:    sw s7, 0(s0)
-; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;
@@ -1283,17 +1283,17 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV32IM-NEXT:    sw s6, 8(s0)
 ; RV32IM-NEXT:    sw s8, 4(s0)
 ; RV32IM-NEXT:    sw s7, 0(s0)
-; RV32IM-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    addi sp, sp, 48
 ; RV32IM-NEXT:    ret
 ;
@@ -1325,11 +1325,11 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind {
 ; RV64I-NEXT:    sd a0, 24(s0)
 ; RV64I-NEXT:    sd s1, 16(s0)
 ; RV64I-NEXT:    sd s3, 8(s0)
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
index b7f29b18c641b..5bfa08a2201ab 100644
--- a/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
+++ b/llvm/test/CodeGen/RISCV/stack-realignment-with-variable-sized-objects.ll
@@ -28,9 +28,9 @@ define void @caller(i32 %n) {
 ; RV32I-NEXT:    mv a1, s1
 ; RV32I-NEXT:    call callee@plt
 ; RV32I-NEXT:    addi sp, s0, -64
-; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 52(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 64
 ; RV32I-NEXT:    ret
 ;
@@ -57,9 +57,9 @@ define void @caller(i32 %n) {
 ; RV64I-NEXT:    mv a1, s1
 ; RV64I-NEXT:    call callee@plt
 ; RV64I-NEXT:    addi sp, s0, -64
-; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 64
 ; RV64I-NEXT:    ret
   %1 = alloca i8, i32 %n

diff --git a/llvm/test/CodeGen/RISCV/stack-realignment.ll b/llvm/test/CodeGen/RISCV/stack-realignment.ll
index 681ed762346c3..8d6d097636492 100644
--- a/llvm/test/CodeGen/RISCV/stack-realignment.ll
+++ b/llvm/test/CodeGen/RISCV/stack-realignment.ll
@@ -21,8 +21,8 @@ define void @caller32() {
 ; RV32I-NEXT:    mv a0, sp
 ; RV32I-NEXT:    call callee@plt
 ; RV32I-NEXT:    addi sp, s0, -32
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -40,8 +40,8 @@ define void @caller32() {
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    call callee@plt
 ; RV64I-NEXT:    addi sp, s0, -32
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
   %1 = alloca i8, align 32
@@ -93,8 +93,8 @@ define void @caller64() {
 ; RV32I-NEXT:    mv a0, sp
 ; RV32I-NEXT:    call callee@plt
 ; RV32I-NEXT:    addi sp, s0, -64
-; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 64
 ; RV32I-NEXT:    ret
 ;
@@ -112,8 +112,8 @@ define void @caller64() {
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    call callee@plt
 ; RV64I-NEXT:    addi sp, s0, -64
-; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 64
 ; RV64I-NEXT:    ret
   %1 = alloca i8, align 64
@@ -165,8 +165,8 @@ define void @caller128() {
 ; RV32I-NEXT:    mv a0, sp
 ; RV32I-NEXT:    call callee@plt
 ; RV32I-NEXT:    addi sp, s0, -128
-; RV32I-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 124(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 120(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 128
 ; RV32I-NEXT:    ret
 ;
@@ -184,8 +184,8 @@ define void @caller128() {
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    call callee@plt
 ; RV64I-NEXT:    addi sp, s0, -128
-; RV64I-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 120(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 112(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 128
 ; RV64I-NEXT:    ret
   %1 = alloca i8, align 128
@@ -237,8 +237,8 @@ define void @caller256() {
 ; RV32I-NEXT:    mv a0, sp
 ; RV32I-NEXT:    call callee@plt
 ; RV32I-NEXT:    addi sp, s0, -256
-; RV32I-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 252(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 248(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 256
 ; RV32I-NEXT:    ret
 ;
@@ -256,8 +256,8 @@ define void @caller256() {
 ; RV64I-NEXT:    mv a0, sp
 ; RV64I-NEXT:    call callee@plt
 ; RV64I-NEXT:    addi sp, s0, -256
-; RV64I-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 256
 ; RV64I-NEXT:    ret
   %1 = alloca i8, align 256
@@ -309,8 +309,8 @@ define void @caller512() {
 ; RV32I-NEXT:    addi a0, sp, 512
 ; RV32I-NEXT:    call callee@plt
 ; RV32I-NEXT:    addi sp, s0, -1024
-; RV32I-NEXT:    lw s0, 1016(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 1020(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 1016(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 1024
 ; RV32I-NEXT:    ret
 ;
@@ -328,8 +328,8 @@ define void @caller512() {
 ; RV64I-NEXT:    addi a0, sp, 512
 ; RV64I-NEXT:    call callee@plt
 ; RV64I-NEXT:    addi sp, s0, -1024
-; RV64I-NEXT:    ld s0, 1008(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 1016(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 1008(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 1024
 ; RV64I-NEXT:    ret
   %1 = alloca i8, align 512
@@ -383,8 +383,8 @@ define void @caller1024() {
 ; RV32I-NEXT:    call callee@plt
 ; RV32I-NEXT:    addi sp, s0, -2048
 ; RV32I-NEXT:    addi sp, sp, 16
-; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 2032
 ; RV32I-NEXT:    ret
 ;
@@ -404,8 +404,8 @@ define void @caller1024() {
 ; RV64I-NEXT:    call callee@plt
 ; RV64I-NEXT:    addi sp, s0, -2048
 ; RV64I-NEXT:    addi sp, sp, 16
-; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 2032
 ; RV64I-NEXT:    ret
   %1 = alloca i8, align 1024
@@ -466,8 +466,8 @@ define void @caller2048() {
 ; RV32I-NEXT:    lui a0, 1
 ; RV32I-NEXT:    addi a0, a0, -2032
 ; RV32I-NEXT:    add sp, sp, a0
-; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 2032
 ; RV32I-NEXT:    ret
 ;
@@ -494,8 +494,8 @@ define void @caller2048() {
 ; RV64I-NEXT:    lui a0, 1
 ; RV64I-NEXT:    addiw a0, a0, -2032
 ; RV64I-NEXT:    add sp, sp, a0
-; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 2032
 ; RV64I-NEXT:    ret
   %1 = alloca i8, align 2048
@@ -556,8 +556,8 @@ define void @caller4096() {
 ; RV32I-NEXT:    lui a0, 2
 ; RV32I-NEXT:    addi a0, a0, -2032
 ; RV32I-NEXT:    add sp, sp, a0
-; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 2028(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 2024(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 2032
 ; RV32I-NEXT:    ret
 ;
@@ -584,8 +584,8 @@ define void @caller4096() {
 ; RV64I-NEXT:    lui a0, 2
 ; RV64I-NEXT:    addiw a0, a0, -2032
 ; RV64I-NEXT:    add sp, sp, a0
-; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 2024(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 2016(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 2032
 ; RV64I-NEXT:    ret
   %1 = alloca i8, align 4096

diff --git a/llvm/test/CodeGen/RISCV/stack-store-check.ll b/llvm/test/CodeGen/RISCV/stack-store-check.ll
index 7b2e3bc4846e8..218490933333e 100644
--- a/llvm/test/CodeGen/RISCV/stack-store-check.ll
+++ b/llvm/test/CodeGen/RISCV/stack-store-check.ll
@@ -286,19 +286,19 @@ define void @main() local_unnamed_addr nounwind {
 ; CHECK-NEXT:    sw a1, %lo(Y1+12)(a4)
 ; CHECK-NEXT:    sw a2, %lo(Y1)(a4)
 ; CHECK-NEXT:    sw a3, %lo(Y1+4)(a4)
-; CHECK-NEXT:    lw s11, 636(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s10, 640(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s9, 644(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s8, 648(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s7, 652(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s6, 656(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s5, 660(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s4, 664(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s3, 668(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s2, 672(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s1, 676(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s0, 680(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    lw ra, 684(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s0, 680(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s1, 676(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s2, 672(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s3, 668(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s4, 664(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s5, 660(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s6, 656(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s7, 652(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s8, 648(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s9, 644(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s10, 640(sp) # 4-byte Folded Reload
+; CHECK-NEXT:    lw s11, 636(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 688
 ; CHECK-NEXT:    ret
   %1 = load fp128, fp128* @U, align 16

diff --git a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
index 0c4cc314457a0..09f74a175802d 100644
--- a/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/RISCV/umulo-128-legalisation-lowering.ll
@@ -100,13 +100,13 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) #0 {
 ; RISCV32-NEXT:    sw t1, 8(a0)
 ; RISCV32-NEXT:    sw t4, 12(a0)
 ; RISCV32-NEXT:    sb a1, 16(a0)
-; RISCV32-NEXT:    lw s6, 4(sp) # 4-byte Folded Reload
-; RISCV32-NEXT:    lw s5, 8(sp) # 4-byte Folded Reload
-; RISCV32-NEXT:    lw s4, 12(sp) # 4-byte Folded Reload
-; RISCV32-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
-; RISCV32-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
-; RISCV32-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
 ; RISCV32-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
+; RISCV32-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
+; RISCV32-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
+; RISCV32-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
+; RISCV32-NEXT:    lw s4, 12(sp) # 4-byte Folded Reload
+; RISCV32-NEXT:    lw s5, 8(sp) # 4-byte Folded Reload
+; RISCV32-NEXT:    lw s6, 4(sp) # 4-byte Folded Reload
 ; RISCV32-NEXT:    addi sp, sp, 32
 ; RISCV32-NEXT:    ret
 start:

diff --git a/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
index 34b235bc353c5..1e2d99dd03f6a 100644
--- a/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
+++ b/llvm/test/CodeGen/RISCV/unfold-masked-merge-scalar-variablemask.ll
@@ -1452,9 +1452,9 @@ define i32 @in_multiuse_A(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind {
 ; RV32-NEXT:    mv a0, s1
 ; RV32-NEXT:    call use32@plt
 ; RV32-NEXT:    xor a0, s1, s0
-; RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -1470,9 +1470,9 @@ define i32 @in_multiuse_A(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind {
 ; RV64-NEXT:    mv a0, s1
 ; RV64-NEXT:    call use32@plt
 ; RV64-NEXT:    xor a0, s1, s0
-; RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 32
 ; RV64-NEXT:    ret
   %n0 = xor i32 %x, %y
@@ -1494,9 +1494,9 @@ define i32 @in_multiuse_B(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind {
 ; RV32-NEXT:    and s1, a0, a3
 ; RV32-NEXT:    call use32@plt
 ; RV32-NEXT:    xor a0, s1, s0
-; RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -1511,9 +1511,9 @@ define i32 @in_multiuse_B(i32 %x, i32 %y, i32 %z, i32 %mask) nounwind {
 ; RV64-NEXT:    and s1, a0, a3
 ; RV64-NEXT:    call use32@plt
 ; RV64-NEXT:    xor a0, s1, s0
-; RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 32
 ; RV64-NEXT:    ret
   %n0 = xor i32 %x, %y

diff --git a/llvm/test/CodeGen/RISCV/urem-lkk.ll b/llvm/test/CodeGen/RISCV/urem-lkk.ll
index 5824db068590d..16eb225d1b111 100644
--- a/llvm/test/CodeGen/RISCV/urem-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-lkk.ll
@@ -133,9 +133,9 @@ define i32 @combine_urem_udiv(i32 %x) nounwind {
 ; RV32I-NEXT:    mv a0, s0
 ; RV32I-NEXT:    call __udivsi3@plt
 ; RV32I-NEXT:    add a0, s1, a0
-; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
@@ -170,9 +170,9 @@ define i32 @combine_urem_udiv(i32 %x) nounwind {
 ; RV64I-NEXT:    mv a0, s0
 ; RV64I-NEXT:    call __udivdi3@plt
 ; RV64I-NEXT:    add a0, s1, a0
-; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 32
 ; RV64I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index 560af257d4228..6a9c674d9d2b5 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -391,11 +391,11 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV32-NEXT:    slli a1, s1, 22
 ; RV32-NEXT:    sub a0, a0, a1
 ; RV32-NEXT:    sw a0, 0(s0)
-; RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 32
 ; RV32-NEXT:    ret
 ;
@@ -452,11 +452,11 @@ define void @test_urem_vec(<3 x i11>* %X) nounwind {
 ; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    sb a0, 4(s0)
-; RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64-NEXT:    addi sp, sp, 48
 ; RV64-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
index 3feeb5ce324c1..330797320787e 100644
--- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
+++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll
@@ -44,13 +44,13 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh s0, 4(s1)
 ; RV32I-NEXT:    sh s5, 2(s1)
 ; RV32I-NEXT:    sh s4, 0(s1)
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -132,13 +132,13 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh s0, 4(s1)
 ; RV64I-NEXT:    sh s5, 2(s1)
 ; RV64I-NEXT:    sh s4, 0(s1)
-; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 64
 ; RV64I-NEXT:    ret
 ;
@@ -249,13 +249,13 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh s0, 4(s1)
 ; RV32I-NEXT:    sh s5, 2(s1)
 ; RV32I-NEXT:    sh s4, 0(s1)
-; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -336,13 +336,13 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh s0, 4(s1)
 ; RV64I-NEXT:    sh s5, 2(s1)
 ; RV64I-NEXT:    sh s4, 0(s1)
-; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 64
 ; RV64I-NEXT:    ret
 ;
@@ -459,17 +459,17 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh a2, 4(s0)
 ; RV32I-NEXT:    sh a1, 2(s0)
 ; RV32I-NEXT:    sh a0, 0(s0)
-; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;
@@ -578,17 +578,17 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh a2, 4(s0)
 ; RV64I-NEXT:    sh a1, 2(s0)
 ; RV64I-NEXT:    sh a0, 0(s0)
-; RV64I-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 88(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 80(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 72(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 64(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 56(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s4, 48(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s5, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s6, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s7, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s8, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s9, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 96
 ; RV64I-NEXT:    ret
 ;
@@ -675,11 +675,11 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh a3, 4(s0)
 ; RV32I-NEXT:    sh a2, 2(s0)
 ; RV32I-NEXT:    sh a1, 0(s0)
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -731,11 +731,11 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh a3, 4(s0)
 ; RV64I-NEXT:    sh a2, 2(s0)
 ; RV64I-NEXT:    sh a1, 0(s0)
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -803,11 +803,11 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
 ; RV32I-NEXT:    sh a0, 6(s0)
 ; RV32I-NEXT:    sh s1, 4(s0)
 ; RV32I-NEXT:    sh s3, 2(s0)
-; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 32
 ; RV32I-NEXT:    ret
 ;
@@ -873,11 +873,11 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind {
 ; RV64I-NEXT:    sh a0, 6(s0)
 ; RV64I-NEXT:    sh s1, 4(s0)
 ; RV64I-NEXT:    sh s3, 2(s0)
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;
@@ -1007,17 +1007,17 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV32I-NEXT:    sw s6, 8(s0)
 ; RV32I-NEXT:    sw s8, 4(s0)
 ; RV32I-NEXT:    sw s7, 0(s0)
-; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 48
 ; RV32I-NEXT:    ret
 ;
@@ -1078,17 +1078,17 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV32IM-NEXT:    sw s6, 8(s0)
 ; RV32IM-NEXT:    sw s8, 4(s0)
 ; RV32IM-NEXT:    sw s7, 0(s0)
-; RV32IM-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
-; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    lw ra, 44(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s0, 40(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s1, 36(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s2, 32(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s3, 28(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s4, 24(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s5, 20(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s6, 16(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s7, 12(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s8, 8(sp) # 4-byte Folded Reload
+; RV32IM-NEXT:    lw s9, 4(sp) # 4-byte Folded Reload
 ; RV32IM-NEXT:    addi sp, sp, 48
 ; RV32IM-NEXT:    ret
 ;
@@ -1120,11 +1120,11 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind {
 ; RV64I-NEXT:    sd a0, 24(s0)
 ; RV64I-NEXT:    sd s1, 16(s0)
 ; RV64I-NEXT:    sd s3, 8(s0)
-; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
 ; RV64I-NEXT:    addi sp, sp, 48
 ; RV64I-NEXT:    ret
 ;

diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll
index 8141c7f84194f..5282bd532caff 100644
--- a/llvm/test/CodeGen/RISCV/vararg.ll
+++ b/llvm/test/CodeGen/RISCV/vararg.ll
@@ -75,8 +75,8 @@ define i32 @va1(i8* %fmt, ...) {
 ; ILP32-ILP32F-WITHFP-NEXT:    sw a1, 4(s0)
 ; ILP32-ILP32F-WITHFP-NEXT:    addi a1, s0, 8
 ; ILP32-ILP32F-WITHFP-NEXT:    sw a1, -12(s0)
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 48
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -134,8 +134,8 @@ define i32 @va1(i8* %fmt, ...) {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a0, s0, 12
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a0, -24(s0)
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    lw a0, 8(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 96
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %va = alloca i8*, align 4
@@ -183,8 +183,8 @@ define i32 @va1_va_arg(i8* %fmt, ...) nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    sw a1, 4(s0)
 ; ILP32-ILP32F-WITHFP-NEXT:    addi a1, s0, 8
 ; ILP32-ILP32F-WITHFP-NEXT:    sw a1, -12(s0)
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 48
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -236,8 +236,8 @@ define i32 @va1_va_arg(i8* %fmt, ...) nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a1, 8(s0)
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a1, s0, 16
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a1, -24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 96
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %va = alloca i8*, align 4
@@ -275,9 +275,9 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind {
 ; ILP32-ILP32F-FPELIM-NEXT:    call notdead@plt
 ; ILP32-ILP32F-FPELIM-NEXT:    mv a0, s1
 ; ILP32-ILP32F-FPELIM-NEXT:    addi sp, s0, -16
-; ILP32-ILP32F-FPELIM-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; ILP32-ILP32F-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-FPELIM-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-FPELIM-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-FPELIM-NEXT:    addi sp, sp, 48
 ; ILP32-ILP32F-FPELIM-NEXT:    ret
 ;
@@ -305,9 +305,9 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    call notdead@plt
 ; ILP32-ILP32F-WITHFP-NEXT:    mv a0, s1
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, s0, -16
-; ILP32-ILP32F-WITHFP-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 48
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -335,9 +335,9 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind {
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    call notdead@plt
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    mv a0, s1
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    addi sp, s0, -16
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    addi sp, sp, 48
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    ret
 ;
@@ -367,9 +367,9 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind {
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    call notdead@plt
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    mv a0, s1
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addi sp, s0, -32
-; LP64-LP64F-LP64D-FPELIM-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; LP64-LP64F-LP64D-FPELIM-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-FPELIM-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-FPELIM-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addi sp, sp, 96
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    ret
 ;
@@ -399,9 +399,9 @@ define i32 @va1_va_arg_alloca(i8* %fmt, ...) nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    call notdead@plt
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    mv a0, s1
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, s0, -32
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 96
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %va = alloca i8*, align 4
@@ -438,8 +438,8 @@ define void @va1_caller() nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    li a4, 2
 ; ILP32-ILP32F-WITHFP-NEXT:    li a2, 0
 ; ILP32-ILP32F-WITHFP-NEXT:    call va1@plt
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 16
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -477,8 +477,8 @@ define void @va1_caller() nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    slli a1, a0, 52
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    li a2, 2
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    call va1@plt
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 16
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %1 = call i32 (i8*, ...) @va1(i8* undef, double 1.0, i32 2)
@@ -529,8 +529,8 @@ define i64 @va2(i8 *%fmt, ...) nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    lw a0, 0(a1)
 ; ILP32-ILP32F-WITHFP-NEXT:    ori a1, a1, 4
 ; ILP32-ILP32F-WITHFP-NEXT:    lw a1, 0(a1)
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 48
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -606,8 +606,8 @@ define i64 @va2(i8 *%fmt, ...) nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a1, a1, -8
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    and a0, a0, a1
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld a0, 0(a0)
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 96
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %va = alloca i8*, align 4
@@ -670,8 +670,8 @@ define i64 @va2_va_arg(i8 *%fmt, ...) nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    addi a1, a1, 8
 ; ILP32-ILP32F-WITHFP-NEXT:    sw a1, -12(s0)
 ; ILP32-ILP32F-WITHFP-NEXT:    lw a1, 0(a2)
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 48
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -728,8 +728,8 @@ define i64 @va2_va_arg(i8 *%fmt, ...) nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a1, 8(s0)
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a1, s0, 16
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a1, -24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 96
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %va = alloca i8*, align 4
@@ -762,8 +762,8 @@ define void @va2_caller() nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    lui a3, 261888
 ; ILP32-ILP32F-WITHFP-NEXT:    li a2, 0
 ; ILP32-ILP32F-WITHFP-NEXT:    call va2@plt
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 16
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -798,8 +798,8 @@ define void @va2_caller() nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    li a0, 1023
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    slli a1, a0, 52
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    call va2@plt
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 16
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
  %1 = call i64 (i8*, ...) @va2(i8* undef, double 1.000000e+00)
@@ -854,8 +854,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    sltu a1, a0, a1
 ; ILP32-ILP32F-WITHFP-NEXT:    add a2, a2, a4
 ; ILP32-ILP32F-WITHFP-NEXT:    add a1, a2, a1
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 16(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 16(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 48
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -933,8 +933,8 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    and a0, a0, a2
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld a0, 0(a0)
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    add a0, a1, a0
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 80
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %va = alloca i8*, align 4
@@ -1002,8 +1002,8 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    sltu a1, a0, a1
 ; ILP32-ILP32F-WITHFP-NEXT:    add a2, a2, a3
 ; ILP32-ILP32F-WITHFP-NEXT:    add a1, a2, a1
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 16(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 20(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 16(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 48
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -1060,8 +1060,8 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a3, s0, 8
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    add a0, a1, a2
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a3, -24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 80
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %va = alloca i8*, align 4
@@ -1101,8 +1101,8 @@ define void @va3_caller() nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    li a2, 0
 ; ILP32-ILP32F-WITHFP-NEXT:    li a4, 0
 ; ILP32-ILP32F-WITHFP-NEXT:    call va3@plt
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 16
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -1144,8 +1144,8 @@ define void @va3_caller() nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    li a0, 2
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    li a1, 1111
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    call va3@plt
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 16
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
  %1 = call i64 (i32, i64, ...) @va3(i32 2, i64 1111, double 2.000000e+00)
@@ -1191,8 +1191,8 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
 ; ILP32-ILP32F-FPELIM-NEXT:    add a1, a1, s0
 ; ILP32-ILP32F-FPELIM-NEXT:    add a1, a1, a2
 ; ILP32-ILP32F-FPELIM-NEXT:    add a0, a1, a0
-; ILP32-ILP32F-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-FPELIM-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-FPELIM-NEXT:    addi sp, sp, 48
 ; ILP32-ILP32F-FPELIM-NEXT:    ret
 ;
@@ -1234,9 +1234,9 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    add a1, a1, s1
 ; ILP32-ILP32F-WITHFP-NEXT:    add a1, a1, a2
 ; ILP32-ILP32F-WITHFP-NEXT:    add a0, a1, a0
-; ILP32-ILP32F-WITHFP-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 64
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -1276,8 +1276,8 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    add a1, a1, s0
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    add a1, a1, a2
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    add a0, a1, a0
-; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    addi sp, sp, 48
 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT:    ret
 ;
@@ -1317,8 +1317,8 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addw a1, a1, s0
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addw a1, a1, a2
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addw a0, a1, a0
-; LP64-LP64F-LP64D-FPELIM-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-FPELIM-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    addi sp, sp, 96
 ; LP64-LP64F-LP64D-FPELIM-NEXT:    ret
 ;
@@ -1360,9 +1360,9 @@ define i32 @va4_va_copy(i32 %argno, ...) nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addw a1, a1, s1
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addw a1, a1, a2
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addw a0, a1, a0
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 112
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %vargs = alloca i8*, align 4
@@ -1476,8 +1476,8 @@ define void @va5_aligned_stack_caller() nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    li a7, 4
 ; ILP32-ILP32F-WITHFP-NEXT:    sw a5, -32(s0)
 ; ILP32-ILP32F-WITHFP-NEXT:    call va5_aligned_stack_callee@plt
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 56(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 64
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -1620,8 +1620,8 @@ define void @va5_aligned_stack_caller() nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    li a7, 14
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd t0, 0(sp)
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    call va5_aligned_stack_callee@plt
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 48
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %1 = call i32 (i32, ...) @va5_aligned_stack_callee(i32 1, i32 11,
@@ -1667,8 +1667,8 @@ define i32 @va6_no_fixed_args(...) nounwind {
 ; ILP32-ILP32F-WITHFP-NEXT:    sw a0, 0(s0)
 ; ILP32-ILP32F-WITHFP-NEXT:    addi a1, s0, 4
 ; ILP32-ILP32F-WITHFP-NEXT:    sw a1, -12(s0)
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 48
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -1720,8 +1720,8 @@ define i32 @va6_no_fixed_args(...) nounwind {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a0, 0(s0)
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi a1, s0, 8
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    sd a1, -24(s0)
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 96
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %va = alloca i8*, align 4
@@ -1808,8 +1808,8 @@ define i32 @va_large_stack(i8* %fmt, ...) {
 ; ILP32-ILP32F-WITHFP-NEXT:    lui a1, 24414
 ; ILP32-ILP32F-WITHFP-NEXT:    addi a1, a1, -1728
 ; ILP32-ILP32F-WITHFP-NEXT:    add sp, sp, a1
-; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 1992(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    lw ra, 1996(sp) # 4-byte Folded Reload
+; ILP32-ILP32F-WITHFP-NEXT:    lw s0, 1992(sp) # 4-byte Folded Reload
 ; ILP32-ILP32F-WITHFP-NEXT:    addi sp, sp, 2032
 ; ILP32-ILP32F-WITHFP-NEXT:    ret
 ;
@@ -1933,8 +1933,8 @@ define i32 @va_large_stack(i8* %fmt, ...) {
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    lui a1, 24414
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addiw a1, a1, -1680
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    add sp, sp, a1
-; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 1952(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ld ra, 1960(sp) # 8-byte Folded Reload
+; LP64-LP64F-LP64D-WITHFP-NEXT:    ld s0, 1952(sp) # 8-byte Folded Reload
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    addi sp, sp, 2032
 ; LP64-LP64F-LP64D-WITHFP-NEXT:    ret
   %large = alloca [ 100000000 x i8 ]

diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index c3ff237bc4739..880129e012b66 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -959,10 +959,10 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) {
 ; RV32-NEXT:    sw a0, 0(a4)
 ; RV32-NEXT:    sw a6, 4(a4)
 ; RV32-NEXT:    mv a0, a1
-; RV32-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -1036,10 +1036,10 @@ define zeroext i1 @smulo.i64(i64 %v1, i64 %v2, i64* %res) {
 ; RV32ZBA-NEXT:    sw a0, 0(a4)
 ; RV32ZBA-NEXT:    sw a6, 4(a4)
 ; RV32ZBA-NEXT:    mv a0, a1
-; RV32ZBA-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
+; RV32ZBA-NEXT:    lw s3, 0(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    addi sp, sp, 16
 ; RV32ZBA-NEXT:    ret
 ;
@@ -2393,9 +2393,9 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    mv a0, a2
 ; RV32-NEXT:    mv a1, a3
 ; RV32-NEXT:  .LBB44_2: # %entry
-; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -2468,9 +2468,9 @@ define i64 @smulo.select.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    mv a0, a2
 ; RV32ZBA-NEXT:    mv a1, a3
 ; RV32ZBA-NEXT:  .LBB44_2: # %entry
-; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    addi sp, sp, 16
 ; RV32ZBA-NEXT:    ret
 ;
@@ -2546,9 +2546,9 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:    xor a1, t6, a1
 ; RV32-NEXT:    or a0, a1, a0
 ; RV32-NEXT:    seqz a0, a0
-; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -2615,9 +2615,9 @@ define i1 @smulo.not.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:    xor a1, t6, a1
 ; RV32ZBA-NEXT:    or a0, a1, a0
 ; RV32ZBA-NEXT:    seqz a0, a0
-; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    addi sp, sp, 16
 ; RV32ZBA-NEXT:    ret
 ;
@@ -3534,9 +3534,9 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32-NEXT:  .LBB59_2: # %continue
 ; RV32-NEXT:    li a0, 1
 ; RV32-NEXT:  .LBB59_3: # %overflow
-; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
@@ -3613,9 +3613,9 @@ define zeroext i1 @smulo.br.i64(i64 %v1, i64 %v2) {
 ; RV32ZBA-NEXT:  .LBB59_2: # %continue
 ; RV32ZBA-NEXT:    li a0, 1
 ; RV32ZBA-NEXT:  .LBB59_3: # %overflow
-; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
-; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32ZBA-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
+; RV32ZBA-NEXT:    lw s2, 4(sp) # 4-byte Folded Reload
 ; RV32ZBA-NEXT:    addi sp, sp, 16
 ; RV32ZBA-NEXT:    ret
 ;
