[llvm] b6c4ad7 - [RISCV] Remove x7 from fastcc list. (#96729)

via llvm-commits <llvm-commits at lists.llvm.org>
Tue Jul 16 17:37:59 PDT 2024


Author: Yeting Kuo
Date: 2024-07-17T08:37:55+08:00
New Revision: b6c4ad700b0f5851313f18df89b9da2c27ba3185

URL: https://github.com/llvm/llvm-project/commit/b6c4ad700b0f5851313f18df89b9da2c27ba3185
DIFF: https://github.com/llvm/llvm-project/commit/b6c4ad700b0f5851313f18df89b9da2c27ba3185.diff

LOG: [RISCV] Remove x7 from fastcc list. (#96729)

Like #93321, this patch addresses the conflicting use of x7 between fastcc
and Zicfilp, but it does so by removing x7 from the fastcc register list
directly. The goal is to reduce the code complexity of #93321; we also found
that this increases instruction count by at most 0.02% on most benchmarks,
and may even benefit some of them.
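
For background: Zicfilp landing pads receive their expected label through x7,
so a fastcc argument passed in x7 would conflict with the label set up around
any indirect call. A minimal sketch of that mechanism (the label value and
symbol names below are illustrative, not taken from this patch):

    # Caller: place the expected landing-pad label in x7 (t2) before the
    # indirect call; the hardware checks it at the jump target.
    lui   t2, 0x42        # x7[31:12] = expected landing-pad label
    jalr  ra, 0(a0)       # indirect call to the function pointer in a0

    # Callee: must begin with a matching lpad instruction, otherwise the
    # hart raises a software-check exception.
    callee:
        lpad  0x42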

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/fastcc-int.ll
    llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
    llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8b5e56bff4097..1280201d7b814 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18884,15 +18884,14 @@ ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
 static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
   // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
   // for save-restore libcall, so we don't use them.
+  // Don't use X7 for fastcc, since Zicfilp uses X7 as the label register.
   static const MCPhysReg FastCCIGPRs[] = {
-      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
-      RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
-      RISCV::X29, RISCV::X30, RISCV::X31};
+      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15,
+      RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31};
 
   // The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E.
   static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
-                                          RISCV::X13, RISCV::X14, RISCV::X15,
-                                          RISCV::X7};
+                                          RISCV::X13, RISCV::X14, RISCV::X15};
 
   if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
     return ArrayRef(FastCCEGPRs);

diff --git a/llvm/test/CodeGen/RISCV/fastcc-int.ll b/llvm/test/CodeGen/RISCV/fastcc-int.ll
index e4c41a1aa890f..75046b701b235 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-int.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-int.ll
@@ -32,16 +32,17 @@ define i32 @caller(<16 x i32> %A) nounwind {
 ; RV32-NEXT:    lw a5, 20(a0)
 ; RV32-NEXT:    lw a6, 24(a0)
 ; RV32-NEXT:    lw a7, 28(a0)
-; RV32-NEXT:    lw t2, 32(a0)
-; RV32-NEXT:    lw t3, 36(a0)
-; RV32-NEXT:    lw t4, 40(a0)
-; RV32-NEXT:    lw t5, 44(a0)
-; RV32-NEXT:    lw t6, 48(a0)
-; RV32-NEXT:    lw t1, 52(a0)
+; RV32-NEXT:    lw t3, 32(a0)
+; RV32-NEXT:    lw t4, 36(a0)
+; RV32-NEXT:    lw t5, 40(a0)
+; RV32-NEXT:    lw t6, 44(a0)
+; RV32-NEXT:    lw t1, 48(a0)
+; RV32-NEXT:    lw t2, 52(a0)
 ; RV32-NEXT:    lw s0, 56(a0)
 ; RV32-NEXT:    lw a0, 60(a0)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    sw s0, 4(sp)
+; RV32-NEXT:    sw a0, 12(sp)
+; RV32-NEXT:    sw s0, 8(sp)
+; RV32-NEXT:    sw t2, 4(sp)
 ; RV32-NEXT:    sw t1, 0(sp)
 ; RV32-NEXT:    mv a0, t0
 ; RV32-NEXT:    call callee
@@ -63,16 +64,17 @@ define i32 @caller(<16 x i32> %A) nounwind {
 ; RV64-NEXT:    ld a5, 40(a0)
 ; RV64-NEXT:    ld a6, 48(a0)
 ; RV64-NEXT:    ld a7, 56(a0)
-; RV64-NEXT:    ld t2, 64(a0)
-; RV64-NEXT:    ld t3, 72(a0)
-; RV64-NEXT:    ld t4, 80(a0)
-; RV64-NEXT:    ld t5, 88(a0)
-; RV64-NEXT:    ld t6, 96(a0)
-; RV64-NEXT:    ld t1, 104(a0)
+; RV64-NEXT:    ld t3, 64(a0)
+; RV64-NEXT:    ld t4, 72(a0)
+; RV64-NEXT:    ld t5, 80(a0)
+; RV64-NEXT:    ld t6, 88(a0)
+; RV64-NEXT:    ld t1, 96(a0)
+; RV64-NEXT:    ld t2, 104(a0)
 ; RV64-NEXT:    ld s0, 112(a0)
 ; RV64-NEXT:    ld a0, 120(a0)
-; RV64-NEXT:    sd a0, 16(sp)
-; RV64-NEXT:    sd s0, 8(sp)
+; RV64-NEXT:    sd a0, 24(sp)
+; RV64-NEXT:    sd s0, 16(sp)
+; RV64-NEXT:    sd t2, 8(sp)
 ; RV64-NEXT:    sd t1, 0(sp)
 ; RV64-NEXT:    mv a0, t0
 ; RV64-NEXT:    call callee

diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index a44d31dff09cc..1dbb060fc35fa 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -288,29 +288,30 @@ define half @caller_half_32(<32 x half> %A) nounwind {
 ; ZHINX32-NEXT:    lh t2, 196(sp)
 ; ZHINX32-NEXT:    lh t1, 200(sp)
 ; ZHINX32-NEXT:    lh t0, 204(sp)
-; ZHINX32-NEXT:    sh t0, 36(sp)
-; ZHINX32-NEXT:    sh t1, 34(sp)
-; ZHINX32-NEXT:    sh t2, 32(sp)
-; ZHINX32-NEXT:    sh t3, 30(sp)
-; ZHINX32-NEXT:    sh ra, 28(sp)
-; ZHINX32-NEXT:    sh s11, 26(sp)
-; ZHINX32-NEXT:    sh s10, 24(sp)
-; ZHINX32-NEXT:    sh s9, 22(sp)
-; ZHINX32-NEXT:    sh s8, 20(sp)
-; ZHINX32-NEXT:    sh s7, 18(sp)
-; ZHINX32-NEXT:    sh s6, 16(sp)
-; ZHINX32-NEXT:    sh s5, 14(sp)
-; ZHINX32-NEXT:    sh s4, 12(sp)
-; ZHINX32-NEXT:    sh s3, 10(sp)
-; ZHINX32-NEXT:    sh s2, 8(sp)
-; ZHINX32-NEXT:    sh s1, 6(sp)
-; ZHINX32-NEXT:    sh s0, 4(sp)
-; ZHINX32-NEXT:    sh t4, 2(sp)
-; ZHINX32-NEXT:    sh t5, 0(sp)
-; ZHINX32-NEXT:    lw t2, 56(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t3, 52(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t4, 48(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t5, 44(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    sh t0, 38(sp)
+; ZHINX32-NEXT:    sh t1, 36(sp)
+; ZHINX32-NEXT:    sh t2, 34(sp)
+; ZHINX32-NEXT:    sh t3, 32(sp)
+; ZHINX32-NEXT:    sh ra, 30(sp)
+; ZHINX32-NEXT:    sh s11, 28(sp)
+; ZHINX32-NEXT:    sh s10, 26(sp)
+; ZHINX32-NEXT:    sh s9, 24(sp)
+; ZHINX32-NEXT:    sh s8, 22(sp)
+; ZHINX32-NEXT:    sh s7, 20(sp)
+; ZHINX32-NEXT:    sh s6, 18(sp)
+; ZHINX32-NEXT:    sh s5, 16(sp)
+; ZHINX32-NEXT:    sh s4, 14(sp)
+; ZHINX32-NEXT:    sh s3, 12(sp)
+; ZHINX32-NEXT:    sh s2, 10(sp)
+; ZHINX32-NEXT:    sh s1, 8(sp)
+; ZHINX32-NEXT:    sh s0, 6(sp)
+; ZHINX32-NEXT:    sh t4, 4(sp)
+; ZHINX32-NEXT:    sh t5, 2(sp)
+; ZHINX32-NEXT:    sh t6, 0(sp)
+; ZHINX32-NEXT:    lw t3, 56(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t4, 52(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t5, 48(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t6, 44(sp) # 4-byte Folded Reload
 ; ZHINX32-NEXT:    call callee_half_32
 ; ZHINX32-NEXT:    lw ra, 108(sp) # 4-byte Folded Reload
 ; ZHINX32-NEXT:    lw s0, 104(sp) # 4-byte Folded Reload
@@ -372,29 +373,30 @@ define half @caller_half_32(<32 x half> %A) nounwind {
 ; ZHINX64-NEXT:    lh t2, 344(sp)
 ; ZHINX64-NEXT:    lh t1, 352(sp)
 ; ZHINX64-NEXT:    lh t0, 360(sp)
-; ZHINX64-NEXT:    sh t0, 36(sp)
-; ZHINX64-NEXT:    sh t1, 34(sp)
-; ZHINX64-NEXT:    sh t2, 32(sp)
-; ZHINX64-NEXT:    sh t3, 30(sp)
-; ZHINX64-NEXT:    sh ra, 28(sp)
-; ZHINX64-NEXT:    sh s11, 26(sp)
-; ZHINX64-NEXT:    sh s10, 24(sp)
-; ZHINX64-NEXT:    sh s9, 22(sp)
-; ZHINX64-NEXT:    sh s8, 20(sp)
-; ZHINX64-NEXT:    sh s7, 18(sp)
-; ZHINX64-NEXT:    sh s6, 16(sp)
-; ZHINX64-NEXT:    sh s5, 14(sp)
-; ZHINX64-NEXT:    sh s4, 12(sp)
-; ZHINX64-NEXT:    sh s3, 10(sp)
-; ZHINX64-NEXT:    sh s2, 8(sp)
-; ZHINX64-NEXT:    sh s1, 6(sp)
-; ZHINX64-NEXT:    sh s0, 4(sp)
-; ZHINX64-NEXT:    sh t4, 2(sp)
-; ZHINX64-NEXT:    sh t5, 0(sp)
-; ZHINX64-NEXT:    ld t2, 64(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t3, 56(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t4, 48(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t5, 40(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    sh t0, 38(sp)
+; ZHINX64-NEXT:    sh t1, 36(sp)
+; ZHINX64-NEXT:    sh t2, 34(sp)
+; ZHINX64-NEXT:    sh t3, 32(sp)
+; ZHINX64-NEXT:    sh ra, 30(sp)
+; ZHINX64-NEXT:    sh s11, 28(sp)
+; ZHINX64-NEXT:    sh s10, 26(sp)
+; ZHINX64-NEXT:    sh s9, 24(sp)
+; ZHINX64-NEXT:    sh s8, 22(sp)
+; ZHINX64-NEXT:    sh s7, 20(sp)
+; ZHINX64-NEXT:    sh s6, 18(sp)
+; ZHINX64-NEXT:    sh s5, 16(sp)
+; ZHINX64-NEXT:    sh s4, 14(sp)
+; ZHINX64-NEXT:    sh s3, 12(sp)
+; ZHINX64-NEXT:    sh s2, 10(sp)
+; ZHINX64-NEXT:    sh s1, 8(sp)
+; ZHINX64-NEXT:    sh s0, 6(sp)
+; ZHINX64-NEXT:    sh t4, 4(sp)
+; ZHINX64-NEXT:    sh t5, 2(sp)
+; ZHINX64-NEXT:    sh t6, 0(sp)
+; ZHINX64-NEXT:    ld t3, 64(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t4, 56(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t5, 48(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t6, 40(sp) # 8-byte Folded Reload
 ; ZHINX64-NEXT:    call callee_half_32
 ; ZHINX64-NEXT:    ld ra, 168(sp) # 8-byte Folded Reload
 ; ZHINX64-NEXT:    ld s0, 160(sp) # 8-byte Folded Reload
@@ -414,38 +416,38 @@ define half @caller_half_32(<32 x half> %A) nounwind {
 ;
 ; ZFINX32-LABEL: caller_half_32:
 ; ZFINX32:       # %bb.0:
-; ZFINX32-NEXT:    addi sp, sp, -144
-; ZFINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    addi sp, sp, -160
+; ZFINX32-NEXT:    sw ra, 156(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s0, 152(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s1, 148(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s2, 144(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s3, 140(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s4, 136(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s5, 132(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s6, 128(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s7, 124(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s8, 120(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s9, 116(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s10, 112(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s11, 108(sp) # 4-byte Folded Spill
 ; ZFINX32-NEXT:    lw t0, 0(a0)
 ; ZFINX32-NEXT:    lw a1, 4(a0)
-; ZFINX32-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw a1, 104(sp) # 4-byte Folded Spill
 ; ZFINX32-NEXT:    lw a1, 8(a0)
-; ZFINX32-NEXT:    sw a1, 84(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw a1, 100(sp) # 4-byte Folded Spill
 ; ZFINX32-NEXT:    lw a1, 12(a0)
-; ZFINX32-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw a1, 96(sp) # 4-byte Folded Spill
 ; ZFINX32-NEXT:    lw a1, 16(a0)
-; ZFINX32-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw a1, 92(sp) # 4-byte Folded Spill
 ; ZFINX32-NEXT:    lw a5, 20(a0)
 ; ZFINX32-NEXT:    lw a6, 24(a0)
 ; ZFINX32-NEXT:    lw a7, 28(a0)
-; ZFINX32-NEXT:    lw t2, 32(a0)
-; ZFINX32-NEXT:    lw t3, 36(a0)
-; ZFINX32-NEXT:    lw t4, 40(a0)
-; ZFINX32-NEXT:    lw t5, 44(a0)
-; ZFINX32-NEXT:    lw t6, 48(a0)
-; ZFINX32-NEXT:    lw t1, 52(a0)
+; ZFINX32-NEXT:    lw t3, 32(a0)
+; ZFINX32-NEXT:    lw t4, 36(a0)
+; ZFINX32-NEXT:    lw t5, 40(a0)
+; ZFINX32-NEXT:    lw t6, 44(a0)
+; ZFINX32-NEXT:    lw t1, 48(a0)
+; ZFINX32-NEXT:    lw t2, 52(a0)
 ; ZFINX32-NEXT:    lw s0, 56(a0)
 ; ZFINX32-NEXT:    lw s1, 60(a0)
 ; ZFINX32-NEXT:    lw s2, 64(a0)
@@ -464,83 +466,84 @@ define half @caller_half_32(<32 x half> %A) nounwind {
 ; ZFINX32-NEXT:    lw a2, 116(a0)
 ; ZFINX32-NEXT:    lw a1, 120(a0)
 ; ZFINX32-NEXT:    lw a0, 124(a0)
-; ZFINX32-NEXT:    sw a0, 72(sp)
-; ZFINX32-NEXT:    sw a1, 68(sp)
-; ZFINX32-NEXT:    sw a2, 64(sp)
-; ZFINX32-NEXT:    sw a3, 60(sp)
-; ZFINX32-NEXT:    sw a4, 56(sp)
-; ZFINX32-NEXT:    sw ra, 52(sp)
-; ZFINX32-NEXT:    sw s11, 48(sp)
-; ZFINX32-NEXT:    sw s10, 44(sp)
-; ZFINX32-NEXT:    sw s9, 40(sp)
-; ZFINX32-NEXT:    sw s8, 36(sp)
-; ZFINX32-NEXT:    sw s7, 32(sp)
-; ZFINX32-NEXT:    sw s6, 28(sp)
-; ZFINX32-NEXT:    sw s5, 24(sp)
-; ZFINX32-NEXT:    sw s4, 20(sp)
-; ZFINX32-NEXT:    sw s3, 16(sp)
-; ZFINX32-NEXT:    sw s2, 12(sp)
-; ZFINX32-NEXT:    sw s1, 8(sp)
-; ZFINX32-NEXT:    sw s0, 4(sp)
+; ZFINX32-NEXT:    sw a0, 76(sp)
+; ZFINX32-NEXT:    sw a1, 72(sp)
+; ZFINX32-NEXT:    sw a2, 68(sp)
+; ZFINX32-NEXT:    sw a3, 64(sp)
+; ZFINX32-NEXT:    sw a4, 60(sp)
+; ZFINX32-NEXT:    sw ra, 56(sp)
+; ZFINX32-NEXT:    sw s11, 52(sp)
+; ZFINX32-NEXT:    sw s10, 48(sp)
+; ZFINX32-NEXT:    sw s9, 44(sp)
+; ZFINX32-NEXT:    sw s8, 40(sp)
+; ZFINX32-NEXT:    sw s7, 36(sp)
+; ZFINX32-NEXT:    sw s6, 32(sp)
+; ZFINX32-NEXT:    sw s5, 28(sp)
+; ZFINX32-NEXT:    sw s4, 24(sp)
+; ZFINX32-NEXT:    sw s3, 20(sp)
+; ZFINX32-NEXT:    sw s2, 16(sp)
+; ZFINX32-NEXT:    sw s1, 12(sp)
+; ZFINX32-NEXT:    sw s0, 8(sp)
+; ZFINX32-NEXT:    sw t2, 4(sp)
 ; ZFINX32-NEXT:    sw t1, 0(sp)
 ; ZFINX32-NEXT:    mv a0, t0
-; ZFINX32-NEXT:    lw a1, 88(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw a3, 80(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw a4, 76(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw a1, 104(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw a2, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw a3, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw a4, 92(sp) # 4-byte Folded Reload
 ; ZFINX32-NEXT:    call callee_half_32
 ; ZFINX32-NEXT:    lui a1, 1048560
 ; ZFINX32-NEXT:    or a0, a0, a1
-; ZFINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    addi sp, sp, 144
+; ZFINX32-NEXT:    lw ra, 156(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s0, 152(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s1, 148(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s2, 144(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s3, 140(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s4, 136(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s5, 132(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s6, 128(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s7, 124(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s8, 120(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s9, 116(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s10, 112(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s11, 108(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    addi sp, sp, 160
 ; ZFINX32-NEXT:    ret
 ;
 ; ZFINX64-LABEL: caller_half_32:
 ; ZFINX64:       # %bb.0:
-; ZFINX64-NEXT:    addi sp, sp, -288
-; ZFINX64-NEXT:    sd ra, 280(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s0, 272(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s1, 264(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s2, 256(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s3, 248(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s4, 240(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s5, 232(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s6, 224(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s7, 216(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s8, 208(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s9, 200(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    addi sp, sp, -304
+; ZFINX64-NEXT:    sd ra, 296(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s0, 288(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s1, 280(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s2, 272(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s3, 264(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s5, 248(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s6, 240(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s7, 232(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s8, 224(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s9, 216(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s10, 208(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s11, 200(sp) # 8-byte Folded Spill
 ; ZFINX64-NEXT:    ld t0, 0(a0)
 ; ZFINX64-NEXT:    ld a1, 8(a0)
-; ZFINX64-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
 ; ZFINX64-NEXT:    ld a1, 16(a0)
-; ZFINX64-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
 ; ZFINX64-NEXT:    ld a1, 24(a0)
-; ZFINX64-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
 ; ZFINX64-NEXT:    ld a1, 32(a0)
-; ZFINX64-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
 ; ZFINX64-NEXT:    ld a5, 40(a0)
 ; ZFINX64-NEXT:    ld a6, 48(a0)
 ; ZFINX64-NEXT:    ld a7, 56(a0)
-; ZFINX64-NEXT:    ld t2, 64(a0)
-; ZFINX64-NEXT:    ld t3, 72(a0)
-; ZFINX64-NEXT:    ld t4, 80(a0)
-; ZFINX64-NEXT:    ld t5, 88(a0)
-; ZFINX64-NEXT:    ld t6, 96(a0)
-; ZFINX64-NEXT:    ld t1, 104(a0)
+; ZFINX64-NEXT:    ld t3, 64(a0)
+; ZFINX64-NEXT:    ld t4, 72(a0)
+; ZFINX64-NEXT:    ld t5, 80(a0)
+; ZFINX64-NEXT:    ld t6, 88(a0)
+; ZFINX64-NEXT:    ld t1, 96(a0)
+; ZFINX64-NEXT:    ld t2, 104(a0)
 ; ZFINX64-NEXT:    ld s0, 112(a0)
 ; ZFINX64-NEXT:    ld s1, 120(a0)
 ; ZFINX64-NEXT:    ld s2, 128(a0)
@@ -559,83 +562,84 @@ define half @caller_half_32(<32 x half> %A) nounwind {
 ; ZFINX64-NEXT:    ld a2, 232(a0)
 ; ZFINX64-NEXT:    ld a1, 240(a0)
 ; ZFINX64-NEXT:    ld a0, 248(a0)
-; ZFINX64-NEXT:    sd a0, 144(sp)
-; ZFINX64-NEXT:    sd a1, 136(sp)
-; ZFINX64-NEXT:    sd a2, 128(sp)
-; ZFINX64-NEXT:    sd a3, 120(sp)
-; ZFINX64-NEXT:    sd a4, 112(sp)
-; ZFINX64-NEXT:    sd ra, 104(sp)
-; ZFINX64-NEXT:    sd s11, 96(sp)
-; ZFINX64-NEXT:    sd s10, 88(sp)
-; ZFINX64-NEXT:    sd s9, 80(sp)
-; ZFINX64-NEXT:    sd s8, 72(sp)
-; ZFINX64-NEXT:    sd s7, 64(sp)
-; ZFINX64-NEXT:    sd s6, 56(sp)
-; ZFINX64-NEXT:    sd s5, 48(sp)
-; ZFINX64-NEXT:    sd s4, 40(sp)
-; ZFINX64-NEXT:    sd s3, 32(sp)
-; ZFINX64-NEXT:    sd s2, 24(sp)
-; ZFINX64-NEXT:    sd s1, 16(sp)
-; ZFINX64-NEXT:    sd s0, 8(sp)
+; ZFINX64-NEXT:    sd a0, 152(sp)
+; ZFINX64-NEXT:    sd a1, 144(sp)
+; ZFINX64-NEXT:    sd a2, 136(sp)
+; ZFINX64-NEXT:    sd a3, 128(sp)
+; ZFINX64-NEXT:    sd a4, 120(sp)
+; ZFINX64-NEXT:    sd ra, 112(sp)
+; ZFINX64-NEXT:    sd s11, 104(sp)
+; ZFINX64-NEXT:    sd s10, 96(sp)
+; ZFINX64-NEXT:    sd s9, 88(sp)
+; ZFINX64-NEXT:    sd s8, 80(sp)
+; ZFINX64-NEXT:    sd s7, 72(sp)
+; ZFINX64-NEXT:    sd s6, 64(sp)
+; ZFINX64-NEXT:    sd s5, 56(sp)
+; ZFINX64-NEXT:    sd s4, 48(sp)
+; ZFINX64-NEXT:    sd s3, 40(sp)
+; ZFINX64-NEXT:    sd s2, 32(sp)
+; ZFINX64-NEXT:    sd s1, 24(sp)
+; ZFINX64-NEXT:    sd s0, 16(sp)
+; ZFINX64-NEXT:    sd t2, 8(sp)
 ; ZFINX64-NEXT:    sd t1, 0(sp)
 ; ZFINX64-NEXT:    mv a0, t0
-; ZFINX64-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld a3, 160(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld a4, 152(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld a3, 176(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld a4, 168(sp) # 8-byte Folded Reload
 ; ZFINX64-NEXT:    call callee_half_32
 ; ZFINX64-NEXT:    lui a1, 1048560
 ; ZFINX64-NEXT:    or a0, a0, a1
-; ZFINX64-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s2, 256(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s3, 248(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s5, 232(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s6, 224(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s7, 216(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s8, 208(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s9, 200(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s10, 192(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    addi sp, sp, 288
+; ZFINX64-NEXT:    ld ra, 296(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s0, 288(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s1, 280(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s2, 272(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s3, 264(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s4, 256(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s5, 248(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s6, 240(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s7, 232(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s8, 224(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s9, 216(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s10, 208(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s11, 200(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    addi sp, sp, 304
 ; ZFINX64-NEXT:    ret
 ;
 ; ZDINX32-LABEL: caller_half_32:
 ; ZDINX32:       # %bb.0:
-; ZDINX32-NEXT:    addi sp, sp, -144
-; ZDINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    addi sp, sp, -160
+; ZDINX32-NEXT:    sw ra, 156(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s0, 152(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s1, 148(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s2, 144(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s3, 140(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s4, 136(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s5, 132(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s6, 128(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s7, 124(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s8, 120(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s9, 116(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s10, 112(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s11, 108(sp) # 4-byte Folded Spill
 ; ZDINX32-NEXT:    lw t0, 0(a0)
 ; ZDINX32-NEXT:    lw a1, 4(a0)
-; ZDINX32-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw a1, 104(sp) # 4-byte Folded Spill
 ; ZDINX32-NEXT:    lw a1, 8(a0)
-; ZDINX32-NEXT:    sw a1, 84(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw a1, 100(sp) # 4-byte Folded Spill
 ; ZDINX32-NEXT:    lw a1, 12(a0)
-; ZDINX32-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw a1, 96(sp) # 4-byte Folded Spill
 ; ZDINX32-NEXT:    lw a1, 16(a0)
-; ZDINX32-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw a1, 92(sp) # 4-byte Folded Spill
 ; ZDINX32-NEXT:    lw a5, 20(a0)
 ; ZDINX32-NEXT:    lw a6, 24(a0)
 ; ZDINX32-NEXT:    lw a7, 28(a0)
-; ZDINX32-NEXT:    lw t2, 32(a0)
-; ZDINX32-NEXT:    lw t3, 36(a0)
-; ZDINX32-NEXT:    lw t4, 40(a0)
-; ZDINX32-NEXT:    lw t5, 44(a0)
-; ZDINX32-NEXT:    lw t6, 48(a0)
-; ZDINX32-NEXT:    lw t1, 52(a0)
+; ZDINX32-NEXT:    lw t3, 32(a0)
+; ZDINX32-NEXT:    lw t4, 36(a0)
+; ZDINX32-NEXT:    lw t5, 40(a0)
+; ZDINX32-NEXT:    lw t6, 44(a0)
+; ZDINX32-NEXT:    lw t1, 48(a0)
+; ZDINX32-NEXT:    lw t2, 52(a0)
 ; ZDINX32-NEXT:    lw s0, 56(a0)
 ; ZDINX32-NEXT:    lw s1, 60(a0)
 ; ZDINX32-NEXT:    lw s2, 64(a0)
@@ -654,83 +658,84 @@ define half @caller_half_32(<32 x half> %A) nounwind {
 ; ZDINX32-NEXT:    lw a2, 116(a0)
 ; ZDINX32-NEXT:    lw a1, 120(a0)
 ; ZDINX32-NEXT:    lw a0, 124(a0)
-; ZDINX32-NEXT:    sw a0, 72(sp)
-; ZDINX32-NEXT:    sw a1, 68(sp)
-; ZDINX32-NEXT:    sw a2, 64(sp)
-; ZDINX32-NEXT:    sw a3, 60(sp)
-; ZDINX32-NEXT:    sw a4, 56(sp)
-; ZDINX32-NEXT:    sw ra, 52(sp)
-; ZDINX32-NEXT:    sw s11, 48(sp)
-; ZDINX32-NEXT:    sw s10, 44(sp)
-; ZDINX32-NEXT:    sw s9, 40(sp)
-; ZDINX32-NEXT:    sw s8, 36(sp)
-; ZDINX32-NEXT:    sw s7, 32(sp)
-; ZDINX32-NEXT:    sw s6, 28(sp)
-; ZDINX32-NEXT:    sw s5, 24(sp)
-; ZDINX32-NEXT:    sw s4, 20(sp)
-; ZDINX32-NEXT:    sw s3, 16(sp)
-; ZDINX32-NEXT:    sw s2, 12(sp)
-; ZDINX32-NEXT:    sw s1, 8(sp)
-; ZDINX32-NEXT:    sw s0, 4(sp)
+; ZDINX32-NEXT:    sw a0, 76(sp)
+; ZDINX32-NEXT:    sw a1, 72(sp)
+; ZDINX32-NEXT:    sw a2, 68(sp)
+; ZDINX32-NEXT:    sw a3, 64(sp)
+; ZDINX32-NEXT:    sw a4, 60(sp)
+; ZDINX32-NEXT:    sw ra, 56(sp)
+; ZDINX32-NEXT:    sw s11, 52(sp)
+; ZDINX32-NEXT:    sw s10, 48(sp)
+; ZDINX32-NEXT:    sw s9, 44(sp)
+; ZDINX32-NEXT:    sw s8, 40(sp)
+; ZDINX32-NEXT:    sw s7, 36(sp)
+; ZDINX32-NEXT:    sw s6, 32(sp)
+; ZDINX32-NEXT:    sw s5, 28(sp)
+; ZDINX32-NEXT:    sw s4, 24(sp)
+; ZDINX32-NEXT:    sw s3, 20(sp)
+; ZDINX32-NEXT:    sw s2, 16(sp)
+; ZDINX32-NEXT:    sw s1, 12(sp)
+; ZDINX32-NEXT:    sw s0, 8(sp)
+; ZDINX32-NEXT:    sw t2, 4(sp)
 ; ZDINX32-NEXT:    sw t1, 0(sp)
 ; ZDINX32-NEXT:    mv a0, t0
-; ZDINX32-NEXT:    lw a1, 88(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw a3, 80(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw a4, 76(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw a1, 104(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw a2, 100(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw a3, 96(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw a4, 92(sp) # 4-byte Folded Reload
 ; ZDINX32-NEXT:    call callee_half_32
 ; ZDINX32-NEXT:    lui a1, 1048560
 ; ZDINX32-NEXT:    or a0, a0, a1
-; ZDINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    addi sp, sp, 144
+; ZDINX32-NEXT:    lw ra, 156(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s0, 152(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s1, 148(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s2, 144(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s3, 140(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s4, 136(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s5, 132(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s6, 128(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s7, 124(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s8, 120(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s9, 116(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s10, 112(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s11, 108(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    addi sp, sp, 160
 ; ZDINX32-NEXT:    ret
 ;
 ; ZDINX64-LABEL: caller_half_32:
 ; ZDINX64:       # %bb.0:
-; ZDINX64-NEXT:    addi sp, sp, -288
-; ZDINX64-NEXT:    sd ra, 280(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s0, 272(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s1, 264(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s2, 256(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s3, 248(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s4, 240(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s5, 232(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s6, 224(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s7, 216(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s8, 208(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s9, 200(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    addi sp, sp, -304
+; ZDINX64-NEXT:    sd ra, 296(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s0, 288(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s1, 280(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s2, 272(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s3, 264(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s4, 256(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s5, 248(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s6, 240(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s7, 232(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s8, 224(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s9, 216(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s10, 208(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s11, 200(sp) # 8-byte Folded Spill
 ; ZDINX64-NEXT:    ld t0, 0(a0)
 ; ZDINX64-NEXT:    ld a1, 8(a0)
-; ZDINX64-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd a1, 192(sp) # 8-byte Folded Spill
 ; ZDINX64-NEXT:    ld a1, 16(a0)
-; ZDINX64-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd a1, 184(sp) # 8-byte Folded Spill
 ; ZDINX64-NEXT:    ld a1, 24(a0)
-; ZDINX64-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
 ; ZDINX64-NEXT:    ld a1, 32(a0)
-; ZDINX64-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
 ; ZDINX64-NEXT:    ld a5, 40(a0)
 ; ZDINX64-NEXT:    ld a6, 48(a0)
 ; ZDINX64-NEXT:    ld a7, 56(a0)
-; ZDINX64-NEXT:    ld t2, 64(a0)
-; ZDINX64-NEXT:    ld t3, 72(a0)
-; ZDINX64-NEXT:    ld t4, 80(a0)
-; ZDINX64-NEXT:    ld t5, 88(a0)
-; ZDINX64-NEXT:    ld t6, 96(a0)
-; ZDINX64-NEXT:    ld t1, 104(a0)
+; ZDINX64-NEXT:    ld t3, 64(a0)
+; ZDINX64-NEXT:    ld t4, 72(a0)
+; ZDINX64-NEXT:    ld t5, 80(a0)
+; ZDINX64-NEXT:    ld t6, 88(a0)
+; ZDINX64-NEXT:    ld t1, 96(a0)
+; ZDINX64-NEXT:    ld t2, 104(a0)
 ; ZDINX64-NEXT:    ld s0, 112(a0)
 ; ZDINX64-NEXT:    ld s1, 120(a0)
 ; ZDINX64-NEXT:    ld s2, 128(a0)
@@ -749,47 +754,48 @@ define half @caller_half_32(<32 x half> %A) nounwind {
 ; ZDINX64-NEXT:    ld a2, 232(a0)
 ; ZDINX64-NEXT:    ld a1, 240(a0)
 ; ZDINX64-NEXT:    ld a0, 248(a0)
-; ZDINX64-NEXT:    sd a0, 144(sp)
-; ZDINX64-NEXT:    sd a1, 136(sp)
-; ZDINX64-NEXT:    sd a2, 128(sp)
-; ZDINX64-NEXT:    sd a3, 120(sp)
-; ZDINX64-NEXT:    sd a4, 112(sp)
-; ZDINX64-NEXT:    sd ra, 104(sp)
-; ZDINX64-NEXT:    sd s11, 96(sp)
-; ZDINX64-NEXT:    sd s10, 88(sp)
-; ZDINX64-NEXT:    sd s9, 80(sp)
-; ZDINX64-NEXT:    sd s8, 72(sp)
-; ZDINX64-NEXT:    sd s7, 64(sp)
-; ZDINX64-NEXT:    sd s6, 56(sp)
-; ZDINX64-NEXT:    sd s5, 48(sp)
-; ZDINX64-NEXT:    sd s4, 40(sp)
-; ZDINX64-NEXT:    sd s3, 32(sp)
-; ZDINX64-NEXT:    sd s2, 24(sp)
-; ZDINX64-NEXT:    sd s1, 16(sp)
-; ZDINX64-NEXT:    sd s0, 8(sp)
+; ZDINX64-NEXT:    sd a0, 152(sp)
+; ZDINX64-NEXT:    sd a1, 144(sp)
+; ZDINX64-NEXT:    sd a2, 136(sp)
+; ZDINX64-NEXT:    sd a3, 128(sp)
+; ZDINX64-NEXT:    sd a4, 120(sp)
+; ZDINX64-NEXT:    sd ra, 112(sp)
+; ZDINX64-NEXT:    sd s11, 104(sp)
+; ZDINX64-NEXT:    sd s10, 96(sp)
+; ZDINX64-NEXT:    sd s9, 88(sp)
+; ZDINX64-NEXT:    sd s8, 80(sp)
+; ZDINX64-NEXT:    sd s7, 72(sp)
+; ZDINX64-NEXT:    sd s6, 64(sp)
+; ZDINX64-NEXT:    sd s5, 56(sp)
+; ZDINX64-NEXT:    sd s4, 48(sp)
+; ZDINX64-NEXT:    sd s3, 40(sp)
+; ZDINX64-NEXT:    sd s2, 32(sp)
+; ZDINX64-NEXT:    sd s1, 24(sp)
+; ZDINX64-NEXT:    sd s0, 16(sp)
+; ZDINX64-NEXT:    sd t2, 8(sp)
 ; ZDINX64-NEXT:    sd t1, 0(sp)
 ; ZDINX64-NEXT:    mv a0, t0
-; ZDINX64-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld a3, 160(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld a4, 152(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld a1, 192(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld a2, 184(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld a3, 176(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld a4, 168(sp) # 8-byte Folded Reload
 ; ZDINX64-NEXT:    call callee_half_32
 ; ZDINX64-NEXT:    lui a1, 1048560
 ; ZDINX64-NEXT:    or a0, a0, a1
-; ZDINX64-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s2, 256(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s3, 248(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s5, 232(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s6, 224(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s7, 216(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s8, 208(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s9, 200(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s10, 192(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    addi sp, sp, 288
+; ZDINX64-NEXT:    ld ra, 296(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s0, 288(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s1, 280(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s2, 272(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s3, 264(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s4, 256(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s5, 248(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s6, 240(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s7, 232(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s8, 224(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s9, 216(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s10, 208(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s11, 200(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    addi sp, sp, 304
 ; ZDINX64-NEXT:    ret
 	%C = call fastcc half @callee_half_32(<32 x half> %A)
 	ret half %C
@@ -826,86 +832,87 @@ define fastcc float @callee_float_32(<32 x float> %A) nounwind {
 define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZHINX32-LABEL: caller_float_32:
 ; ZHINX32:       # %bb.0:
-; ZHINX32-NEXT:    addi sp, sp, -144
-; ZHINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lw t0, 144(sp)
-; ZHINX32-NEXT:    sw t0, 88(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lw t0, 148(sp)
-; ZHINX32-NEXT:    sw t0, 84(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lw t0, 152(sp)
-; ZHINX32-NEXT:    sw t0, 80(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lw t0, 156(sp)
-; ZHINX32-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT:    lw t6, 160(sp)
-; ZHINX32-NEXT:    lw t5, 164(sp)
-; ZHINX32-NEXT:    lw t4, 168(sp)
-; ZHINX32-NEXT:    lw s0, 172(sp)
-; ZHINX32-NEXT:    lw s1, 176(sp)
-; ZHINX32-NEXT:    lw s2, 180(sp)
-; ZHINX32-NEXT:    lw s3, 184(sp)
-; ZHINX32-NEXT:    lw s4, 188(sp)
-; ZHINX32-NEXT:    lw s5, 192(sp)
-; ZHINX32-NEXT:    lw s6, 196(sp)
-; ZHINX32-NEXT:    lw s7, 200(sp)
-; ZHINX32-NEXT:    lw s8, 204(sp)
-; ZHINX32-NEXT:    lw s9, 208(sp)
-; ZHINX32-NEXT:    lw s10, 212(sp)
-; ZHINX32-NEXT:    lw s11, 216(sp)
-; ZHINX32-NEXT:    lw ra, 220(sp)
-; ZHINX32-NEXT:    lw t3, 224(sp)
-; ZHINX32-NEXT:    lw t2, 228(sp)
-; ZHINX32-NEXT:    lw t1, 232(sp)
-; ZHINX32-NEXT:    lw t0, 236(sp)
-; ZHINX32-NEXT:    sw t0, 72(sp)
-; ZHINX32-NEXT:    sw t1, 68(sp)
-; ZHINX32-NEXT:    sw t2, 64(sp)
-; ZHINX32-NEXT:    sw t3, 60(sp)
-; ZHINX32-NEXT:    sw ra, 56(sp)
-; ZHINX32-NEXT:    sw s11, 52(sp)
-; ZHINX32-NEXT:    sw s10, 48(sp)
-; ZHINX32-NEXT:    sw s9, 44(sp)
-; ZHINX32-NEXT:    sw s8, 40(sp)
-; ZHINX32-NEXT:    sw s7, 36(sp)
-; ZHINX32-NEXT:    sw s6, 32(sp)
-; ZHINX32-NEXT:    sw s5, 28(sp)
-; ZHINX32-NEXT:    sw s4, 24(sp)
-; ZHINX32-NEXT:    sw s3, 20(sp)
-; ZHINX32-NEXT:    sw s2, 16(sp)
-; ZHINX32-NEXT:    sw s1, 12(sp)
-; ZHINX32-NEXT:    sw s0, 8(sp)
-; ZHINX32-NEXT:    sw t4, 4(sp)
-; ZHINX32-NEXT:    sw t5, 0(sp)
-; ZHINX32-NEXT:    lw t2, 88(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t3, 84(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t4, 80(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw t5, 76(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    addi sp, sp, -160
+; ZHINX32-NEXT:    sw ra, 156(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s0, 152(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s1, 148(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s2, 144(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s3, 140(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s4, 136(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s5, 132(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s6, 128(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s7, 124(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s8, 120(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s9, 116(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s10, 112(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s11, 108(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 160(sp)
+; ZHINX32-NEXT:    sw t0, 104(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 164(sp)
+; ZHINX32-NEXT:    sw t0, 100(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 168(sp)
+; ZHINX32-NEXT:    sw t0, 96(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 172(sp)
+; ZHINX32-NEXT:    sw t0, 92(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t6, 176(sp)
+; ZHINX32-NEXT:    lw t5, 180(sp)
+; ZHINX32-NEXT:    lw t4, 184(sp)
+; ZHINX32-NEXT:    lw s0, 188(sp)
+; ZHINX32-NEXT:    lw s1, 192(sp)
+; ZHINX32-NEXT:    lw s2, 196(sp)
+; ZHINX32-NEXT:    lw s3, 200(sp)
+; ZHINX32-NEXT:    lw s4, 204(sp)
+; ZHINX32-NEXT:    lw s5, 208(sp)
+; ZHINX32-NEXT:    lw s6, 212(sp)
+; ZHINX32-NEXT:    lw s7, 216(sp)
+; ZHINX32-NEXT:    lw s8, 220(sp)
+; ZHINX32-NEXT:    lw s9, 224(sp)
+; ZHINX32-NEXT:    lw s10, 228(sp)
+; ZHINX32-NEXT:    lw s11, 232(sp)
+; ZHINX32-NEXT:    lw ra, 236(sp)
+; ZHINX32-NEXT:    lw t3, 240(sp)
+; ZHINX32-NEXT:    lw t2, 244(sp)
+; ZHINX32-NEXT:    lw t1, 248(sp)
+; ZHINX32-NEXT:    lw t0, 252(sp)
+; ZHINX32-NEXT:    sw t0, 76(sp)
+; ZHINX32-NEXT:    sw t1, 72(sp)
+; ZHINX32-NEXT:    sw t2, 68(sp)
+; ZHINX32-NEXT:    sw t3, 64(sp)
+; ZHINX32-NEXT:    sw ra, 60(sp)
+; ZHINX32-NEXT:    sw s11, 56(sp)
+; ZHINX32-NEXT:    sw s10, 52(sp)
+; ZHINX32-NEXT:    sw s9, 48(sp)
+; ZHINX32-NEXT:    sw s8, 44(sp)
+; ZHINX32-NEXT:    sw s7, 40(sp)
+; ZHINX32-NEXT:    sw s6, 36(sp)
+; ZHINX32-NEXT:    sw s5, 32(sp)
+; ZHINX32-NEXT:    sw s4, 28(sp)
+; ZHINX32-NEXT:    sw s3, 24(sp)
+; ZHINX32-NEXT:    sw s2, 20(sp)
+; ZHINX32-NEXT:    sw s1, 16(sp)
+; ZHINX32-NEXT:    sw s0, 12(sp)
+; ZHINX32-NEXT:    sw t4, 8(sp)
+; ZHINX32-NEXT:    sw t5, 4(sp)
+; ZHINX32-NEXT:    sw t6, 0(sp)
+; ZHINX32-NEXT:    lw t3, 104(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t4, 100(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t5, 96(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t6, 92(sp) # 4-byte Folded Reload
 ; ZHINX32-NEXT:    call callee_float_32
-; ZHINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT:    addi sp, sp, 144
+; ZHINX32-NEXT:    lw ra, 156(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s0, 152(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s1, 148(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s2, 144(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s3, 140(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s4, 136(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s5, 132(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s6, 128(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s7, 124(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s8, 120(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s9, 116(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s10, 112(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s11, 108(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    addi sp, sp, 160
 ; ZHINX32-NEXT:    ret
 ;
 ; ZHINX64-LABEL: caller_float_32:
@@ -952,29 +959,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZHINX64-NEXT:    lw t2, 392(sp)
 ; ZHINX64-NEXT:    lw t1, 400(sp)
 ; ZHINX64-NEXT:    lw t0, 408(sp)
-; ZHINX64-NEXT:    sw t0, 72(sp)
-; ZHINX64-NEXT:    sw t1, 68(sp)
-; ZHINX64-NEXT:    sw t2, 64(sp)
-; ZHINX64-NEXT:    sw t3, 60(sp)
-; ZHINX64-NEXT:    sw ra, 56(sp)
-; ZHINX64-NEXT:    sw s11, 52(sp)
-; ZHINX64-NEXT:    sw s10, 48(sp)
-; ZHINX64-NEXT:    sw s9, 44(sp)
-; ZHINX64-NEXT:    sw s8, 40(sp)
-; ZHINX64-NEXT:    sw s7, 36(sp)
-; ZHINX64-NEXT:    sw s6, 32(sp)
-; ZHINX64-NEXT:    sw s5, 28(sp)
-; ZHINX64-NEXT:    sw s4, 24(sp)
-; ZHINX64-NEXT:    sw s3, 20(sp)
-; ZHINX64-NEXT:    sw s2, 16(sp)
-; ZHINX64-NEXT:    sw s1, 12(sp)
-; ZHINX64-NEXT:    sw s0, 8(sp)
-; ZHINX64-NEXT:    sw t4, 4(sp)
-; ZHINX64-NEXT:    sw t5, 0(sp)
-; ZHINX64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT:    ld t5, 88(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    sw t0, 76(sp)
+; ZHINX64-NEXT:    sw t1, 72(sp)
+; ZHINX64-NEXT:    sw t2, 68(sp)
+; ZHINX64-NEXT:    sw t3, 64(sp)
+; ZHINX64-NEXT:    sw ra, 60(sp)
+; ZHINX64-NEXT:    sw s11, 56(sp)
+; ZHINX64-NEXT:    sw s10, 52(sp)
+; ZHINX64-NEXT:    sw s9, 48(sp)
+; ZHINX64-NEXT:    sw s8, 44(sp)
+; ZHINX64-NEXT:    sw s7, 40(sp)
+; ZHINX64-NEXT:    sw s6, 36(sp)
+; ZHINX64-NEXT:    sw s5, 32(sp)
+; ZHINX64-NEXT:    sw s4, 28(sp)
+; ZHINX64-NEXT:    sw s3, 24(sp)
+; ZHINX64-NEXT:    sw s2, 20(sp)
+; ZHINX64-NEXT:    sw s1, 16(sp)
+; ZHINX64-NEXT:    sw s0, 12(sp)
+; ZHINX64-NEXT:    sw t4, 8(sp)
+; ZHINX64-NEXT:    sw t5, 4(sp)
+; ZHINX64-NEXT:    sw t6, 0(sp)
+; ZHINX64-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t4, 104(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t5, 96(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t6, 88(sp) # 8-byte Folded Reload
 ; ZHINX64-NEXT:    call callee_float_32
 ; ZHINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
 ; ZHINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
@@ -994,86 +1002,87 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ;
 ; ZFINX32-LABEL: caller_float_32:
 ; ZFINX32:       # %bb.0:
-; ZFINX32-NEXT:    addi sp, sp, -144
-; ZFINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    lw t0, 144(sp)
-; ZFINX32-NEXT:    sw t0, 88(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    lw t0, 148(sp)
-; ZFINX32-NEXT:    sw t0, 84(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    lw t0, 152(sp)
-; ZFINX32-NEXT:    sw t0, 80(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    lw t0, 156(sp)
-; ZFINX32-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT:    lw t6, 160(sp)
-; ZFINX32-NEXT:    lw t5, 164(sp)
-; ZFINX32-NEXT:    lw t4, 168(sp)
-; ZFINX32-NEXT:    lw s0, 172(sp)
-; ZFINX32-NEXT:    lw s1, 176(sp)
-; ZFINX32-NEXT:    lw s2, 180(sp)
-; ZFINX32-NEXT:    lw s3, 184(sp)
-; ZFINX32-NEXT:    lw s4, 188(sp)
-; ZFINX32-NEXT:    lw s5, 192(sp)
-; ZFINX32-NEXT:    lw s6, 196(sp)
-; ZFINX32-NEXT:    lw s7, 200(sp)
-; ZFINX32-NEXT:    lw s8, 204(sp)
-; ZFINX32-NEXT:    lw s9, 208(sp)
-; ZFINX32-NEXT:    lw s10, 212(sp)
-; ZFINX32-NEXT:    lw s11, 216(sp)
-; ZFINX32-NEXT:    lw ra, 220(sp)
-; ZFINX32-NEXT:    lw t3, 224(sp)
-; ZFINX32-NEXT:    lw t2, 228(sp)
-; ZFINX32-NEXT:    lw t1, 232(sp)
-; ZFINX32-NEXT:    lw t0, 236(sp)
-; ZFINX32-NEXT:    sw t0, 72(sp)
-; ZFINX32-NEXT:    sw t1, 68(sp)
-; ZFINX32-NEXT:    sw t2, 64(sp)
-; ZFINX32-NEXT:    sw t3, 60(sp)
-; ZFINX32-NEXT:    sw ra, 56(sp)
-; ZFINX32-NEXT:    sw s11, 52(sp)
-; ZFINX32-NEXT:    sw s10, 48(sp)
-; ZFINX32-NEXT:    sw s9, 44(sp)
-; ZFINX32-NEXT:    sw s8, 40(sp)
-; ZFINX32-NEXT:    sw s7, 36(sp)
-; ZFINX32-NEXT:    sw s6, 32(sp)
-; ZFINX32-NEXT:    sw s5, 28(sp)
-; ZFINX32-NEXT:    sw s4, 24(sp)
-; ZFINX32-NEXT:    sw s3, 20(sp)
-; ZFINX32-NEXT:    sw s2, 16(sp)
-; ZFINX32-NEXT:    sw s1, 12(sp)
-; ZFINX32-NEXT:    sw s0, 8(sp)
-; ZFINX32-NEXT:    sw t4, 4(sp)
-; ZFINX32-NEXT:    sw t5, 0(sp)
-; ZFINX32-NEXT:    lw t2, 88(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw t3, 84(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw t4, 80(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw t5, 76(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    addi sp, sp, -160
+; ZFINX32-NEXT:    sw ra, 156(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s0, 152(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s1, 148(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s2, 144(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s3, 140(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s4, 136(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s5, 132(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s6, 128(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s7, 124(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s8, 120(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s9, 116(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s10, 112(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s11, 108(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 160(sp)
+; ZFINX32-NEXT:    sw t0, 104(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 164(sp)
+; ZFINX32-NEXT:    sw t0, 100(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 168(sp)
+; ZFINX32-NEXT:    sw t0, 96(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 172(sp)
+; ZFINX32-NEXT:    sw t0, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t6, 176(sp)
+; ZFINX32-NEXT:    lw t5, 180(sp)
+; ZFINX32-NEXT:    lw t4, 184(sp)
+; ZFINX32-NEXT:    lw s0, 188(sp)
+; ZFINX32-NEXT:    lw s1, 192(sp)
+; ZFINX32-NEXT:    lw s2, 196(sp)
+; ZFINX32-NEXT:    lw s3, 200(sp)
+; ZFINX32-NEXT:    lw s4, 204(sp)
+; ZFINX32-NEXT:    lw s5, 208(sp)
+; ZFINX32-NEXT:    lw s6, 212(sp)
+; ZFINX32-NEXT:    lw s7, 216(sp)
+; ZFINX32-NEXT:    lw s8, 220(sp)
+; ZFINX32-NEXT:    lw s9, 224(sp)
+; ZFINX32-NEXT:    lw s10, 228(sp)
+; ZFINX32-NEXT:    lw s11, 232(sp)
+; ZFINX32-NEXT:    lw ra, 236(sp)
+; ZFINX32-NEXT:    lw t3, 240(sp)
+; ZFINX32-NEXT:    lw t2, 244(sp)
+; ZFINX32-NEXT:    lw t1, 248(sp)
+; ZFINX32-NEXT:    lw t0, 252(sp)
+; ZFINX32-NEXT:    sw t0, 76(sp)
+; ZFINX32-NEXT:    sw t1, 72(sp)
+; ZFINX32-NEXT:    sw t2, 68(sp)
+; ZFINX32-NEXT:    sw t3, 64(sp)
+; ZFINX32-NEXT:    sw ra, 60(sp)
+; ZFINX32-NEXT:    sw s11, 56(sp)
+; ZFINX32-NEXT:    sw s10, 52(sp)
+; ZFINX32-NEXT:    sw s9, 48(sp)
+; ZFINX32-NEXT:    sw s8, 44(sp)
+; ZFINX32-NEXT:    sw s7, 40(sp)
+; ZFINX32-NEXT:    sw s6, 36(sp)
+; ZFINX32-NEXT:    sw s5, 32(sp)
+; ZFINX32-NEXT:    sw s4, 28(sp)
+; ZFINX32-NEXT:    sw s3, 24(sp)
+; ZFINX32-NEXT:    sw s2, 20(sp)
+; ZFINX32-NEXT:    sw s1, 16(sp)
+; ZFINX32-NEXT:    sw s0, 12(sp)
+; ZFINX32-NEXT:    sw t4, 8(sp)
+; ZFINX32-NEXT:    sw t5, 4(sp)
+; ZFINX32-NEXT:    sw t6, 0(sp)
+; ZFINX32-NEXT:    lw t3, 104(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw t4, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw t5, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw t6, 92(sp) # 4-byte Folded Reload
 ; ZFINX32-NEXT:    call callee_float_32
-; ZFINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT:    addi sp, sp, 144
+; ZFINX32-NEXT:    lw ra, 156(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s0, 152(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s1, 148(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s2, 144(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s3, 140(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s4, 136(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s5, 132(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s6, 128(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s7, 124(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s8, 120(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s9, 116(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s10, 112(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s11, 108(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    addi sp, sp, 160
 ; ZFINX32-NEXT:    ret
 ;
 ; ZFINX64-LABEL: caller_float_32:
@@ -1120,29 +1129,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZFINX64-NEXT:    lw t2, 392(sp)
 ; ZFINX64-NEXT:    lw t1, 400(sp)
 ; ZFINX64-NEXT:    lw t0, 408(sp)
-; ZFINX64-NEXT:    sw t0, 72(sp)
-; ZFINX64-NEXT:    sw t1, 68(sp)
-; ZFINX64-NEXT:    sw t2, 64(sp)
-; ZFINX64-NEXT:    sw t3, 60(sp)
-; ZFINX64-NEXT:    sw ra, 56(sp)
-; ZFINX64-NEXT:    sw s11, 52(sp)
-; ZFINX64-NEXT:    sw s10, 48(sp)
-; ZFINX64-NEXT:    sw s9, 44(sp)
-; ZFINX64-NEXT:    sw s8, 40(sp)
-; ZFINX64-NEXT:    sw s7, 36(sp)
-; ZFINX64-NEXT:    sw s6, 32(sp)
-; ZFINX64-NEXT:    sw s5, 28(sp)
-; ZFINX64-NEXT:    sw s4, 24(sp)
-; ZFINX64-NEXT:    sw s3, 20(sp)
-; ZFINX64-NEXT:    sw s2, 16(sp)
-; ZFINX64-NEXT:    sw s1, 12(sp)
-; ZFINX64-NEXT:    sw s0, 8(sp)
-; ZFINX64-NEXT:    sw t4, 4(sp)
-; ZFINX64-NEXT:    sw t5, 0(sp)
-; ZFINX64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT:    ld t5, 88(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    sw t0, 76(sp)
+; ZFINX64-NEXT:    sw t1, 72(sp)
+; ZFINX64-NEXT:    sw t2, 68(sp)
+; ZFINX64-NEXT:    sw t3, 64(sp)
+; ZFINX64-NEXT:    sw ra, 60(sp)
+; ZFINX64-NEXT:    sw s11, 56(sp)
+; ZFINX64-NEXT:    sw s10, 52(sp)
+; ZFINX64-NEXT:    sw s9, 48(sp)
+; ZFINX64-NEXT:    sw s8, 44(sp)
+; ZFINX64-NEXT:    sw s7, 40(sp)
+; ZFINX64-NEXT:    sw s6, 36(sp)
+; ZFINX64-NEXT:    sw s5, 32(sp)
+; ZFINX64-NEXT:    sw s4, 28(sp)
+; ZFINX64-NEXT:    sw s3, 24(sp)
+; ZFINX64-NEXT:    sw s2, 20(sp)
+; ZFINX64-NEXT:    sw s1, 16(sp)
+; ZFINX64-NEXT:    sw s0, 12(sp)
+; ZFINX64-NEXT:    sw t4, 8(sp)
+; ZFINX64-NEXT:    sw t5, 4(sp)
+; ZFINX64-NEXT:    sw t6, 0(sp)
+; ZFINX64-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld t4, 104(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld t5, 96(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld t6, 88(sp) # 8-byte Folded Reload
 ; ZFINX64-NEXT:    call callee_float_32
 ; ZFINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
 ; ZFINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
@@ -1162,86 +1172,87 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ;
 ; ZDINX32-LABEL: caller_float_32:
 ; ZDINX32:       # %bb.0:
-; ZDINX32-NEXT:    addi sp, sp, -144
-; ZDINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    lw t0, 144(sp)
-; ZDINX32-NEXT:    sw t0, 88(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    lw t0, 148(sp)
-; ZDINX32-NEXT:    sw t0, 84(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    lw t0, 152(sp)
-; ZDINX32-NEXT:    sw t0, 80(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    lw t0, 156(sp)
-; ZDINX32-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT:    lw t6, 160(sp)
-; ZDINX32-NEXT:    lw t5, 164(sp)
-; ZDINX32-NEXT:    lw t4, 168(sp)
-; ZDINX32-NEXT:    lw s0, 172(sp)
-; ZDINX32-NEXT:    lw s1, 176(sp)
-; ZDINX32-NEXT:    lw s2, 180(sp)
-; ZDINX32-NEXT:    lw s3, 184(sp)
-; ZDINX32-NEXT:    lw s4, 188(sp)
-; ZDINX32-NEXT:    lw s5, 192(sp)
-; ZDINX32-NEXT:    lw s6, 196(sp)
-; ZDINX32-NEXT:    lw s7, 200(sp)
-; ZDINX32-NEXT:    lw s8, 204(sp)
-; ZDINX32-NEXT:    lw s9, 208(sp)
-; ZDINX32-NEXT:    lw s10, 212(sp)
-; ZDINX32-NEXT:    lw s11, 216(sp)
-; ZDINX32-NEXT:    lw ra, 220(sp)
-; ZDINX32-NEXT:    lw t3, 224(sp)
-; ZDINX32-NEXT:    lw t2, 228(sp)
-; ZDINX32-NEXT:    lw t1, 232(sp)
-; ZDINX32-NEXT:    lw t0, 236(sp)
-; ZDINX32-NEXT:    sw t0, 72(sp)
-; ZDINX32-NEXT:    sw t1, 68(sp)
-; ZDINX32-NEXT:    sw t2, 64(sp)
-; ZDINX32-NEXT:    sw t3, 60(sp)
-; ZDINX32-NEXT:    sw ra, 56(sp)
-; ZDINX32-NEXT:    sw s11, 52(sp)
-; ZDINX32-NEXT:    sw s10, 48(sp)
-; ZDINX32-NEXT:    sw s9, 44(sp)
-; ZDINX32-NEXT:    sw s8, 40(sp)
-; ZDINX32-NEXT:    sw s7, 36(sp)
-; ZDINX32-NEXT:    sw s6, 32(sp)
-; ZDINX32-NEXT:    sw s5, 28(sp)
-; ZDINX32-NEXT:    sw s4, 24(sp)
-; ZDINX32-NEXT:    sw s3, 20(sp)
-; ZDINX32-NEXT:    sw s2, 16(sp)
-; ZDINX32-NEXT:    sw s1, 12(sp)
-; ZDINX32-NEXT:    sw s0, 8(sp)
-; ZDINX32-NEXT:    sw t4, 4(sp)
-; ZDINX32-NEXT:    sw t5, 0(sp)
-; ZDINX32-NEXT:    lw t2, 88(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw t3, 84(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw t4, 80(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw t5, 76(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    addi sp, sp, -160
+; ZDINX32-NEXT:    sw ra, 156(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s0, 152(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s1, 148(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s2, 144(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s3, 140(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s4, 136(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s5, 132(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s6, 128(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s7, 124(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s8, 120(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s9, 116(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s10, 112(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s11, 108(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 160(sp)
+; ZDINX32-NEXT:    sw t0, 104(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 164(sp)
+; ZDINX32-NEXT:    sw t0, 100(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 168(sp)
+; ZDINX32-NEXT:    sw t0, 96(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 172(sp)
+; ZDINX32-NEXT:    sw t0, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t6, 176(sp)
+; ZDINX32-NEXT:    lw t5, 180(sp)
+; ZDINX32-NEXT:    lw t4, 184(sp)
+; ZDINX32-NEXT:    lw s0, 188(sp)
+; ZDINX32-NEXT:    lw s1, 192(sp)
+; ZDINX32-NEXT:    lw s2, 196(sp)
+; ZDINX32-NEXT:    lw s3, 200(sp)
+; ZDINX32-NEXT:    lw s4, 204(sp)
+; ZDINX32-NEXT:    lw s5, 208(sp)
+; ZDINX32-NEXT:    lw s6, 212(sp)
+; ZDINX32-NEXT:    lw s7, 216(sp)
+; ZDINX32-NEXT:    lw s8, 220(sp)
+; ZDINX32-NEXT:    lw s9, 224(sp)
+; ZDINX32-NEXT:    lw s10, 228(sp)
+; ZDINX32-NEXT:    lw s11, 232(sp)
+; ZDINX32-NEXT:    lw ra, 236(sp)
+; ZDINX32-NEXT:    lw t3, 240(sp)
+; ZDINX32-NEXT:    lw t2, 244(sp)
+; ZDINX32-NEXT:    lw t1, 248(sp)
+; ZDINX32-NEXT:    lw t0, 252(sp)
+; ZDINX32-NEXT:    sw t0, 76(sp)
+; ZDINX32-NEXT:    sw t1, 72(sp)
+; ZDINX32-NEXT:    sw t2, 68(sp)
+; ZDINX32-NEXT:    sw t3, 64(sp)
+; ZDINX32-NEXT:    sw ra, 60(sp)
+; ZDINX32-NEXT:    sw s11, 56(sp)
+; ZDINX32-NEXT:    sw s10, 52(sp)
+; ZDINX32-NEXT:    sw s9, 48(sp)
+; ZDINX32-NEXT:    sw s8, 44(sp)
+; ZDINX32-NEXT:    sw s7, 40(sp)
+; ZDINX32-NEXT:    sw s6, 36(sp)
+; ZDINX32-NEXT:    sw s5, 32(sp)
+; ZDINX32-NEXT:    sw s4, 28(sp)
+; ZDINX32-NEXT:    sw s3, 24(sp)
+; ZDINX32-NEXT:    sw s2, 20(sp)
+; ZDINX32-NEXT:    sw s1, 16(sp)
+; ZDINX32-NEXT:    sw s0, 12(sp)
+; ZDINX32-NEXT:    sw t4, 8(sp)
+; ZDINX32-NEXT:    sw t5, 4(sp)
+; ZDINX32-NEXT:    sw t6, 0(sp)
+; ZDINX32-NEXT:    lw t3, 104(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw t4, 100(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw t5, 96(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw t6, 92(sp) # 4-byte Folded Reload
 ; ZDINX32-NEXT:    call callee_float_32
-; ZDINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT:    addi sp, sp, 144
+; ZDINX32-NEXT:    lw ra, 156(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s0, 152(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s1, 148(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s2, 144(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s3, 140(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s4, 136(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s5, 132(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s6, 128(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s7, 124(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s8, 120(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s9, 116(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s10, 112(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s11, 108(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    addi sp, sp, 160
 ; ZDINX32-NEXT:    ret
 ;
 ; ZDINX64-LABEL: caller_float_32:
@@ -1288,29 +1299,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
 ; ZDINX64-NEXT:    lw t2, 392(sp)
 ; ZDINX64-NEXT:    lw t1, 400(sp)
 ; ZDINX64-NEXT:    lw t0, 408(sp)
-; ZDINX64-NEXT:    sw t0, 72(sp)
-; ZDINX64-NEXT:    sw t1, 68(sp)
-; ZDINX64-NEXT:    sw t2, 64(sp)
-; ZDINX64-NEXT:    sw t3, 60(sp)
-; ZDINX64-NEXT:    sw ra, 56(sp)
-; ZDINX64-NEXT:    sw s11, 52(sp)
-; ZDINX64-NEXT:    sw s10, 48(sp)
-; ZDINX64-NEXT:    sw s9, 44(sp)
-; ZDINX64-NEXT:    sw s8, 40(sp)
-; ZDINX64-NEXT:    sw s7, 36(sp)
-; ZDINX64-NEXT:    sw s6, 32(sp)
-; ZDINX64-NEXT:    sw s5, 28(sp)
-; ZDINX64-NEXT:    sw s4, 24(sp)
-; ZDINX64-NEXT:    sw s3, 20(sp)
-; ZDINX64-NEXT:    sw s2, 16(sp)
-; ZDINX64-NEXT:    sw s1, 12(sp)
-; ZDINX64-NEXT:    sw s0, 8(sp)
-; ZDINX64-NEXT:    sw t4, 4(sp)
-; ZDINX64-NEXT:    sw t5, 0(sp)
-; ZDINX64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT:    ld t5, 88(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    sw t0, 76(sp)
+; ZDINX64-NEXT:    sw t1, 72(sp)
+; ZDINX64-NEXT:    sw t2, 68(sp)
+; ZDINX64-NEXT:    sw t3, 64(sp)
+; ZDINX64-NEXT:    sw ra, 60(sp)
+; ZDINX64-NEXT:    sw s11, 56(sp)
+; ZDINX64-NEXT:    sw s10, 52(sp)
+; ZDINX64-NEXT:    sw s9, 48(sp)
+; ZDINX64-NEXT:    sw s8, 44(sp)
+; ZDINX64-NEXT:    sw s7, 40(sp)
+; ZDINX64-NEXT:    sw s6, 36(sp)
+; ZDINX64-NEXT:    sw s5, 32(sp)
+; ZDINX64-NEXT:    sw s4, 28(sp)
+; ZDINX64-NEXT:    sw s3, 24(sp)
+; ZDINX64-NEXT:    sw s2, 20(sp)
+; ZDINX64-NEXT:    sw s1, 16(sp)
+; ZDINX64-NEXT:    sw s0, 12(sp)
+; ZDINX64-NEXT:    sw t4, 8(sp)
+; ZDINX64-NEXT:    sw t5, 4(sp)
+; ZDINX64-NEXT:    sw t6, 0(sp)
+; ZDINX64-NEXT:    ld t3, 112(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld t4, 104(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld t5, 96(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld t6, 88(sp) # 8-byte Folded Reload
 ; ZDINX64-NEXT:    call callee_float_32
 ; ZDINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
 ; ZDINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload

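A quick back-of-the-envelope check on the ZDINX32 frame growth above, assuming ilp32 where every float occupies one 4-byte GPR or stack slot:

    outgoing args: 32 floats - 12 fastcc GPRs (a0-a7, t3-t6) = 20 slots * 4 = 80 bytes (was 19 * 4 = 76)
    saves/spills:  13 saved GPRs (ra, s0-s11) + 4 re-spilled incoming args = 17 slots * 4 = 68 bytes
    frame size:    80 + 68 = 148, rounded up to 160 by 16-byte stack alignment (was 76 + 68 = 144)

The ZFINX64/ZDINX64 variants show the same one-slot shift in the outgoing area (stores now reach 76(sp) instead of 72(sp)); the padding in their larger frames absorbs the extra word, so their frame sizes are unchanged.
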
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
index ee9f96a45d23e..fb84a2528778a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -502,8 +502,8 @@ define fastcc <vscale x 32 x i32> @vector_arg_indirect_stack(i32 %0, i32 %1, i32
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    csrr a0, vlenb
 ; CHECK-NEXT:    slli a0, a0, 3
-; CHECK-NEXT:    add a0, t4, a0
-; CHECK-NEXT:    vl8re32.v v24, (t4)
+; CHECK-NEXT:    add a0, t5, a0
+; CHECK-NEXT:    vl8re32.v v24, (t5)
 ; CHECK-NEXT:    vl8re32.v v0, (a0)
 ; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT:    vadd.vv v8, v8, v24
@@ -521,25 +521,31 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
 ; RV32-NEXT:    .cfi_def_cfa_offset 144
 ; RV32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; RV32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
 ; RV32-NEXT:    .cfi_offset s0, -8
+; RV32-NEXT:    .cfi_offset s1, -12
 ; RV32-NEXT:    addi s0, sp, 144
 ; RV32-NEXT:    .cfi_def_cfa s0, 0
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    sub sp, sp, a0
 ; RV32-NEXT:    andi sp, sp, -128
+; RV32-NEXT:    mv s1, sp
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
 ; RV32-NEXT:    vmv.v.i v8, 0
-; RV32-NEXT:    addi a1, sp, 128
+; RV32-NEXT:    addi a1, s1, 128
 ; RV32-NEXT:    vs8r.v v8, (a1)
 ; RV32-NEXT:    csrr a2, vlenb
 ; RV32-NEXT:    slli a2, a2, 4
-; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    add a2, s1, a2
 ; RV32-NEXT:    addi a2, a2, 128
 ; RV32-NEXT:    vs8r.v v8, (a2)
+; RV32-NEXT:    li a3, 8
+; RV32-NEXT:    sw a3, 0(sp)
 ; RV32-NEXT:    add a1, a1, a0
 ; RV32-NEXT:    vs8r.v v8, (a1)
 ; RV32-NEXT:    add a0, a2, a0
@@ -550,47 +556,54 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
 ; RV32-NEXT:    li a5, 5
 ; RV32-NEXT:    li a6, 6
 ; RV32-NEXT:    li a7, 7
-; RV32-NEXT:    csrr t2, vlenb
-; RV32-NEXT:    slli t2, t2, 4
-; RV32-NEXT:    add t2, sp, t2
-; RV32-NEXT:    addi t2, t2, 128
-; RV32-NEXT:    addi t4, sp, 128
-; RV32-NEXT:    li t6, 8
+; RV32-NEXT:    csrr t3, vlenb
+; RV32-NEXT:    slli t3, t3, 4
+; RV32-NEXT:    add t3, s1, t3
+; RV32-NEXT:    addi t3, t3, 128
+; RV32-NEXT:    addi t5, s1, 128
 ; RV32-NEXT:    vs8r.v v8, (a0)
 ; RV32-NEXT:    li a0, 0
 ; RV32-NEXT:    vmv.v.i v16, 0
 ; RV32-NEXT:    call vector_arg_indirect_stack
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    addi sp, s0, -144
 ; RV32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; RV32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
 ; RV32-NEXT:    addi sp, sp, 144
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: pass_vector_arg_indirect_stack:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -144
-; RV64-NEXT:    .cfi_def_cfa_offset 144
-; RV64-NEXT:    sd ra, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 128(sp) # 8-byte Folded Spill
+; RV64-NEXT:    addi sp, sp, -160
+; RV64-NEXT:    .cfi_def_cfa_offset 160
+; RV64-NEXT:    sd ra, 152(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s0, 144(sp) # 8-byte Folded Spill
+; RV64-NEXT:    sd s1, 136(sp) # 8-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
 ; RV64-NEXT:    .cfi_offset s0, -16
-; RV64-NEXT:    addi s0, sp, 144
+; RV64-NEXT:    .cfi_offset s1, -24
+; RV64-NEXT:    addi s0, sp, 160
 ; RV64-NEXT:    .cfi_def_cfa s0, 0
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 5
 ; RV64-NEXT:    sub sp, sp, a0
 ; RV64-NEXT:    andi sp, sp, -128
+; RV64-NEXT:    mv s1, sp
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
 ; RV64-NEXT:    vmv.v.i v8, 0
-; RV64-NEXT:    addi a1, sp, 128
+; RV64-NEXT:    addi a1, s1, 128
 ; RV64-NEXT:    vs8r.v v8, (a1)
 ; RV64-NEXT:    csrr a2, vlenb
 ; RV64-NEXT:    slli a2, a2, 4
-; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    add a2, s1, a2
 ; RV64-NEXT:    addi a2, a2, 128
 ; RV64-NEXT:    vs8r.v v8, (a2)
+; RV64-NEXT:    li a3, 8
+; RV64-NEXT:    sd a3, 0(sp)
 ; RV64-NEXT:    add a1, a1, a0
 ; RV64-NEXT:    vs8r.v v8, (a1)
 ; RV64-NEXT:    add a0, a2, a0
@@ -601,20 +614,21 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
 ; RV64-NEXT:    li a5, 5
 ; RV64-NEXT:    li a6, 6
 ; RV64-NEXT:    li a7, 7
-; RV64-NEXT:    csrr t2, vlenb
-; RV64-NEXT:    slli t2, t2, 4
-; RV64-NEXT:    add t2, sp, t2
-; RV64-NEXT:    addi t2, t2, 128
-; RV64-NEXT:    addi t4, sp, 128
-; RV64-NEXT:    li t6, 8
+; RV64-NEXT:    csrr t3, vlenb
+; RV64-NEXT:    slli t3, t3, 4
+; RV64-NEXT:    add t3, s1, t3
+; RV64-NEXT:    addi t3, t3, 128
+; RV64-NEXT:    addi t5, s1, 128
 ; RV64-NEXT:    vs8r.v v8, (a0)
 ; RV64-NEXT:    li a0, 0
 ; RV64-NEXT:    vmv.v.i v16, 0
 ; RV64-NEXT:    call vector_arg_indirect_stack
-; RV64-NEXT:    addi sp, s0, -144
-; RV64-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 128(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 144
+; RV64-NEXT:    addi sp, sp, 16
+; RV64-NEXT:    addi sp, s0, -160
+; RV64-NEXT:    ld ra, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s0, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT:    ld s1, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT:    addi sp, sp, 160
 ; RV64-NEXT:    ret
   %s = call fastcc <vscale x 32 x i32> @vector_arg_indirect_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 8)
   ret <vscale x 32 x i32> %s

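In the scalable-vector test just above, the knock-on effect of the smaller GPR list is that the final scalar argument (the trailing i32 8) no longer fits in a register: where it previously travelled in t6, both the RV32 and RV64 lowerings now carve out an extra 16-byte outgoing area and store the constant to 0(sp). Because sp moves again after the frame has been realigned, the aligned base is kept in a new s1 copy, and the indirect vector addresses are rebased from t2/t4 to t3/t5.
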
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
index 63cd42e97ef6f..9f48fdb3608a0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
@@ -230,7 +230,7 @@ define fastcc <32 x i32> @vector_arg_indirect_stack(i32 %0, i32 %1, i32 %2, i32
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT:    vle32.v v16, (t2)
+; CHECK-NEXT:    vle32.v v16, (t3)
 ; CHECK-NEXT:    vadd.vv v8, v8, v16
 ; CHECK-NEXT:    ret
   %s = add <32 x i32> %x, %z
@@ -261,8 +261,8 @@ define fastcc <32 x i32> @pass_vector_arg_indirect_stack(<32 x i32> %x, <32 x i3
 ; CHECK-NEXT:    li a5, 5
 ; CHECK-NEXT:    li a6, 6
 ; CHECK-NEXT:    li a7, 7
-; CHECK-NEXT:    mv t2, sp
-; CHECK-NEXT:    li t3, 8
+; CHECK-NEXT:    mv t3, sp
+; CHECK-NEXT:    li t4, 8
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    vmv.v.i v16, 0
@@ -281,7 +281,7 @@ define fastcc <32 x i32> @vector_arg_direct_stack(i32 %0, i32 %1, i32 %2, i32 %3
 ; CHECK-LABEL: vector_arg_direct_stack:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 32
-; CHECK-NEXT:    addi a1, sp, 8
+; CHECK-NEXT:    addi a1, sp, 16
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT:    vle32.v v24, (a1)
 ; CHECK-NEXT:    vadd.vv v8, v8, v16
@@ -303,11 +303,13 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32>
 ; CHECK-NEXT:    li a0, 32
 ; CHECK-NEXT:    vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    addi a0, sp, 8
+; CHECK-NEXT:    addi a0, sp, 16
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    li a0, 1
-; CHECK-NEXT:    sd a0, 136(sp)
+; CHECK-NEXT:    sd a0, 144(sp)
 ; CHECK-NEXT:    li a0, 13
+; CHECK-NEXT:    sd a0, 8(sp)
+; CHECK-NEXT:    li a0, 12
 ; CHECK-NEXT:    li a1, 1
 ; CHECK-NEXT:    li a2, 2
 ; CHECK-NEXT:    li a3, 3
@@ -315,11 +317,10 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32>
 ; CHECK-NEXT:    li a5, 5
 ; CHECK-NEXT:    li a6, 6
 ; CHECK-NEXT:    li a7, 7
-; CHECK-NEXT:    li t2, 8
-; CHECK-NEXT:    li t3, 9
-; CHECK-NEXT:    li t4, 10
-; CHECK-NEXT:    li t5, 11
-; CHECK-NEXT:    li t6, 12
+; CHECK-NEXT:    li t3, 8
+; CHECK-NEXT:    li t4, 9
+; CHECK-NEXT:    li t5, 10
+; CHECK-NEXT:    li t6, 11
 ; CHECK-NEXT:    sd a0, 0(sp)
 ; CHECK-NEXT:    li a0, 0
 ; CHECK-NEXT:    vmv.v.i v16, 0
@@ -336,7 +337,7 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32>
 define fastcc <4 x i1> @vector_mask_arg_direct_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, <4 x i1> %m1, <4 x i1> %m2, i32 %last) {
 ; CHECK-LABEL: vector_mask_arg_direct_stack:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a0, sp, 136
+; CHECK-NEXT:    addi a0, sp, 144
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vlm.v v8, (a0)
 ; CHECK-NEXT:    vmxor.mm v0, v0, v8

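Taken together, the test churn is mechanical: anything previously passed in x7/t2 or a later temporary moves up one register, and the value that used to ride in the last register moves onto the stack. For checking the new order locally, a minimal sketch (the file and function names are illustrative, not from the patch):

    ; pick-ninth.ll: with x7 dropped, fastcc integer arguments use
    ; a0-a7 (x10-x17) followed by t3-t6 (x28-x31), so the ninth
    ; argument below should arrive in t3 rather than t2.
    ; Try: llc -mtriple=riscv64 pick-ninth.ll -o -
    define fastcc i64 @pick_ninth(i64 %x0, i64 %x1, i64 %x2, i64 %x3,
                                  i64 %x4, i64 %x5, i64 %x6, i64 %x7,
                                  i64 %x8) nounwind {
      ret i64 %x8 ; expect "mv a0, t3" (previously "mv a0, t2")
    }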