[llvm] [RISCV] Remove x7 from fastcc list. (PR #96729)
Yeting Kuo via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 25 20:45:36 PDT 2024
https://github.com/yetingk created https://github.com/llvm/llvm-project/pull/96729
Like #93321, this patch addresses the conflicting use of x7 between fastcc and Zicfilp, but it does so by removing x7 from the fastcc argument register list directly. The goal is to avoid the code complexity of #93321; we also found that this increases instruction count by at most 0.02% on most benchmarks, and it may even benefit some benchmarks overall.
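For reference, here is a minimal sketch (not taken from the patch's test suite) of the kind of fastcc function this change affects: with nine integer arguments on RV64, the ninth argument was previously passed in t2 (x7) and is now passed in t3 (x28), leaving x7 free for the Zicfilp landing-pad label.

; Hypothetical example, assuming the updated fastcc GPR order
; (a0-a7, then t3-t6): the ninth argument %a8 now arrives in
; t3 (x28) instead of t2 (x7).
define fastcc i32 @nine_args(i32 %a0, i32 %a1, i32 %a2, i32 %a3,
                             i32 %a4, i32 %a5, i32 %a6, i32 %a7,
                             i32 %a8) {
  ret i32 %a8
}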
From a0b935673bf2113ca515f9a0a499be68693c70be Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Tue, 25 Jun 2024 20:30:50 -0700
Subject: [PATCH] [RISCV] Remove x7 from fastcc list.
Like #93321, this patch addresses the conflicting use of x7 between
fastcc and Zicfilp, but it does so by removing x7 from the fastcc
argument register list directly. The goal is to avoid the code
complexity of #93321; we also found that this increases instruction
count by at most 0.02% on most benchmarks, and it may even benefit
some benchmarks overall.
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 7 +-
llvm/test/CodeGen/RISCV/fastcc-int.ll | 34 +-
.../CodeGen/RISCV/fastcc-without-f-reg.ll | 1196 +++++++++--------
.../CodeGen/RISCV/rvv/calling-conv-fastcc.ll | 68 +-
.../rvv/fixed-vectors-calling-conv-fastcc.ll | 25 +-
5 files changed, 679 insertions(+), 651 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index af3950773e4d0..bdb0ecdb27b64 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18696,13 +18696,12 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
// for save-restore libcall, so we don't use them.
static const MCPhysReg FastCCIGPRs[] = {
RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
- RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7, RISCV::X28,
- RISCV::X29, RISCV::X30, RISCV::X31};
+ RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29,
+ RISCV::X30, RISCV::X31};
// The GPRs used for passing arguments in the FastCC when using ILP32E/ILP64E.
static const MCPhysReg FastCCEGPRs[] = {RISCV::X10, RISCV::X11, RISCV::X12,
- RISCV::X13, RISCV::X14, RISCV::X15,
- RISCV::X7};
+ RISCV::X13, RISCV::X14, RISCV::X15};
if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
return ArrayRef(FastCCEGPRs);
diff --git a/llvm/test/CodeGen/RISCV/fastcc-int.ll b/llvm/test/CodeGen/RISCV/fastcc-int.ll
index e4c41a1aa890f..75046b701b235 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-int.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-int.ll
@@ -32,16 +32,17 @@ define i32 @caller(<16 x i32> %A) nounwind {
; RV32-NEXT: lw a5, 20(a0)
; RV32-NEXT: lw a6, 24(a0)
; RV32-NEXT: lw a7, 28(a0)
-; RV32-NEXT: lw t2, 32(a0)
-; RV32-NEXT: lw t3, 36(a0)
-; RV32-NEXT: lw t4, 40(a0)
-; RV32-NEXT: lw t5, 44(a0)
-; RV32-NEXT: lw t6, 48(a0)
-; RV32-NEXT: lw t1, 52(a0)
+; RV32-NEXT: lw t3, 32(a0)
+; RV32-NEXT: lw t4, 36(a0)
+; RV32-NEXT: lw t5, 40(a0)
+; RV32-NEXT: lw t6, 44(a0)
+; RV32-NEXT: lw t1, 48(a0)
+; RV32-NEXT: lw t2, 52(a0)
; RV32-NEXT: lw s0, 56(a0)
; RV32-NEXT: lw a0, 60(a0)
-; RV32-NEXT: sw a0, 8(sp)
-; RV32-NEXT: sw s0, 4(sp)
+; RV32-NEXT: sw a0, 12(sp)
+; RV32-NEXT: sw s0, 8(sp)
+; RV32-NEXT: sw t2, 4(sp)
; RV32-NEXT: sw t1, 0(sp)
; RV32-NEXT: mv a0, t0
; RV32-NEXT: call callee
@@ -63,16 +64,17 @@ define i32 @caller(<16 x i32> %A) nounwind {
; RV64-NEXT: ld a5, 40(a0)
; RV64-NEXT: ld a6, 48(a0)
; RV64-NEXT: ld a7, 56(a0)
-; RV64-NEXT: ld t2, 64(a0)
-; RV64-NEXT: ld t3, 72(a0)
-; RV64-NEXT: ld t4, 80(a0)
-; RV64-NEXT: ld t5, 88(a0)
-; RV64-NEXT: ld t6, 96(a0)
-; RV64-NEXT: ld t1, 104(a0)
+; RV64-NEXT: ld t3, 64(a0)
+; RV64-NEXT: ld t4, 72(a0)
+; RV64-NEXT: ld t5, 80(a0)
+; RV64-NEXT: ld t6, 88(a0)
+; RV64-NEXT: ld t1, 96(a0)
+; RV64-NEXT: ld t2, 104(a0)
; RV64-NEXT: ld s0, 112(a0)
; RV64-NEXT: ld a0, 120(a0)
-; RV64-NEXT: sd a0, 16(sp)
-; RV64-NEXT: sd s0, 8(sp)
+; RV64-NEXT: sd a0, 24(sp)
+; RV64-NEXT: sd s0, 16(sp)
+; RV64-NEXT: sd t2, 8(sp)
; RV64-NEXT: sd t1, 0(sp)
; RV64-NEXT: mv a0, t0
; RV64-NEXT: call callee
diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index a44d31dff09cc..1dbb060fc35fa 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -288,29 +288,30 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX32-NEXT: lh t2, 196(sp)
; ZHINX32-NEXT: lh t1, 200(sp)
; ZHINX32-NEXT: lh t0, 204(sp)
-; ZHINX32-NEXT: sh t0, 36(sp)
-; ZHINX32-NEXT: sh t1, 34(sp)
-; ZHINX32-NEXT: sh t2, 32(sp)
-; ZHINX32-NEXT: sh t3, 30(sp)
-; ZHINX32-NEXT: sh ra, 28(sp)
-; ZHINX32-NEXT: sh s11, 26(sp)
-; ZHINX32-NEXT: sh s10, 24(sp)
-; ZHINX32-NEXT: sh s9, 22(sp)
-; ZHINX32-NEXT: sh s8, 20(sp)
-; ZHINX32-NEXT: sh s7, 18(sp)
-; ZHINX32-NEXT: sh s6, 16(sp)
-; ZHINX32-NEXT: sh s5, 14(sp)
-; ZHINX32-NEXT: sh s4, 12(sp)
-; ZHINX32-NEXT: sh s3, 10(sp)
-; ZHINX32-NEXT: sh s2, 8(sp)
-; ZHINX32-NEXT: sh s1, 6(sp)
-; ZHINX32-NEXT: sh s0, 4(sp)
-; ZHINX32-NEXT: sh t4, 2(sp)
-; ZHINX32-NEXT: sh t5, 0(sp)
-; ZHINX32-NEXT: lw t2, 56(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t3, 52(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t4, 48(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t5, 44(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: sh t0, 38(sp)
+; ZHINX32-NEXT: sh t1, 36(sp)
+; ZHINX32-NEXT: sh t2, 34(sp)
+; ZHINX32-NEXT: sh t3, 32(sp)
+; ZHINX32-NEXT: sh ra, 30(sp)
+; ZHINX32-NEXT: sh s11, 28(sp)
+; ZHINX32-NEXT: sh s10, 26(sp)
+; ZHINX32-NEXT: sh s9, 24(sp)
+; ZHINX32-NEXT: sh s8, 22(sp)
+; ZHINX32-NEXT: sh s7, 20(sp)
+; ZHINX32-NEXT: sh s6, 18(sp)
+; ZHINX32-NEXT: sh s5, 16(sp)
+; ZHINX32-NEXT: sh s4, 14(sp)
+; ZHINX32-NEXT: sh s3, 12(sp)
+; ZHINX32-NEXT: sh s2, 10(sp)
+; ZHINX32-NEXT: sh s1, 8(sp)
+; ZHINX32-NEXT: sh s0, 6(sp)
+; ZHINX32-NEXT: sh t4, 4(sp)
+; ZHINX32-NEXT: sh t5, 2(sp)
+; ZHINX32-NEXT: sh t6, 0(sp)
+; ZHINX32-NEXT: lw t3, 56(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw t4, 52(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw t5, 48(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw t6, 44(sp) # 4-byte Folded Reload
; ZHINX32-NEXT: call callee_half_32
; ZHINX32-NEXT: lw ra, 108(sp) # 4-byte Folded Reload
; ZHINX32-NEXT: lw s0, 104(sp) # 4-byte Folded Reload
@@ -372,29 +373,30 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZHINX64-NEXT: lh t2, 344(sp)
; ZHINX64-NEXT: lh t1, 352(sp)
; ZHINX64-NEXT: lh t0, 360(sp)
-; ZHINX64-NEXT: sh t0, 36(sp)
-; ZHINX64-NEXT: sh t1, 34(sp)
-; ZHINX64-NEXT: sh t2, 32(sp)
-; ZHINX64-NEXT: sh t3, 30(sp)
-; ZHINX64-NEXT: sh ra, 28(sp)
-; ZHINX64-NEXT: sh s11, 26(sp)
-; ZHINX64-NEXT: sh s10, 24(sp)
-; ZHINX64-NEXT: sh s9, 22(sp)
-; ZHINX64-NEXT: sh s8, 20(sp)
-; ZHINX64-NEXT: sh s7, 18(sp)
-; ZHINX64-NEXT: sh s6, 16(sp)
-; ZHINX64-NEXT: sh s5, 14(sp)
-; ZHINX64-NEXT: sh s4, 12(sp)
-; ZHINX64-NEXT: sh s3, 10(sp)
-; ZHINX64-NEXT: sh s2, 8(sp)
-; ZHINX64-NEXT: sh s1, 6(sp)
-; ZHINX64-NEXT: sh s0, 4(sp)
-; ZHINX64-NEXT: sh t4, 2(sp)
-; ZHINX64-NEXT: sh t5, 0(sp)
-; ZHINX64-NEXT: ld t2, 64(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld t3, 56(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld t4, 48(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld t5, 40(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: sh t0, 38(sp)
+; ZHINX64-NEXT: sh t1, 36(sp)
+; ZHINX64-NEXT: sh t2, 34(sp)
+; ZHINX64-NEXT: sh t3, 32(sp)
+; ZHINX64-NEXT: sh ra, 30(sp)
+; ZHINX64-NEXT: sh s11, 28(sp)
+; ZHINX64-NEXT: sh s10, 26(sp)
+; ZHINX64-NEXT: sh s9, 24(sp)
+; ZHINX64-NEXT: sh s8, 22(sp)
+; ZHINX64-NEXT: sh s7, 20(sp)
+; ZHINX64-NEXT: sh s6, 18(sp)
+; ZHINX64-NEXT: sh s5, 16(sp)
+; ZHINX64-NEXT: sh s4, 14(sp)
+; ZHINX64-NEXT: sh s3, 12(sp)
+; ZHINX64-NEXT: sh s2, 10(sp)
+; ZHINX64-NEXT: sh s1, 8(sp)
+; ZHINX64-NEXT: sh s0, 6(sp)
+; ZHINX64-NEXT: sh t4, 4(sp)
+; ZHINX64-NEXT: sh t5, 2(sp)
+; ZHINX64-NEXT: sh t6, 0(sp)
+; ZHINX64-NEXT: ld t3, 64(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld t4, 56(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld t5, 48(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld t6, 40(sp) # 8-byte Folded Reload
; ZHINX64-NEXT: call callee_half_32
; ZHINX64-NEXT: ld ra, 168(sp) # 8-byte Folded Reload
; ZHINX64-NEXT: ld s0, 160(sp) # 8-byte Folded Reload
@@ -414,38 +416,38 @@ define half @caller_half_32(<32 x half> %A) nounwind {
;
; ZFINX32-LABEL: caller_half_32:
; ZFINX32: # %bb.0:
-; ZFINX32-NEXT: addi sp, sp, -144
-; ZFINX32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: addi sp, sp, -160
+; ZFINX32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw t0, 0(a0)
; ZFINX32-NEXT: lw a1, 4(a0)
-; ZFINX32-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw a1, 8(a0)
-; ZFINX32-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw a1, 12(a0)
-; ZFINX32-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw a1, 16(a0)
-; ZFINX32-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
; ZFINX32-NEXT: lw a5, 20(a0)
; ZFINX32-NEXT: lw a6, 24(a0)
; ZFINX32-NEXT: lw a7, 28(a0)
-; ZFINX32-NEXT: lw t2, 32(a0)
-; ZFINX32-NEXT: lw t3, 36(a0)
-; ZFINX32-NEXT: lw t4, 40(a0)
-; ZFINX32-NEXT: lw t5, 44(a0)
-; ZFINX32-NEXT: lw t6, 48(a0)
-; ZFINX32-NEXT: lw t1, 52(a0)
+; ZFINX32-NEXT: lw t3, 32(a0)
+; ZFINX32-NEXT: lw t4, 36(a0)
+; ZFINX32-NEXT: lw t5, 40(a0)
+; ZFINX32-NEXT: lw t6, 44(a0)
+; ZFINX32-NEXT: lw t1, 48(a0)
+; ZFINX32-NEXT: lw t2, 52(a0)
; ZFINX32-NEXT: lw s0, 56(a0)
; ZFINX32-NEXT: lw s1, 60(a0)
; ZFINX32-NEXT: lw s2, 64(a0)
@@ -464,83 +466,84 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZFINX32-NEXT: lw a2, 116(a0)
; ZFINX32-NEXT: lw a1, 120(a0)
; ZFINX32-NEXT: lw a0, 124(a0)
-; ZFINX32-NEXT: sw a0, 72(sp)
-; ZFINX32-NEXT: sw a1, 68(sp)
-; ZFINX32-NEXT: sw a2, 64(sp)
-; ZFINX32-NEXT: sw a3, 60(sp)
-; ZFINX32-NEXT: sw a4, 56(sp)
-; ZFINX32-NEXT: sw ra, 52(sp)
-; ZFINX32-NEXT: sw s11, 48(sp)
-; ZFINX32-NEXT: sw s10, 44(sp)
-; ZFINX32-NEXT: sw s9, 40(sp)
-; ZFINX32-NEXT: sw s8, 36(sp)
-; ZFINX32-NEXT: sw s7, 32(sp)
-; ZFINX32-NEXT: sw s6, 28(sp)
-; ZFINX32-NEXT: sw s5, 24(sp)
-; ZFINX32-NEXT: sw s4, 20(sp)
-; ZFINX32-NEXT: sw s3, 16(sp)
-; ZFINX32-NEXT: sw s2, 12(sp)
-; ZFINX32-NEXT: sw s1, 8(sp)
-; ZFINX32-NEXT: sw s0, 4(sp)
+; ZFINX32-NEXT: sw a0, 76(sp)
+; ZFINX32-NEXT: sw a1, 72(sp)
+; ZFINX32-NEXT: sw a2, 68(sp)
+; ZFINX32-NEXT: sw a3, 64(sp)
+; ZFINX32-NEXT: sw a4, 60(sp)
+; ZFINX32-NEXT: sw ra, 56(sp)
+; ZFINX32-NEXT: sw s11, 52(sp)
+; ZFINX32-NEXT: sw s10, 48(sp)
+; ZFINX32-NEXT: sw s9, 44(sp)
+; ZFINX32-NEXT: sw s8, 40(sp)
+; ZFINX32-NEXT: sw s7, 36(sp)
+; ZFINX32-NEXT: sw s6, 32(sp)
+; ZFINX32-NEXT: sw s5, 28(sp)
+; ZFINX32-NEXT: sw s4, 24(sp)
+; ZFINX32-NEXT: sw s3, 20(sp)
+; ZFINX32-NEXT: sw s2, 16(sp)
+; ZFINX32-NEXT: sw s1, 12(sp)
+; ZFINX32-NEXT: sw s0, 8(sp)
+; ZFINX32-NEXT: sw t2, 4(sp)
; ZFINX32-NEXT: sw t1, 0(sp)
; ZFINX32-NEXT: mv a0, t0
-; ZFINX32-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw a2, 84(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw a3, 80(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw a4, 76(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a2, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: call callee_half_32
; ZFINX32-NEXT: lui a1, 1048560
; ZFINX32-NEXT: or a0, a0, a1
-; ZFINX32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: addi sp, sp, 144
+; ZFINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: addi sp, sp, 160
; ZFINX32-NEXT: ret
;
; ZFINX64-LABEL: caller_half_32:
; ZFINX64: # %bb.0:
-; ZFINX64-NEXT: addi sp, sp, -288
-; ZFINX64-NEXT: sd ra, 280(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s0, 272(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s1, 264(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s2, 256(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s3, 248(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s4, 240(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s5, 232(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s6, 224(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s7, 216(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s8, 208(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s9, 200(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s10, 192(sp) # 8-byte Folded Spill
-; ZFINX64-NEXT: sd s11, 184(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: addi sp, sp, -304
+; ZFINX64-NEXT: sd ra, 296(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s0, 288(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s1, 280(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s2, 272(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s3, 264(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s4, 256(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s5, 248(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s6, 240(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s7, 232(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s8, 224(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s9, 216(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s10, 208(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd s11, 200(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: ld t0, 0(a0)
; ZFINX64-NEXT: ld a1, 8(a0)
-; ZFINX64-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: ld a1, 16(a0)
-; ZFINX64-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: ld a1, 24(a0)
-; ZFINX64-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: ld a1, 32(a0)
-; ZFINX64-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
; ZFINX64-NEXT: ld a5, 40(a0)
; ZFINX64-NEXT: ld a6, 48(a0)
; ZFINX64-NEXT: ld a7, 56(a0)
-; ZFINX64-NEXT: ld t2, 64(a0)
-; ZFINX64-NEXT: ld t3, 72(a0)
-; ZFINX64-NEXT: ld t4, 80(a0)
-; ZFINX64-NEXT: ld t5, 88(a0)
-; ZFINX64-NEXT: ld t6, 96(a0)
-; ZFINX64-NEXT: ld t1, 104(a0)
+; ZFINX64-NEXT: ld t3, 64(a0)
+; ZFINX64-NEXT: ld t4, 72(a0)
+; ZFINX64-NEXT: ld t5, 80(a0)
+; ZFINX64-NEXT: ld t6, 88(a0)
+; ZFINX64-NEXT: ld t1, 96(a0)
+; ZFINX64-NEXT: ld t2, 104(a0)
; ZFINX64-NEXT: ld s0, 112(a0)
; ZFINX64-NEXT: ld s1, 120(a0)
; ZFINX64-NEXT: ld s2, 128(a0)
@@ -559,83 +562,84 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZFINX64-NEXT: ld a2, 232(a0)
; ZFINX64-NEXT: ld a1, 240(a0)
; ZFINX64-NEXT: ld a0, 248(a0)
-; ZFINX64-NEXT: sd a0, 144(sp)
-; ZFINX64-NEXT: sd a1, 136(sp)
-; ZFINX64-NEXT: sd a2, 128(sp)
-; ZFINX64-NEXT: sd a3, 120(sp)
-; ZFINX64-NEXT: sd a4, 112(sp)
-; ZFINX64-NEXT: sd ra, 104(sp)
-; ZFINX64-NEXT: sd s11, 96(sp)
-; ZFINX64-NEXT: sd s10, 88(sp)
-; ZFINX64-NEXT: sd s9, 80(sp)
-; ZFINX64-NEXT: sd s8, 72(sp)
-; ZFINX64-NEXT: sd s7, 64(sp)
-; ZFINX64-NEXT: sd s6, 56(sp)
-; ZFINX64-NEXT: sd s5, 48(sp)
-; ZFINX64-NEXT: sd s4, 40(sp)
-; ZFINX64-NEXT: sd s3, 32(sp)
-; ZFINX64-NEXT: sd s2, 24(sp)
-; ZFINX64-NEXT: sd s1, 16(sp)
-; ZFINX64-NEXT: sd s0, 8(sp)
+; ZFINX64-NEXT: sd a0, 152(sp)
+; ZFINX64-NEXT: sd a1, 144(sp)
+; ZFINX64-NEXT: sd a2, 136(sp)
+; ZFINX64-NEXT: sd a3, 128(sp)
+; ZFINX64-NEXT: sd a4, 120(sp)
+; ZFINX64-NEXT: sd ra, 112(sp)
+; ZFINX64-NEXT: sd s11, 104(sp)
+; ZFINX64-NEXT: sd s10, 96(sp)
+; ZFINX64-NEXT: sd s9, 88(sp)
+; ZFINX64-NEXT: sd s8, 80(sp)
+; ZFINX64-NEXT: sd s7, 72(sp)
+; ZFINX64-NEXT: sd s6, 64(sp)
+; ZFINX64-NEXT: sd s5, 56(sp)
+; ZFINX64-NEXT: sd s4, 48(sp)
+; ZFINX64-NEXT: sd s3, 40(sp)
+; ZFINX64-NEXT: sd s2, 32(sp)
+; ZFINX64-NEXT: sd s1, 24(sp)
+; ZFINX64-NEXT: sd s0, 16(sp)
+; ZFINX64-NEXT: sd t2, 8(sp)
; ZFINX64-NEXT: sd t1, 0(sp)
; ZFINX64-NEXT: mv a0, t0
-; ZFINX64-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld a2, 168(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld a3, 160(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld a4, 152(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a2, 184(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: call callee_half_32
; ZFINX64-NEXT: lui a1, 1048560
; ZFINX64-NEXT: or a0, a0, a1
-; ZFINX64-NEXT: ld ra, 280(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s0, 272(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s1, 264(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s2, 256(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s3, 248(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s4, 240(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s5, 232(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s6, 224(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s7, 216(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s8, 208(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s9, 200(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s10, 192(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld s11, 184(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: addi sp, sp, 288
+; ZFINX64-NEXT: ld ra, 296(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s0, 288(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s1, 280(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s2, 272(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s3, 264(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s4, 256(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s5, 248(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s6, 240(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s7, 232(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s8, 224(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s9, 216(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s10, 208(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld s11, 200(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: addi sp, sp, 304
; ZFINX64-NEXT: ret
;
; ZDINX32-LABEL: caller_half_32:
; ZDINX32: # %bb.0:
-; ZDINX32-NEXT: addi sp, sp, -144
-; ZDINX32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: addi sp, sp, -160
+; ZDINX32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
; ZDINX32-NEXT: lw t0, 0(a0)
; ZDINX32-NEXT: lw a1, 4(a0)
-; ZDINX32-NEXT: sw a1, 88(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw a1, 104(sp) # 4-byte Folded Spill
; ZDINX32-NEXT: lw a1, 8(a0)
-; ZDINX32-NEXT: sw a1, 84(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw a1, 100(sp) # 4-byte Folded Spill
; ZDINX32-NEXT: lw a1, 12(a0)
-; ZDINX32-NEXT: sw a1, 80(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw a1, 96(sp) # 4-byte Folded Spill
; ZDINX32-NEXT: lw a1, 16(a0)
-; ZDINX32-NEXT: sw a1, 76(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw a1, 92(sp) # 4-byte Folded Spill
; ZDINX32-NEXT: lw a5, 20(a0)
; ZDINX32-NEXT: lw a6, 24(a0)
; ZDINX32-NEXT: lw a7, 28(a0)
-; ZDINX32-NEXT: lw t2, 32(a0)
-; ZDINX32-NEXT: lw t3, 36(a0)
-; ZDINX32-NEXT: lw t4, 40(a0)
-; ZDINX32-NEXT: lw t5, 44(a0)
-; ZDINX32-NEXT: lw t6, 48(a0)
-; ZDINX32-NEXT: lw t1, 52(a0)
+; ZDINX32-NEXT: lw t3, 32(a0)
+; ZDINX32-NEXT: lw t4, 36(a0)
+; ZDINX32-NEXT: lw t5, 40(a0)
+; ZDINX32-NEXT: lw t6, 44(a0)
+; ZDINX32-NEXT: lw t1, 48(a0)
+; ZDINX32-NEXT: lw t2, 52(a0)
; ZDINX32-NEXT: lw s0, 56(a0)
; ZDINX32-NEXT: lw s1, 60(a0)
; ZDINX32-NEXT: lw s2, 64(a0)
@@ -654,83 +658,84 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZDINX32-NEXT: lw a2, 116(a0)
; ZDINX32-NEXT: lw a1, 120(a0)
; ZDINX32-NEXT: lw a0, 124(a0)
-; ZDINX32-NEXT: sw a0, 72(sp)
-; ZDINX32-NEXT: sw a1, 68(sp)
-; ZDINX32-NEXT: sw a2, 64(sp)
-; ZDINX32-NEXT: sw a3, 60(sp)
-; ZDINX32-NEXT: sw a4, 56(sp)
-; ZDINX32-NEXT: sw ra, 52(sp)
-; ZDINX32-NEXT: sw s11, 48(sp)
-; ZDINX32-NEXT: sw s10, 44(sp)
-; ZDINX32-NEXT: sw s9, 40(sp)
-; ZDINX32-NEXT: sw s8, 36(sp)
-; ZDINX32-NEXT: sw s7, 32(sp)
-; ZDINX32-NEXT: sw s6, 28(sp)
-; ZDINX32-NEXT: sw s5, 24(sp)
-; ZDINX32-NEXT: sw s4, 20(sp)
-; ZDINX32-NEXT: sw s3, 16(sp)
-; ZDINX32-NEXT: sw s2, 12(sp)
-; ZDINX32-NEXT: sw s1, 8(sp)
-; ZDINX32-NEXT: sw s0, 4(sp)
+; ZDINX32-NEXT: sw a0, 76(sp)
+; ZDINX32-NEXT: sw a1, 72(sp)
+; ZDINX32-NEXT: sw a2, 68(sp)
+; ZDINX32-NEXT: sw a3, 64(sp)
+; ZDINX32-NEXT: sw a4, 60(sp)
+; ZDINX32-NEXT: sw ra, 56(sp)
+; ZDINX32-NEXT: sw s11, 52(sp)
+; ZDINX32-NEXT: sw s10, 48(sp)
+; ZDINX32-NEXT: sw s9, 44(sp)
+; ZDINX32-NEXT: sw s8, 40(sp)
+; ZDINX32-NEXT: sw s7, 36(sp)
+; ZDINX32-NEXT: sw s6, 32(sp)
+; ZDINX32-NEXT: sw s5, 28(sp)
+; ZDINX32-NEXT: sw s4, 24(sp)
+; ZDINX32-NEXT: sw s3, 20(sp)
+; ZDINX32-NEXT: sw s2, 16(sp)
+; ZDINX32-NEXT: sw s1, 12(sp)
+; ZDINX32-NEXT: sw s0, 8(sp)
+; ZDINX32-NEXT: sw t2, 4(sp)
; ZDINX32-NEXT: sw t1, 0(sp)
; ZDINX32-NEXT: mv a0, t0
-; ZDINX32-NEXT: lw a1, 88(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw a2, 84(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw a3, 80(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw a4, 76(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw a1, 104(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw a2, 100(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw a3, 96(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw a4, 92(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: call callee_half_32
; ZDINX32-NEXT: lui a1, 1048560
; ZDINX32-NEXT: or a0, a0, a1
-; ZDINX32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: addi sp, sp, 144
+; ZDINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: addi sp, sp, 160
; ZDINX32-NEXT: ret
;
; ZDINX64-LABEL: caller_half_32:
; ZDINX64: # %bb.0:
-; ZDINX64-NEXT: addi sp, sp, -288
-; ZDINX64-NEXT: sd ra, 280(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s0, 272(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s1, 264(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s2, 256(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s3, 248(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s4, 240(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s5, 232(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s6, 224(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s7, 216(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s8, 208(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s9, 200(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s10, 192(sp) # 8-byte Folded Spill
-; ZDINX64-NEXT: sd s11, 184(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: addi sp, sp, -304
+; ZDINX64-NEXT: sd ra, 296(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s0, 288(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s1, 280(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s2, 272(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s3, 264(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s4, 256(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s5, 248(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s6, 240(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s7, 232(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s8, 224(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s9, 216(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s10, 208(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd s11, 200(sp) # 8-byte Folded Spill
; ZDINX64-NEXT: ld t0, 0(a0)
; ZDINX64-NEXT: ld a1, 8(a0)
-; ZDINX64-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd a1, 192(sp) # 8-byte Folded Spill
; ZDINX64-NEXT: ld a1, 16(a0)
-; ZDINX64-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd a1, 184(sp) # 8-byte Folded Spill
; ZDINX64-NEXT: ld a1, 24(a0)
-; ZDINX64-NEXT: sd a1, 160(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd a1, 176(sp) # 8-byte Folded Spill
; ZDINX64-NEXT: ld a1, 32(a0)
-; ZDINX64-NEXT: sd a1, 152(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT: sd a1, 168(sp) # 8-byte Folded Spill
; ZDINX64-NEXT: ld a5, 40(a0)
; ZDINX64-NEXT: ld a6, 48(a0)
; ZDINX64-NEXT: ld a7, 56(a0)
-; ZDINX64-NEXT: ld t2, 64(a0)
-; ZDINX64-NEXT: ld t3, 72(a0)
-; ZDINX64-NEXT: ld t4, 80(a0)
-; ZDINX64-NEXT: ld t5, 88(a0)
-; ZDINX64-NEXT: ld t6, 96(a0)
-; ZDINX64-NEXT: ld t1, 104(a0)
+; ZDINX64-NEXT: ld t3, 64(a0)
+; ZDINX64-NEXT: ld t4, 72(a0)
+; ZDINX64-NEXT: ld t5, 80(a0)
+; ZDINX64-NEXT: ld t6, 88(a0)
+; ZDINX64-NEXT: ld t1, 96(a0)
+; ZDINX64-NEXT: ld t2, 104(a0)
; ZDINX64-NEXT: ld s0, 112(a0)
; ZDINX64-NEXT: ld s1, 120(a0)
; ZDINX64-NEXT: ld s2, 128(a0)
@@ -749,47 +754,48 @@ define half @caller_half_32(<32 x half> %A) nounwind {
; ZDINX64-NEXT: ld a2, 232(a0)
; ZDINX64-NEXT: ld a1, 240(a0)
; ZDINX64-NEXT: ld a0, 248(a0)
-; ZDINX64-NEXT: sd a0, 144(sp)
-; ZDINX64-NEXT: sd a1, 136(sp)
-; ZDINX64-NEXT: sd a2, 128(sp)
-; ZDINX64-NEXT: sd a3, 120(sp)
-; ZDINX64-NEXT: sd a4, 112(sp)
-; ZDINX64-NEXT: sd ra, 104(sp)
-; ZDINX64-NEXT: sd s11, 96(sp)
-; ZDINX64-NEXT: sd s10, 88(sp)
-; ZDINX64-NEXT: sd s9, 80(sp)
-; ZDINX64-NEXT: sd s8, 72(sp)
-; ZDINX64-NEXT: sd s7, 64(sp)
-; ZDINX64-NEXT: sd s6, 56(sp)
-; ZDINX64-NEXT: sd s5, 48(sp)
-; ZDINX64-NEXT: sd s4, 40(sp)
-; ZDINX64-NEXT: sd s3, 32(sp)
-; ZDINX64-NEXT: sd s2, 24(sp)
-; ZDINX64-NEXT: sd s1, 16(sp)
-; ZDINX64-NEXT: sd s0, 8(sp)
+; ZDINX64-NEXT: sd a0, 152(sp)
+; ZDINX64-NEXT: sd a1, 144(sp)
+; ZDINX64-NEXT: sd a2, 136(sp)
+; ZDINX64-NEXT: sd a3, 128(sp)
+; ZDINX64-NEXT: sd a4, 120(sp)
+; ZDINX64-NEXT: sd ra, 112(sp)
+; ZDINX64-NEXT: sd s11, 104(sp)
+; ZDINX64-NEXT: sd s10, 96(sp)
+; ZDINX64-NEXT: sd s9, 88(sp)
+; ZDINX64-NEXT: sd s8, 80(sp)
+; ZDINX64-NEXT: sd s7, 72(sp)
+; ZDINX64-NEXT: sd s6, 64(sp)
+; ZDINX64-NEXT: sd s5, 56(sp)
+; ZDINX64-NEXT: sd s4, 48(sp)
+; ZDINX64-NEXT: sd s3, 40(sp)
+; ZDINX64-NEXT: sd s2, 32(sp)
+; ZDINX64-NEXT: sd s1, 24(sp)
+; ZDINX64-NEXT: sd s0, 16(sp)
+; ZDINX64-NEXT: sd t2, 8(sp)
; ZDINX64-NEXT: sd t1, 0(sp)
; ZDINX64-NEXT: mv a0, t0
-; ZDINX64-NEXT: ld a1, 176(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld a2, 168(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld a3, 160(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld a4, 152(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld a1, 192(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld a2, 184(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld a3, 176(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld a4, 168(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: call callee_half_32
; ZDINX64-NEXT: lui a1, 1048560
; ZDINX64-NEXT: or a0, a0, a1
-; ZDINX64-NEXT: ld ra, 280(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s0, 272(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s1, 264(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s2, 256(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s3, 248(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s4, 240(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s5, 232(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s6, 224(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s7, 216(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s8, 208(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s9, 200(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s10, 192(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld s11, 184(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: addi sp, sp, 288
+; ZDINX64-NEXT: ld ra, 296(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s0, 288(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s1, 280(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s2, 272(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s3, 264(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s4, 256(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s5, 248(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s6, 240(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s7, 232(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s8, 224(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s9, 216(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s10, 208(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld s11, 200(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: addi sp, sp, 304
; ZDINX64-NEXT: ret
%C = call fastcc half @callee_half_32(<32 x half> %A)
ret half %C
@@ -826,86 +832,87 @@ define fastcc float @callee_float_32(<32 x float> %A) nounwind {
define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX32-LABEL: caller_float_32:
; ZHINX32: # %bb.0:
-; ZHINX32-NEXT: addi sp, sp, -144
-; ZHINX32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lw t0, 144(sp)
-; ZHINX32-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lw t0, 148(sp)
-; ZHINX32-NEXT: sw t0, 84(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lw t0, 152(sp)
-; ZHINX32-NEXT: sw t0, 80(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lw t0, 156(sp)
-; ZHINX32-NEXT: sw t0, 76(sp) # 4-byte Folded Spill
-; ZHINX32-NEXT: lw t6, 160(sp)
-; ZHINX32-NEXT: lw t5, 164(sp)
-; ZHINX32-NEXT: lw t4, 168(sp)
-; ZHINX32-NEXT: lw s0, 172(sp)
-; ZHINX32-NEXT: lw s1, 176(sp)
-; ZHINX32-NEXT: lw s2, 180(sp)
-; ZHINX32-NEXT: lw s3, 184(sp)
-; ZHINX32-NEXT: lw s4, 188(sp)
-; ZHINX32-NEXT: lw s5, 192(sp)
-; ZHINX32-NEXT: lw s6, 196(sp)
-; ZHINX32-NEXT: lw s7, 200(sp)
-; ZHINX32-NEXT: lw s8, 204(sp)
-; ZHINX32-NEXT: lw s9, 208(sp)
-; ZHINX32-NEXT: lw s10, 212(sp)
-; ZHINX32-NEXT: lw s11, 216(sp)
-; ZHINX32-NEXT: lw ra, 220(sp)
-; ZHINX32-NEXT: lw t3, 224(sp)
-; ZHINX32-NEXT: lw t2, 228(sp)
-; ZHINX32-NEXT: lw t1, 232(sp)
-; ZHINX32-NEXT: lw t0, 236(sp)
-; ZHINX32-NEXT: sw t0, 72(sp)
-; ZHINX32-NEXT: sw t1, 68(sp)
-; ZHINX32-NEXT: sw t2, 64(sp)
-; ZHINX32-NEXT: sw t3, 60(sp)
-; ZHINX32-NEXT: sw ra, 56(sp)
-; ZHINX32-NEXT: sw s11, 52(sp)
-; ZHINX32-NEXT: sw s10, 48(sp)
-; ZHINX32-NEXT: sw s9, 44(sp)
-; ZHINX32-NEXT: sw s8, 40(sp)
-; ZHINX32-NEXT: sw s7, 36(sp)
-; ZHINX32-NEXT: sw s6, 32(sp)
-; ZHINX32-NEXT: sw s5, 28(sp)
-; ZHINX32-NEXT: sw s4, 24(sp)
-; ZHINX32-NEXT: sw s3, 20(sp)
-; ZHINX32-NEXT: sw s2, 16(sp)
-; ZHINX32-NEXT: sw s1, 12(sp)
-; ZHINX32-NEXT: sw s0, 8(sp)
-; ZHINX32-NEXT: sw t4, 4(sp)
-; ZHINX32-NEXT: sw t5, 0(sp)
-; ZHINX32-NEXT: lw t2, 88(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t3, 84(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t4, 80(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw t5, 76(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: addi sp, sp, -160
+; ZHINX32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: lw t0, 160(sp)
+; ZHINX32-NEXT: sw t0, 104(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: lw t0, 164(sp)
+; ZHINX32-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: lw t0, 168(sp)
+; ZHINX32-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: lw t0, 172(sp)
+; ZHINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT: lw t6, 176(sp)
+; ZHINX32-NEXT: lw t5, 180(sp)
+; ZHINX32-NEXT: lw t4, 184(sp)
+; ZHINX32-NEXT: lw s0, 188(sp)
+; ZHINX32-NEXT: lw s1, 192(sp)
+; ZHINX32-NEXT: lw s2, 196(sp)
+; ZHINX32-NEXT: lw s3, 200(sp)
+; ZHINX32-NEXT: lw s4, 204(sp)
+; ZHINX32-NEXT: lw s5, 208(sp)
+; ZHINX32-NEXT: lw s6, 212(sp)
+; ZHINX32-NEXT: lw s7, 216(sp)
+; ZHINX32-NEXT: lw s8, 220(sp)
+; ZHINX32-NEXT: lw s9, 224(sp)
+; ZHINX32-NEXT: lw s10, 228(sp)
+; ZHINX32-NEXT: lw s11, 232(sp)
+; ZHINX32-NEXT: lw ra, 236(sp)
+; ZHINX32-NEXT: lw t3, 240(sp)
+; ZHINX32-NEXT: lw t2, 244(sp)
+; ZHINX32-NEXT: lw t1, 248(sp)
+; ZHINX32-NEXT: lw t0, 252(sp)
+; ZHINX32-NEXT: sw t0, 76(sp)
+; ZHINX32-NEXT: sw t1, 72(sp)
+; ZHINX32-NEXT: sw t2, 68(sp)
+; ZHINX32-NEXT: sw t3, 64(sp)
+; ZHINX32-NEXT: sw ra, 60(sp)
+; ZHINX32-NEXT: sw s11, 56(sp)
+; ZHINX32-NEXT: sw s10, 52(sp)
+; ZHINX32-NEXT: sw s9, 48(sp)
+; ZHINX32-NEXT: sw s8, 44(sp)
+; ZHINX32-NEXT: sw s7, 40(sp)
+; ZHINX32-NEXT: sw s6, 36(sp)
+; ZHINX32-NEXT: sw s5, 32(sp)
+; ZHINX32-NEXT: sw s4, 28(sp)
+; ZHINX32-NEXT: sw s3, 24(sp)
+; ZHINX32-NEXT: sw s2, 20(sp)
+; ZHINX32-NEXT: sw s1, 16(sp)
+; ZHINX32-NEXT: sw s0, 12(sp)
+; ZHINX32-NEXT: sw t4, 8(sp)
+; ZHINX32-NEXT: sw t5, 4(sp)
+; ZHINX32-NEXT: sw t6, 0(sp)
+; ZHINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw t6, 92(sp) # 4-byte Folded Reload
; ZHINX32-NEXT: call callee_float_32
-; ZHINX32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; ZHINX32-NEXT: addi sp, sp, 144
+; ZHINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT: addi sp, sp, 160
; ZHINX32-NEXT: ret
;
; ZHINX64-LABEL: caller_float_32:
@@ -952,29 +959,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZHINX64-NEXT: lw t2, 392(sp)
; ZHINX64-NEXT: lw t1, 400(sp)
; ZHINX64-NEXT: lw t0, 408(sp)
-; ZHINX64-NEXT: sw t0, 72(sp)
-; ZHINX64-NEXT: sw t1, 68(sp)
-; ZHINX64-NEXT: sw t2, 64(sp)
-; ZHINX64-NEXT: sw t3, 60(sp)
-; ZHINX64-NEXT: sw ra, 56(sp)
-; ZHINX64-NEXT: sw s11, 52(sp)
-; ZHINX64-NEXT: sw s10, 48(sp)
-; ZHINX64-NEXT: sw s9, 44(sp)
-; ZHINX64-NEXT: sw s8, 40(sp)
-; ZHINX64-NEXT: sw s7, 36(sp)
-; ZHINX64-NEXT: sw s6, 32(sp)
-; ZHINX64-NEXT: sw s5, 28(sp)
-; ZHINX64-NEXT: sw s4, 24(sp)
-; ZHINX64-NEXT: sw s3, 20(sp)
-; ZHINX64-NEXT: sw s2, 16(sp)
-; ZHINX64-NEXT: sw s1, 12(sp)
-; ZHINX64-NEXT: sw s0, 8(sp)
-; ZHINX64-NEXT: sw t4, 4(sp)
-; ZHINX64-NEXT: sw t5, 0(sp)
-; ZHINX64-NEXT: ld t2, 112(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld t3, 104(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload
-; ZHINX64-NEXT: ld t5, 88(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: sw t0, 76(sp)
+; ZHINX64-NEXT: sw t1, 72(sp)
+; ZHINX64-NEXT: sw t2, 68(sp)
+; ZHINX64-NEXT: sw t3, 64(sp)
+; ZHINX64-NEXT: sw ra, 60(sp)
+; ZHINX64-NEXT: sw s11, 56(sp)
+; ZHINX64-NEXT: sw s10, 52(sp)
+; ZHINX64-NEXT: sw s9, 48(sp)
+; ZHINX64-NEXT: sw s8, 44(sp)
+; ZHINX64-NEXT: sw s7, 40(sp)
+; ZHINX64-NEXT: sw s6, 36(sp)
+; ZHINX64-NEXT: sw s5, 32(sp)
+; ZHINX64-NEXT: sw s4, 28(sp)
+; ZHINX64-NEXT: sw s3, 24(sp)
+; ZHINX64-NEXT: sw s2, 20(sp)
+; ZHINX64-NEXT: sw s1, 16(sp)
+; ZHINX64-NEXT: sw s0, 12(sp)
+; ZHINX64-NEXT: sw t4, 8(sp)
+; ZHINX64-NEXT: sw t5, 4(sp)
+; ZHINX64-NEXT: sw t6, 0(sp)
+; ZHINX64-NEXT: ld t3, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld t4, 104(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld t5, 96(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT: ld t6, 88(sp) # 8-byte Folded Reload
; ZHINX64-NEXT: call callee_float_32
; ZHINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload
; ZHINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload
@@ -994,86 +1002,87 @@ define float @caller_float_32(<32 x float> %A) nounwind {
;
; ZFINX32-LABEL: caller_float_32:
; ZFINX32: # %bb.0:
-; ZFINX32-NEXT: addi sp, sp, -144
-; ZFINX32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t0, 144(sp)
-; ZFINX32-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t0, 148(sp)
-; ZFINX32-NEXT: sw t0, 84(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t0, 152(sp)
-; ZFINX32-NEXT: sw t0, 80(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t0, 156(sp)
-; ZFINX32-NEXT: sw t0, 76(sp) # 4-byte Folded Spill
-; ZFINX32-NEXT: lw t6, 160(sp)
-; ZFINX32-NEXT: lw t5, 164(sp)
-; ZFINX32-NEXT: lw t4, 168(sp)
-; ZFINX32-NEXT: lw s0, 172(sp)
-; ZFINX32-NEXT: lw s1, 176(sp)
-; ZFINX32-NEXT: lw s2, 180(sp)
-; ZFINX32-NEXT: lw s3, 184(sp)
-; ZFINX32-NEXT: lw s4, 188(sp)
-; ZFINX32-NEXT: lw s5, 192(sp)
-; ZFINX32-NEXT: lw s6, 196(sp)
-; ZFINX32-NEXT: lw s7, 200(sp)
-; ZFINX32-NEXT: lw s8, 204(sp)
-; ZFINX32-NEXT: lw s9, 208(sp)
-; ZFINX32-NEXT: lw s10, 212(sp)
-; ZFINX32-NEXT: lw s11, 216(sp)
-; ZFINX32-NEXT: lw ra, 220(sp)
-; ZFINX32-NEXT: lw t3, 224(sp)
-; ZFINX32-NEXT: lw t2, 228(sp)
-; ZFINX32-NEXT: lw t1, 232(sp)
-; ZFINX32-NEXT: lw t0, 236(sp)
-; ZFINX32-NEXT: sw t0, 72(sp)
-; ZFINX32-NEXT: sw t1, 68(sp)
-; ZFINX32-NEXT: sw t2, 64(sp)
-; ZFINX32-NEXT: sw t3, 60(sp)
-; ZFINX32-NEXT: sw ra, 56(sp)
-; ZFINX32-NEXT: sw s11, 52(sp)
-; ZFINX32-NEXT: sw s10, 48(sp)
-; ZFINX32-NEXT: sw s9, 44(sp)
-; ZFINX32-NEXT: sw s8, 40(sp)
-; ZFINX32-NEXT: sw s7, 36(sp)
-; ZFINX32-NEXT: sw s6, 32(sp)
-; ZFINX32-NEXT: sw s5, 28(sp)
-; ZFINX32-NEXT: sw s4, 24(sp)
-; ZFINX32-NEXT: sw s3, 20(sp)
-; ZFINX32-NEXT: sw s2, 16(sp)
-; ZFINX32-NEXT: sw s1, 12(sp)
-; ZFINX32-NEXT: sw s0, 8(sp)
-; ZFINX32-NEXT: sw t4, 4(sp)
-; ZFINX32-NEXT: sw t5, 0(sp)
-; ZFINX32-NEXT: lw t2, 88(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw t3, 84(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw t4, 80(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw t5, 76(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: addi sp, sp, -160
+; ZFINX32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: lw t0, 160(sp)
+; ZFINX32-NEXT: sw t0, 104(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: lw t0, 164(sp)
+; ZFINX32-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: lw t0, 168(sp)
+; ZFINX32-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: lw t0, 172(sp)
+; ZFINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT: lw t6, 176(sp)
+; ZFINX32-NEXT: lw t5, 180(sp)
+; ZFINX32-NEXT: lw t4, 184(sp)
+; ZFINX32-NEXT: lw s0, 188(sp)
+; ZFINX32-NEXT: lw s1, 192(sp)
+; ZFINX32-NEXT: lw s2, 196(sp)
+; ZFINX32-NEXT: lw s3, 200(sp)
+; ZFINX32-NEXT: lw s4, 204(sp)
+; ZFINX32-NEXT: lw s5, 208(sp)
+; ZFINX32-NEXT: lw s6, 212(sp)
+; ZFINX32-NEXT: lw s7, 216(sp)
+; ZFINX32-NEXT: lw s8, 220(sp)
+; ZFINX32-NEXT: lw s9, 224(sp)
+; ZFINX32-NEXT: lw s10, 228(sp)
+; ZFINX32-NEXT: lw s11, 232(sp)
+; ZFINX32-NEXT: lw ra, 236(sp)
+; ZFINX32-NEXT: lw t3, 240(sp)
+; ZFINX32-NEXT: lw t2, 244(sp)
+; ZFINX32-NEXT: lw t1, 248(sp)
+; ZFINX32-NEXT: lw t0, 252(sp)
+; ZFINX32-NEXT: sw t0, 76(sp)
+; ZFINX32-NEXT: sw t1, 72(sp)
+; ZFINX32-NEXT: sw t2, 68(sp)
+; ZFINX32-NEXT: sw t3, 64(sp)
+; ZFINX32-NEXT: sw ra, 60(sp)
+; ZFINX32-NEXT: sw s11, 56(sp)
+; ZFINX32-NEXT: sw s10, 52(sp)
+; ZFINX32-NEXT: sw s9, 48(sp)
+; ZFINX32-NEXT: sw s8, 44(sp)
+; ZFINX32-NEXT: sw s7, 40(sp)
+; ZFINX32-NEXT: sw s6, 36(sp)
+; ZFINX32-NEXT: sw s5, 32(sp)
+; ZFINX32-NEXT: sw s4, 28(sp)
+; ZFINX32-NEXT: sw s3, 24(sp)
+; ZFINX32-NEXT: sw s2, 20(sp)
+; ZFINX32-NEXT: sw s1, 16(sp)
+; ZFINX32-NEXT: sw s0, 12(sp)
+; ZFINX32-NEXT: sw t4, 8(sp)
+; ZFINX32-NEXT: sw t5, 4(sp)
+; ZFINX32-NEXT: sw t6, 0(sp)
+; ZFINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw t6, 92(sp) # 4-byte Folded Reload
; ZFINX32-NEXT: call callee_float_32
-; ZFINX32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; ZFINX32-NEXT: addi sp, sp, 144
+; ZFINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT: addi sp, sp, 160
; ZFINX32-NEXT: ret
;
; ZFINX64-LABEL: caller_float_32:
@@ -1120,29 +1129,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZFINX64-NEXT: lw t2, 392(sp)
; ZFINX64-NEXT: lw t1, 400(sp)
; ZFINX64-NEXT: lw t0, 408(sp)
-; ZFINX64-NEXT: sw t0, 72(sp)
-; ZFINX64-NEXT: sw t1, 68(sp)
-; ZFINX64-NEXT: sw t2, 64(sp)
-; ZFINX64-NEXT: sw t3, 60(sp)
-; ZFINX64-NEXT: sw ra, 56(sp)
-; ZFINX64-NEXT: sw s11, 52(sp)
-; ZFINX64-NEXT: sw s10, 48(sp)
-; ZFINX64-NEXT: sw s9, 44(sp)
-; ZFINX64-NEXT: sw s8, 40(sp)
-; ZFINX64-NEXT: sw s7, 36(sp)
-; ZFINX64-NEXT: sw s6, 32(sp)
-; ZFINX64-NEXT: sw s5, 28(sp)
-; ZFINX64-NEXT: sw s4, 24(sp)
-; ZFINX64-NEXT: sw s3, 20(sp)
-; ZFINX64-NEXT: sw s2, 16(sp)
-; ZFINX64-NEXT: sw s1, 12(sp)
-; ZFINX64-NEXT: sw s0, 8(sp)
-; ZFINX64-NEXT: sw t4, 4(sp)
-; ZFINX64-NEXT: sw t5, 0(sp)
-; ZFINX64-NEXT: ld t2, 112(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld t3, 104(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload
-; ZFINX64-NEXT: ld t5, 88(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: sw t0, 76(sp)
+; ZFINX64-NEXT: sw t1, 72(sp)
+; ZFINX64-NEXT: sw t2, 68(sp)
+; ZFINX64-NEXT: sw t3, 64(sp)
+; ZFINX64-NEXT: sw ra, 60(sp)
+; ZFINX64-NEXT: sw s11, 56(sp)
+; ZFINX64-NEXT: sw s10, 52(sp)
+; ZFINX64-NEXT: sw s9, 48(sp)
+; ZFINX64-NEXT: sw s8, 44(sp)
+; ZFINX64-NEXT: sw s7, 40(sp)
+; ZFINX64-NEXT: sw s6, 36(sp)
+; ZFINX64-NEXT: sw s5, 32(sp)
+; ZFINX64-NEXT: sw s4, 28(sp)
+; ZFINX64-NEXT: sw s3, 24(sp)
+; ZFINX64-NEXT: sw s2, 20(sp)
+; ZFINX64-NEXT: sw s1, 16(sp)
+; ZFINX64-NEXT: sw s0, 12(sp)
+; ZFINX64-NEXT: sw t4, 8(sp)
+; ZFINX64-NEXT: sw t5, 4(sp)
+; ZFINX64-NEXT: sw t6, 0(sp)
+; ZFINX64-NEXT: ld t3, 112(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld t4, 104(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld t5, 96(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT: ld t6, 88(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: call callee_float_32
; ZFINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload
; ZFINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload
@@ -1162,86 +1172,87 @@ define float @caller_float_32(<32 x float> %A) nounwind {
;
; ZDINX32-LABEL: caller_float_32:
; ZDINX32: # %bb.0:
-; ZDINX32-NEXT: addi sp, sp, -144
-; ZDINX32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s2, 128(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s3, 124(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s4, 120(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s5, 116(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s6, 112(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s7, 108(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s8, 104(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s9, 100(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s10, 96(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: sw s11, 92(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t0, 144(sp)
-; ZDINX32-NEXT: sw t0, 88(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t0, 148(sp)
-; ZDINX32-NEXT: sw t0, 84(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t0, 152(sp)
-; ZDINX32-NEXT: sw t0, 80(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t0, 156(sp)
-; ZDINX32-NEXT: sw t0, 76(sp) # 4-byte Folded Spill
-; ZDINX32-NEXT: lw t6, 160(sp)
-; ZDINX32-NEXT: lw t5, 164(sp)
-; ZDINX32-NEXT: lw t4, 168(sp)
-; ZDINX32-NEXT: lw s0, 172(sp)
-; ZDINX32-NEXT: lw s1, 176(sp)
-; ZDINX32-NEXT: lw s2, 180(sp)
-; ZDINX32-NEXT: lw s3, 184(sp)
-; ZDINX32-NEXT: lw s4, 188(sp)
-; ZDINX32-NEXT: lw s5, 192(sp)
-; ZDINX32-NEXT: lw s6, 196(sp)
-; ZDINX32-NEXT: lw s7, 200(sp)
-; ZDINX32-NEXT: lw s8, 204(sp)
-; ZDINX32-NEXT: lw s9, 208(sp)
-; ZDINX32-NEXT: lw s10, 212(sp)
-; ZDINX32-NEXT: lw s11, 216(sp)
-; ZDINX32-NEXT: lw ra, 220(sp)
-; ZDINX32-NEXT: lw t3, 224(sp)
-; ZDINX32-NEXT: lw t2, 228(sp)
-; ZDINX32-NEXT: lw t1, 232(sp)
-; ZDINX32-NEXT: lw t0, 236(sp)
-; ZDINX32-NEXT: sw t0, 72(sp)
-; ZDINX32-NEXT: sw t1, 68(sp)
-; ZDINX32-NEXT: sw t2, 64(sp)
-; ZDINX32-NEXT: sw t3, 60(sp)
-; ZDINX32-NEXT: sw ra, 56(sp)
-; ZDINX32-NEXT: sw s11, 52(sp)
-; ZDINX32-NEXT: sw s10, 48(sp)
-; ZDINX32-NEXT: sw s9, 44(sp)
-; ZDINX32-NEXT: sw s8, 40(sp)
-; ZDINX32-NEXT: sw s7, 36(sp)
-; ZDINX32-NEXT: sw s6, 32(sp)
-; ZDINX32-NEXT: sw s5, 28(sp)
-; ZDINX32-NEXT: sw s4, 24(sp)
-; ZDINX32-NEXT: sw s3, 20(sp)
-; ZDINX32-NEXT: sw s2, 16(sp)
-; ZDINX32-NEXT: sw s1, 12(sp)
-; ZDINX32-NEXT: sw s0, 8(sp)
-; ZDINX32-NEXT: sw t4, 4(sp)
-; ZDINX32-NEXT: sw t5, 0(sp)
-; ZDINX32-NEXT: lw t2, 88(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw t3, 84(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw t4, 80(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw t5, 76(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: addi sp, sp, -160
+; ZDINX32-NEXT: sw ra, 156(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s0, 152(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s1, 148(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s2, 144(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s3, 140(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s4, 136(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s5, 132(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s6, 128(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s7, 124(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s8, 120(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s9, 116(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s10, 112(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: sw s11, 108(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: lw t0, 160(sp)
+; ZDINX32-NEXT: sw t0, 104(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: lw t0, 164(sp)
+; ZDINX32-NEXT: sw t0, 100(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: lw t0, 168(sp)
+; ZDINX32-NEXT: sw t0, 96(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: lw t0, 172(sp)
+; ZDINX32-NEXT: sw t0, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT: lw t6, 176(sp)
+; ZDINX32-NEXT: lw t5, 180(sp)
+; ZDINX32-NEXT: lw t4, 184(sp)
+; ZDINX32-NEXT: lw s0, 188(sp)
+; ZDINX32-NEXT: lw s1, 192(sp)
+; ZDINX32-NEXT: lw s2, 196(sp)
+; ZDINX32-NEXT: lw s3, 200(sp)
+; ZDINX32-NEXT: lw s4, 204(sp)
+; ZDINX32-NEXT: lw s5, 208(sp)
+; ZDINX32-NEXT: lw s6, 212(sp)
+; ZDINX32-NEXT: lw s7, 216(sp)
+; ZDINX32-NEXT: lw s8, 220(sp)
+; ZDINX32-NEXT: lw s9, 224(sp)
+; ZDINX32-NEXT: lw s10, 228(sp)
+; ZDINX32-NEXT: lw s11, 232(sp)
+; ZDINX32-NEXT: lw ra, 236(sp)
+; ZDINX32-NEXT: lw t3, 240(sp)
+; ZDINX32-NEXT: lw t2, 244(sp)
+; ZDINX32-NEXT: lw t1, 248(sp)
+; ZDINX32-NEXT: lw t0, 252(sp)
+; ZDINX32-NEXT: sw t0, 76(sp)
+; ZDINX32-NEXT: sw t1, 72(sp)
+; ZDINX32-NEXT: sw t2, 68(sp)
+; ZDINX32-NEXT: sw t3, 64(sp)
+; ZDINX32-NEXT: sw ra, 60(sp)
+; ZDINX32-NEXT: sw s11, 56(sp)
+; ZDINX32-NEXT: sw s10, 52(sp)
+; ZDINX32-NEXT: sw s9, 48(sp)
+; ZDINX32-NEXT: sw s8, 44(sp)
+; ZDINX32-NEXT: sw s7, 40(sp)
+; ZDINX32-NEXT: sw s6, 36(sp)
+; ZDINX32-NEXT: sw s5, 32(sp)
+; ZDINX32-NEXT: sw s4, 28(sp)
+; ZDINX32-NEXT: sw s3, 24(sp)
+; ZDINX32-NEXT: sw s2, 20(sp)
+; ZDINX32-NEXT: sw s1, 16(sp)
+; ZDINX32-NEXT: sw s0, 12(sp)
+; ZDINX32-NEXT: sw t4, 8(sp)
+; ZDINX32-NEXT: sw t5, 4(sp)
+; ZDINX32-NEXT: sw t6, 0(sp)
+; ZDINX32-NEXT: lw t3, 104(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw t4, 100(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw t5, 96(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw t6, 92(sp) # 4-byte Folded Reload
; ZDINX32-NEXT: call callee_float_32
-; ZDINX32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s2, 128(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s3, 124(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s4, 120(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s5, 116(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s6, 112(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s7, 108(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s8, 104(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s9, 100(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s10, 96(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: lw s11, 92(sp) # 4-byte Folded Reload
-; ZDINX32-NEXT: addi sp, sp, 144
+; ZDINX32-NEXT: lw ra, 156(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s0, 152(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s1, 148(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s2, 144(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s3, 140(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s4, 136(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s5, 132(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s6, 128(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s7, 124(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s8, 120(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s9, 116(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s10, 112(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: lw s11, 108(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT: addi sp, sp, 160
; ZDINX32-NEXT: ret
;
; ZDINX64-LABEL: caller_float_32:
@@ -1288,29 +1299,30 @@ define float @caller_float_32(<32 x float> %A) nounwind {
; ZDINX64-NEXT: lw t2, 392(sp)
; ZDINX64-NEXT: lw t1, 400(sp)
; ZDINX64-NEXT: lw t0, 408(sp)
-; ZDINX64-NEXT: sw t0, 72(sp)
-; ZDINX64-NEXT: sw t1, 68(sp)
-; ZDINX64-NEXT: sw t2, 64(sp)
-; ZDINX64-NEXT: sw t3, 60(sp)
-; ZDINX64-NEXT: sw ra, 56(sp)
-; ZDINX64-NEXT: sw s11, 52(sp)
-; ZDINX64-NEXT: sw s10, 48(sp)
-; ZDINX64-NEXT: sw s9, 44(sp)
-; ZDINX64-NEXT: sw s8, 40(sp)
-; ZDINX64-NEXT: sw s7, 36(sp)
-; ZDINX64-NEXT: sw s6, 32(sp)
-; ZDINX64-NEXT: sw s5, 28(sp)
-; ZDINX64-NEXT: sw s4, 24(sp)
-; ZDINX64-NEXT: sw s3, 20(sp)
-; ZDINX64-NEXT: sw s2, 16(sp)
-; ZDINX64-NEXT: sw s1, 12(sp)
-; ZDINX64-NEXT: sw s0, 8(sp)
-; ZDINX64-NEXT: sw t4, 4(sp)
-; ZDINX64-NEXT: sw t5, 0(sp)
-; ZDINX64-NEXT: ld t2, 112(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld t3, 104(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld t4, 96(sp) # 8-byte Folded Reload
-; ZDINX64-NEXT: ld t5, 88(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: sw t0, 76(sp)
+; ZDINX64-NEXT: sw t1, 72(sp)
+; ZDINX64-NEXT: sw t2, 68(sp)
+; ZDINX64-NEXT: sw t3, 64(sp)
+; ZDINX64-NEXT: sw ra, 60(sp)
+; ZDINX64-NEXT: sw s11, 56(sp)
+; ZDINX64-NEXT: sw s10, 52(sp)
+; ZDINX64-NEXT: sw s9, 48(sp)
+; ZDINX64-NEXT: sw s8, 44(sp)
+; ZDINX64-NEXT: sw s7, 40(sp)
+; ZDINX64-NEXT: sw s6, 36(sp)
+; ZDINX64-NEXT: sw s5, 32(sp)
+; ZDINX64-NEXT: sw s4, 28(sp)
+; ZDINX64-NEXT: sw s3, 24(sp)
+; ZDINX64-NEXT: sw s2, 20(sp)
+; ZDINX64-NEXT: sw s1, 16(sp)
+; ZDINX64-NEXT: sw s0, 12(sp)
+; ZDINX64-NEXT: sw t4, 8(sp)
+; ZDINX64-NEXT: sw t5, 4(sp)
+; ZDINX64-NEXT: sw t6, 0(sp)
+; ZDINX64-NEXT: ld t3, 112(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld t4, 104(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld t5, 96(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT: ld t6, 88(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: call callee_float_32
; ZDINX64-NEXT: ld ra, 216(sp) # 8-byte Folded Reload
; ZDINX64-NEXT: ld s0, 208(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
index 2f0d5bb6e19c4..fd81edf277d45 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -504,8 +504,8 @@ define fastcc <vscale x 32 x i32> @vector_arg_indirect_stack(i32 %0, i32 %1, i32
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, t4, a0
-; CHECK-NEXT: vl8re32.v v24, (t4)
+; CHECK-NEXT: add a0, t5, a0
+; CHECK-NEXT: vl8re32.v v24, (t5)
; CHECK-NEXT: vl8re32.v v0, (a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vadd.vv v8, v8, v24
@@ -523,25 +523,31 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV32-NEXT: .cfi_def_cfa_offset 144
; RV32-NEXT: sw ra, 140(sp) # 4-byte Folded Spill
; RV32-NEXT: sw s0, 136(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s1, 132(sp) # 4-byte Folded Spill
; RV32-NEXT: .cfi_offset ra, -4
; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: .cfi_offset s1, -12
; RV32-NEXT: addi s0, sp, 144
; RV32-NEXT: .cfi_def_cfa s0, 0
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: sub sp, sp, a0
; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: mv s1, sp
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: addi a1, sp, 128
+; RV32-NEXT: addi a1, s1, 128
; RV32-NEXT: vs8r.v v8, (a1)
; RV32-NEXT: csrr a2, vlenb
; RV32-NEXT: slli a2, a2, 4
-; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: add a2, s1, a2
; RV32-NEXT: addi a2, a2, 128
; RV32-NEXT: vs8r.v v8, (a2)
+; RV32-NEXT: li a3, 8
+; RV32-NEXT: sw a3, 0(sp)
; RV32-NEXT: add a1, a1, a0
; RV32-NEXT: vs8r.v v8, (a1)
; RV32-NEXT: add a0, a2, a0
@@ -552,47 +558,54 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV32-NEXT: li a5, 5
; RV32-NEXT: li a6, 6
; RV32-NEXT: li a7, 7
-; RV32-NEXT: csrr t2, vlenb
-; RV32-NEXT: slli t2, t2, 4
-; RV32-NEXT: add t2, sp, t2
-; RV32-NEXT: addi t2, t2, 128
-; RV32-NEXT: addi t4, sp, 128
-; RV32-NEXT: li t6, 8
+; RV32-NEXT: csrr t3, vlenb
+; RV32-NEXT: slli t3, t3, 4
+; RV32-NEXT: add t3, s1, t3
+; RV32-NEXT: addi t3, t3, 128
+; RV32-NEXT: addi t5, s1, 128
; RV32-NEXT: vs8r.v v8, (a0)
; RV32-NEXT: li a0, 0
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: call vector_arg_indirect_stack
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: addi sp, s0, -144
; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s1, 132(sp) # 4-byte Folded Reload
; RV32-NEXT: addi sp, sp, 144
; RV32-NEXT: ret
;
; RV64-LABEL: pass_vector_arg_indirect_stack:
; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -144
-; RV64-NEXT: .cfi_def_cfa_offset 144
-; RV64-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
-; RV64-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
+; RV64-NEXT: addi sp, sp, -160
+; RV64-NEXT: .cfi_def_cfa_offset 160
+; RV64-NEXT: sd ra, 152(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 144(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s1, 136(sp) # 8-byte Folded Spill
; RV64-NEXT: .cfi_offset ra, -8
; RV64-NEXT: .cfi_offset s0, -16
-; RV64-NEXT: addi s0, sp, 144
+; RV64-NEXT: .cfi_offset s1, -24
+; RV64-NEXT: addi s0, sp, 160
; RV64-NEXT: .cfi_def_cfa s0, 0
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 5
; RV64-NEXT: sub sp, sp, a0
; RV64-NEXT: andi sp, sp, -128
+; RV64-NEXT: mv s1, sp
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: addi a1, sp, 128
+; RV64-NEXT: addi a1, s1, 128
; RV64-NEXT: vs8r.v v8, (a1)
; RV64-NEXT: csrr a2, vlenb
; RV64-NEXT: slli a2, a2, 4
-; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: add a2, s1, a2
; RV64-NEXT: addi a2, a2, 128
; RV64-NEXT: vs8r.v v8, (a2)
+; RV64-NEXT: li a3, 8
+; RV64-NEXT: sd a3, 0(sp)
; RV64-NEXT: add a1, a1, a0
; RV64-NEXT: vs8r.v v8, (a1)
; RV64-NEXT: add a0, a2, a0
@@ -603,20 +616,21 @@ define fastcc <vscale x 32 x i32> @pass_vector_arg_indirect_stack(<vscale x 32 x
; RV64-NEXT: li a5, 5
; RV64-NEXT: li a6, 6
; RV64-NEXT: li a7, 7
-; RV64-NEXT: csrr t2, vlenb
-; RV64-NEXT: slli t2, t2, 4
-; RV64-NEXT: add t2, sp, t2
-; RV64-NEXT: addi t2, t2, 128
-; RV64-NEXT: addi t4, sp, 128
-; RV64-NEXT: li t6, 8
+; RV64-NEXT: csrr t3, vlenb
+; RV64-NEXT: slli t3, t3, 4
+; RV64-NEXT: add t3, s1, t3
+; RV64-NEXT: addi t3, t3, 128
+; RV64-NEXT: addi t5, s1, 128
; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: li a0, 0
; RV64-NEXT: vmv.v.i v16, 0
; RV64-NEXT: call vector_arg_indirect_stack
-; RV64-NEXT: addi sp, s0, -144
-; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
-; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 144
+; RV64-NEXT: addi sp, sp, 16
+; RV64-NEXT: addi sp, s0, -160
+; RV64-NEXT: ld ra, 152(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 144(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s1, 136(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 160
; RV64-NEXT: ret
%s = call fastcc <vscale x 32 x i32> @vector_arg_indirect_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, <vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, <vscale x 32 x i32> zeroinitializer, i32 8)
ret <vscale x 32 x i32> %s
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
index 63cd42e97ef6f..9f48fdb3608a0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll
@@ -230,7 +230,7 @@ define fastcc <32 x i32> @vector_arg_indirect_stack(i32 %0, i32 %1, i32 %2, i32
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
-; CHECK-NEXT: vle32.v v16, (t2)
+; CHECK-NEXT: vle32.v v16, (t3)
; CHECK-NEXT: vadd.vv v8, v8, v16
; CHECK-NEXT: ret
%s = add <32 x i32> %x, %z
@@ -261,8 +261,8 @@ define fastcc <32 x i32> @pass_vector_arg_indirect_stack(<32 x i32> %x, <32 x i3
; CHECK-NEXT: li a5, 5
; CHECK-NEXT: li a6, 6
; CHECK-NEXT: li a7, 7
-; CHECK-NEXT: mv t2, sp
-; CHECK-NEXT: li t3, 8
+; CHECK-NEXT: mv t3, sp
+; CHECK-NEXT: li t4, 8
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: vmv.v.i v16, 0
@@ -281,7 +281,7 @@ define fastcc <32 x i32> @vector_arg_direct_stack(i32 %0, i32 %1, i32 %2, i32 %3
; CHECK-LABEL: vector_arg_direct_stack:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: addi a1, sp, 8
+; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v24, (a1)
; CHECK-NEXT: vadd.vv v8, v8, v16
@@ -303,11 +303,13 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32>
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: sd a0, 136(sp)
+; CHECK-NEXT: sd a0, 144(sp)
; CHECK-NEXT: li a0, 13
+; CHECK-NEXT: sd a0, 8(sp)
+; CHECK-NEXT: li a0, 12
; CHECK-NEXT: li a1, 1
; CHECK-NEXT: li a2, 2
; CHECK-NEXT: li a3, 3
@@ -315,11 +317,10 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32>
; CHECK-NEXT: li a5, 5
; CHECK-NEXT: li a6, 6
; CHECK-NEXT: li a7, 7
-; CHECK-NEXT: li t2, 8
-; CHECK-NEXT: li t3, 9
-; CHECK-NEXT: li t4, 10
-; CHECK-NEXT: li t5, 11
-; CHECK-NEXT: li t6, 12
+; CHECK-NEXT: li t3, 8
+; CHECK-NEXT: li t4, 9
+; CHECK-NEXT: li t5, 10
+; CHECK-NEXT: li t6, 11
; CHECK-NEXT: sd a0, 0(sp)
; CHECK-NEXT: li a0, 0
; CHECK-NEXT: vmv.v.i v16, 0
@@ -336,7 +337,7 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32>
define fastcc <4 x i1> @vector_mask_arg_direct_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, <4 x i1> %m1, <4 x i1> %m2, i32 %last) {
; CHECK-LABEL: vector_mask_arg_direct_stack:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi a0, sp, 136
+; CHECK-NEXT: addi a0, sp, 144
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vlm.v v8, (a0)
; CHECK-NEXT: vmxor.mm v0, v0, v8