[llvm] 32c257d - [RISCV] Use the stack for MVT::f16 for fastcc when there are no other registers available

via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 18 19:49:23 PDT 2023


Author: eopXD
Date: 2023-07-18T19:49:17-07:00
New Revision: 32c257d384f3f073b52b8779a5193cb190a4cae4

URL: https://github.com/llvm/llvm-project/commit/32c257d384f3f073b52b8779a5193cb190a4cae4
DIFF: https://github.com/llvm/llvm-project/commit/32c257d384f3f073b52b8779a5193cb190a4cae4.diff

LOG: [RISCV] Use the stack for MVT::f16 for fastcc when there are no other registers available

In D155502, we taught the compiler to use GPRs for f16 values under
Zhinx. This commit adds the fallback to the stack when those GPRs run
out.

Together with D155502, this patch resolves #63922.

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D155507
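
As a rough sketch of the situation this change addresses (hypothetical module
and RUN line, not taken from the patch): once a fastcc callee under Zhinx
receives more half values than there are argument GPRs, the trailing values now
get 2-byte, 2-byte-aligned stack slots instead of failing as in #63922.

; Hypothetical reproducer; the flags mirror the usual setup for this kind of
; test (an assumption, not copied from the patch):
;   llc -mtriple=riscv32 -mattr=+zhinx -verify-machineinstrs < reduced.ll
define internal fastcc half @take_many(<32 x half> %v) nounwind {
  ; The <32 x half> argument is split into scalar half arguments by the
  ; calling convention, more than the fastcc argument GPRs can hold.
  %x = extractelement <32 x half> %v, i32 0
  ret half %x
}

define half @spill_to_stack(<32 x half> %v) nounwind {
  ; The overflowing half arguments are now passed on the stack.
  %r = call fastcc half @take_many(<32 x half> %v)
  ret half %r
}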

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 64cee392f55cf7..bb59f613344176 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15254,6 +15254,12 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
     }
   }
 
+  if (LocVT == MVT::f16) {
+    unsigned Offset2 = State.AllocateStack(2, Align(2));
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset2, LocVT, LocInfo));
+    return false;
+  }
+
   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
     unsigned Offset4 = State.AllocateStack(4, Align(4));
     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));

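A note on placement: the new MVT::f16 case comes before the existing i32/f32
fallback, so a half that overflows the registers gets a 2-byte, 2-byte-aligned
slot rather than a widened 4-byte one. The comment sketch below illustrates the
resulting outgoing-argument layout; the offsets are illustrative assumptions,
not values taken from the patch.

; Outgoing-argument layout once the fastcc argument GPRs are exhausted
; (hypothetical offsets, counted from the start of the outgoing-args area):
;   with Zhinx (LocVT == MVT::f16):          half #k -> offset 2*k, align 2
;   without Zhinx/Zfh (half widened to f32): half #k -> offset 4*k, align 4
; In the checks below, the ZHINX callers store the overflowing halves with
; `sh` at consecutive 2-byte offsets, while the ZFINX/ZDINX callers use `sw`
; at 4-byte offsets, matching the two branches above.
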
diff  --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
index 94cf8becd5236c..9b3716f2f81188 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll
@@ -216,3 +216,1220 @@ define internal fastcc double @d(double %x) nounwind {
 entry:
   ret double %x
 }
+
+define fastcc half @callee_half_32(<32 x half> %A) nounwind {
+; ZHINX32-LABEL: callee_half_32:
+; ZHINX32:       # %bb.0:
+; ZHINX32-NEXT:    ret
+;
+; ZHINX64-LABEL: callee_half_32:
+; ZHINX64:       # %bb.0:
+; ZHINX64-NEXT:    ret
+;
+; ZFINX32-LABEL: callee_half_32:
+; ZFINX32:       # %bb.0:
+; ZFINX32-NEXT:    lui a1, 1048560
+; ZFINX32-NEXT:    or a0, a0, a1
+; ZFINX32-NEXT:    ret
+;
+; ZFINX64-LABEL: callee_half_32:
+; ZFINX64:       # %bb.0:
+; ZFINX64-NEXT:    lui a1, 1048560
+; ZFINX64-NEXT:    or a0, a0, a1
+; ZFINX64-NEXT:    ret
+;
+; ZDINX32-LABEL: callee_half_32:
+; ZDINX32:       # %bb.0:
+; ZDINX32-NEXT:    lui a1, 1048560
+; ZDINX32-NEXT:    or a0, a0, a1
+; ZDINX32-NEXT:    ret
+;
+; ZDINX64-LABEL: callee_half_32:
+; ZDINX64:       # %bb.0:
+; ZDINX64-NEXT:    lui a1, 1048560
+; ZDINX64-NEXT:    or a0, a0, a1
+; ZDINX64-NEXT:    ret
+	%B = extractelement <32 x half> %A, i32 0
+	ret half %B
+}
+
+define half @caller_half_32(<32 x half> %A) nounwind {
+; ZHINX32-LABEL: caller_half_32:
+; ZHINX32:       # %bb.0:
+; ZHINX32-NEXT:    addi sp, sp, -112
+; ZHINX32-NEXT:    sw ra, 108(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s0, 104(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s1, 100(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s2, 96(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s3, 92(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s4, 88(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s5, 84(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s6, 80(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s7, 76(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s8, 72(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s9, 68(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s10, 64(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s11, 60(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    addi t0, sp, 112
+; ZHINX32-NEXT:    lh t0, 0(t0)
+; ZHINX32-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    addi t0, sp, 116
+; ZHINX32-NEXT:    lh t0, 0(t0)
+; ZHINX32-NEXT:    sw t0, 52(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    addi t0, sp, 120
+; ZHINX32-NEXT:    lh t0, 0(t0)
+; ZHINX32-NEXT:    sw t0, 48(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    addi t0, sp, 124
+; ZHINX32-NEXT:    lh t0, 0(t0)
+; ZHINX32-NEXT:    sw t0, 44(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    addi t0, sp, 128
+; ZHINX32-NEXT:    lh t0, 0(t0)
+; ZHINX32-NEXT:    sw t0, 40(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    addi t0, sp, 132
+; ZHINX32-NEXT:    lh t6, 0(t0)
+; ZHINX32-NEXT:    addi t1, sp, 136
+; ZHINX32-NEXT:    lh t1, 0(t1)
+; ZHINX32-NEXT:    addi s0, sp, 140
+; ZHINX32-NEXT:    lh s0, 0(s0)
+; ZHINX32-NEXT:    addi s1, sp, 144
+; ZHINX32-NEXT:    lh s1, 0(s1)
+; ZHINX32-NEXT:    addi s2, sp, 148
+; ZHINX32-NEXT:    lh s2, 0(s2)
+; ZHINX32-NEXT:    addi s3, sp, 152
+; ZHINX32-NEXT:    lh s3, 0(s3)
+; ZHINX32-NEXT:    addi s4, sp, 156
+; ZHINX32-NEXT:    lh s4, 0(s4)
+; ZHINX32-NEXT:    addi s5, sp, 160
+; ZHINX32-NEXT:    lh s5, 0(s5)
+; ZHINX32-NEXT:    addi s6, sp, 164
+; ZHINX32-NEXT:    lh s6, 0(s6)
+; ZHINX32-NEXT:    addi s7, sp, 168
+; ZHINX32-NEXT:    lh s7, 0(s7)
+; ZHINX32-NEXT:    addi s8, sp, 172
+; ZHINX32-NEXT:    lh s8, 0(s8)
+; ZHINX32-NEXT:    addi s9, sp, 176
+; ZHINX32-NEXT:    lh s9, 0(s9)
+; ZHINX32-NEXT:    addi s10, sp, 180
+; ZHINX32-NEXT:    lh s10, 0(s10)
+; ZHINX32-NEXT:    addi s11, sp, 184
+; ZHINX32-NEXT:    lh s11, 0(s11)
+; ZHINX32-NEXT:    addi ra, sp, 188
+; ZHINX32-NEXT:    lh ra, 0(ra)
+; ZHINX32-NEXT:    addi t0, sp, 192
+; ZHINX32-NEXT:    lh t0, 0(t0)
+; ZHINX32-NEXT:    addi t2, sp, 196
+; ZHINX32-NEXT:    lh t2, 0(t2)
+; ZHINX32-NEXT:    addi t3, sp, 200
+; ZHINX32-NEXT:    lh t3, 0(t3)
+; ZHINX32-NEXT:    addi t4, sp, 204
+; ZHINX32-NEXT:    lh t4, 0(t4)
+; ZHINX32-NEXT:    addi t5, sp, 36
+; ZHINX32-NEXT:    sh t4, 0(t5)
+; ZHINX32-NEXT:    addi t4, sp, 34
+; ZHINX32-NEXT:    sh t3, 0(t4)
+; ZHINX32-NEXT:    addi t3, sp, 32
+; ZHINX32-NEXT:    sh t2, 0(t3)
+; ZHINX32-NEXT:    addi t2, sp, 30
+; ZHINX32-NEXT:    sh t0, 0(t2)
+; ZHINX32-NEXT:    addi t0, sp, 28
+; ZHINX32-NEXT:    sh ra, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 26
+; ZHINX32-NEXT:    sh s11, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 24
+; ZHINX32-NEXT:    sh s10, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 22
+; ZHINX32-NEXT:    sh s9, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 20
+; ZHINX32-NEXT:    sh s8, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 18
+; ZHINX32-NEXT:    sh s7, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 16
+; ZHINX32-NEXT:    sh s6, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 14
+; ZHINX32-NEXT:    sh s5, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 12
+; ZHINX32-NEXT:    sh s4, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 10
+; ZHINX32-NEXT:    sh s3, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 8
+; ZHINX32-NEXT:    sh s2, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 6
+; ZHINX32-NEXT:    sh s1, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 4
+; ZHINX32-NEXT:    sh s0, 0(t0)
+; ZHINX32-NEXT:    addi t0, sp, 2
+; ZHINX32-NEXT:    sh t1, 0(t0)
+; ZHINX32-NEXT:    sh t6, 0(sp)
+; ZHINX32-NEXT:    lw t2, 56(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t3, 52(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t4, 48(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t5, 44(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t6, 40(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    call callee_half_32@plt
+; ZHINX32-NEXT:    lw ra, 108(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s0, 104(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s1, 100(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s2, 96(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s3, 92(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s4, 88(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s5, 84(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s6, 80(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s7, 76(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s8, 72(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s9, 68(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s10, 64(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s11, 60(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    addi sp, sp, 112
+; ZHINX32-NEXT:    ret
+;
+; ZHINX64-LABEL: caller_half_32:
+; ZHINX64:       # %bb.0:
+; ZHINX64-NEXT:    addi sp, sp, -192
+; ZHINX64-NEXT:    sd ra, 184(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s0, 176(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s1, 168(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s2, 160(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s3, 152(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s4, 144(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s5, 136(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s6, 128(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s7, 120(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s8, 112(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s9, 104(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s10, 96(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s11, 88(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    addi t0, sp, 192
+; ZHINX64-NEXT:    lh t0, 0(t0)
+; ZHINX64-NEXT:    sd t0, 80(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    addi t0, sp, 200
+; ZHINX64-NEXT:    lh t0, 0(t0)
+; ZHINX64-NEXT:    sd t0, 72(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    addi t0, sp, 208
+; ZHINX64-NEXT:    lh t0, 0(t0)
+; ZHINX64-NEXT:    sd t0, 64(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    addi t0, sp, 216
+; ZHINX64-NEXT:    lh t0, 0(t0)
+; ZHINX64-NEXT:    sd t0, 56(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    addi t0, sp, 224
+; ZHINX64-NEXT:    lh t0, 0(t0)
+; ZHINX64-NEXT:    sd t0, 48(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    addi t0, sp, 232
+; ZHINX64-NEXT:    lh t6, 0(t0)
+; ZHINX64-NEXT:    addi t1, sp, 240
+; ZHINX64-NEXT:    lh t1, 0(t1)
+; ZHINX64-NEXT:    addi s0, sp, 248
+; ZHINX64-NEXT:    lh s0, 0(s0)
+; ZHINX64-NEXT:    addi s1, sp, 256
+; ZHINX64-NEXT:    lh s1, 0(s1)
+; ZHINX64-NEXT:    addi s2, sp, 264
+; ZHINX64-NEXT:    lh s2, 0(s2)
+; ZHINX64-NEXT:    addi s3, sp, 272
+; ZHINX64-NEXT:    lh s3, 0(s3)
+; ZHINX64-NEXT:    addi s4, sp, 280
+; ZHINX64-NEXT:    lh s4, 0(s4)
+; ZHINX64-NEXT:    addi s5, sp, 288
+; ZHINX64-NEXT:    lh s5, 0(s5)
+; ZHINX64-NEXT:    addi s6, sp, 296
+; ZHINX64-NEXT:    lh s6, 0(s6)
+; ZHINX64-NEXT:    addi s7, sp, 304
+; ZHINX64-NEXT:    lh s7, 0(s7)
+; ZHINX64-NEXT:    addi s8, sp, 312
+; ZHINX64-NEXT:    lh s8, 0(s8)
+; ZHINX64-NEXT:    addi s9, sp, 320
+; ZHINX64-NEXT:    lh s9, 0(s9)
+; ZHINX64-NEXT:    addi s10, sp, 328
+; ZHINX64-NEXT:    lh s10, 0(s10)
+; ZHINX64-NEXT:    addi s11, sp, 336
+; ZHINX64-NEXT:    lh s11, 0(s11)
+; ZHINX64-NEXT:    addi ra, sp, 344
+; ZHINX64-NEXT:    lh ra, 0(ra)
+; ZHINX64-NEXT:    addi t0, sp, 352
+; ZHINX64-NEXT:    lh t0, 0(t0)
+; ZHINX64-NEXT:    addi t2, sp, 360
+; ZHINX64-NEXT:    lh t2, 0(t2)
+; ZHINX64-NEXT:    addi t3, sp, 368
+; ZHINX64-NEXT:    lh t3, 0(t3)
+; ZHINX64-NEXT:    addi t4, sp, 376
+; ZHINX64-NEXT:    lh t4, 0(t4)
+; ZHINX64-NEXT:    addi t5, sp, 36
+; ZHINX64-NEXT:    sh t4, 0(t5)
+; ZHINX64-NEXT:    addi t4, sp, 34
+; ZHINX64-NEXT:    sh t3, 0(t4)
+; ZHINX64-NEXT:    addi t3, sp, 32
+; ZHINX64-NEXT:    sh t2, 0(t3)
+; ZHINX64-NEXT:    addi t2, sp, 30
+; ZHINX64-NEXT:    sh t0, 0(t2)
+; ZHINX64-NEXT:    addi t0, sp, 28
+; ZHINX64-NEXT:    sh ra, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 26
+; ZHINX64-NEXT:    sh s11, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 24
+; ZHINX64-NEXT:    sh s10, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 22
+; ZHINX64-NEXT:    sh s9, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 20
+; ZHINX64-NEXT:    sh s8, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 18
+; ZHINX64-NEXT:    sh s7, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 16
+; ZHINX64-NEXT:    sh s6, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 14
+; ZHINX64-NEXT:    sh s5, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 12
+; ZHINX64-NEXT:    sh s4, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 10
+; ZHINX64-NEXT:    sh s3, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 8
+; ZHINX64-NEXT:    sh s2, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 6
+; ZHINX64-NEXT:    sh s1, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 4
+; ZHINX64-NEXT:    sh s0, 0(t0)
+; ZHINX64-NEXT:    addi t0, sp, 2
+; ZHINX64-NEXT:    sh t1, 0(t0)
+; ZHINX64-NEXT:    sh t6, 0(sp)
+; ZHINX64-NEXT:    ld t2, 80(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t3, 72(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t4, 64(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t5, 56(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t6, 48(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    call callee_half_32@plt
+; ZHINX64-NEXT:    ld ra, 184(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s0, 176(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s1, 168(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s2, 160(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s3, 152(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s4, 144(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s5, 136(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s6, 128(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s7, 120(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s8, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s9, 104(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s10, 96(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s11, 88(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    addi sp, sp, 192
+; ZHINX64-NEXT:    ret
+;
+; ZFINX32-LABEL: caller_half_32:
+; ZFINX32:       # %bb.0:
+; ZFINX32-NEXT:    addi sp, sp, -144
+; ZFINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 0(a0)
+; ZFINX32-NEXT:    lw a1, 4(a0)
+; ZFINX32-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw a1, 8(a0)
+; ZFINX32-NEXT:    sw a1, 84(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw a1, 12(a0)
+; ZFINX32-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw a1, 16(a0)
+; ZFINX32-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw a5, 20(a0)
+; ZFINX32-NEXT:    lw a6, 24(a0)
+; ZFINX32-NEXT:    lw a7, 28(a0)
+; ZFINX32-NEXT:    lw t2, 32(a0)
+; ZFINX32-NEXT:    lw t3, 36(a0)
+; ZFINX32-NEXT:    lw t4, 40(a0)
+; ZFINX32-NEXT:    lw t5, 44(a0)
+; ZFINX32-NEXT:    lw t6, 48(a0)
+; ZFINX32-NEXT:    lw t1, 52(a0)
+; ZFINX32-NEXT:    lw s0, 56(a0)
+; ZFINX32-NEXT:    lw s1, 60(a0)
+; ZFINX32-NEXT:    lw s2, 64(a0)
+; ZFINX32-NEXT:    lw s3, 68(a0)
+; ZFINX32-NEXT:    lw s4, 72(a0)
+; ZFINX32-NEXT:    lw s5, 76(a0)
+; ZFINX32-NEXT:    lw s6, 80(a0)
+; ZFINX32-NEXT:    lw s7, 84(a0)
+; ZFINX32-NEXT:    lw s8, 88(a0)
+; ZFINX32-NEXT:    lw s9, 92(a0)
+; ZFINX32-NEXT:    lw s10, 96(a0)
+; ZFINX32-NEXT:    lw s11, 100(a0)
+; ZFINX32-NEXT:    lw ra, 104(a0)
+; ZFINX32-NEXT:    lw a4, 108(a0)
+; ZFINX32-NEXT:    lw a3, 112(a0)
+; ZFINX32-NEXT:    lw a2, 116(a0)
+; ZFINX32-NEXT:    lw a1, 120(a0)
+; ZFINX32-NEXT:    lw a0, 124(a0)
+; ZFINX32-NEXT:    sw a0, 72(sp)
+; ZFINX32-NEXT:    sw a1, 68(sp)
+; ZFINX32-NEXT:    sw a2, 64(sp)
+; ZFINX32-NEXT:    sw a3, 60(sp)
+; ZFINX32-NEXT:    sw a4, 56(sp)
+; ZFINX32-NEXT:    sw ra, 52(sp)
+; ZFINX32-NEXT:    sw s11, 48(sp)
+; ZFINX32-NEXT:    sw s10, 44(sp)
+; ZFINX32-NEXT:    sw s9, 40(sp)
+; ZFINX32-NEXT:    sw s8, 36(sp)
+; ZFINX32-NEXT:    sw s7, 32(sp)
+; ZFINX32-NEXT:    sw s6, 28(sp)
+; ZFINX32-NEXT:    sw s5, 24(sp)
+; ZFINX32-NEXT:    sw s4, 20(sp)
+; ZFINX32-NEXT:    sw s3, 16(sp)
+; ZFINX32-NEXT:    sw s2, 12(sp)
+; ZFINX32-NEXT:    sw s1, 8(sp)
+; ZFINX32-NEXT:    sw s0, 4(sp)
+; ZFINX32-NEXT:    sw t1, 0(sp)
+; ZFINX32-NEXT:    mv a0, t0
+; ZFINX32-NEXT:    lw a1, 88(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw a3, 80(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw a4, 76(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    call callee_half_32@plt
+; ZFINX32-NEXT:    lui a1, 1048560
+; ZFINX32-NEXT:    or a0, a0, a1
+; ZFINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    addi sp, sp, 144
+; ZFINX32-NEXT:    ret
+;
+; ZFINX64-LABEL: caller_half_32:
+; ZFINX64:       # %bb.0:
+; ZFINX64-NEXT:    addi sp, sp, -288
+; ZFINX64-NEXT:    sd ra, 280(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s0, 272(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s1, 264(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s2, 256(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s3, 248(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s4, 240(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s5, 232(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s6, 224(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s7, 216(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s8, 208(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s9, 200(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    ld t0, 0(a0)
+; ZFINX64-NEXT:    ld a1, 8(a0)
+; ZFINX64-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    ld a1, 16(a0)
+; ZFINX64-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    ld a1, 24(a0)
+; ZFINX64-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    ld a1, 32(a0)
+; ZFINX64-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    ld a5, 40(a0)
+; ZFINX64-NEXT:    ld a6, 48(a0)
+; ZFINX64-NEXT:    ld a7, 56(a0)
+; ZFINX64-NEXT:    ld t2, 64(a0)
+; ZFINX64-NEXT:    ld t3, 72(a0)
+; ZFINX64-NEXT:    ld t4, 80(a0)
+; ZFINX64-NEXT:    ld t5, 88(a0)
+; ZFINX64-NEXT:    ld t6, 96(a0)
+; ZFINX64-NEXT:    ld t1, 104(a0)
+; ZFINX64-NEXT:    ld s0, 112(a0)
+; ZFINX64-NEXT:    ld s1, 120(a0)
+; ZFINX64-NEXT:    ld s2, 128(a0)
+; ZFINX64-NEXT:    ld s3, 136(a0)
+; ZFINX64-NEXT:    ld s4, 144(a0)
+; ZFINX64-NEXT:    ld s5, 152(a0)
+; ZFINX64-NEXT:    ld s6, 160(a0)
+; ZFINX64-NEXT:    ld s7, 168(a0)
+; ZFINX64-NEXT:    ld s8, 176(a0)
+; ZFINX64-NEXT:    ld s9, 184(a0)
+; ZFINX64-NEXT:    ld s10, 192(a0)
+; ZFINX64-NEXT:    ld s11, 200(a0)
+; ZFINX64-NEXT:    ld ra, 208(a0)
+; ZFINX64-NEXT:    ld a4, 216(a0)
+; ZFINX64-NEXT:    ld a3, 224(a0)
+; ZFINX64-NEXT:    ld a2, 232(a0)
+; ZFINX64-NEXT:    ld a1, 240(a0)
+; ZFINX64-NEXT:    ld a0, 248(a0)
+; ZFINX64-NEXT:    sd a0, 144(sp)
+; ZFINX64-NEXT:    sd a1, 136(sp)
+; ZFINX64-NEXT:    sd a2, 128(sp)
+; ZFINX64-NEXT:    sd a3, 120(sp)
+; ZFINX64-NEXT:    sd a4, 112(sp)
+; ZFINX64-NEXT:    sd ra, 104(sp)
+; ZFINX64-NEXT:    sd s11, 96(sp)
+; ZFINX64-NEXT:    sd s10, 88(sp)
+; ZFINX64-NEXT:    sd s9, 80(sp)
+; ZFINX64-NEXT:    sd s8, 72(sp)
+; ZFINX64-NEXT:    sd s7, 64(sp)
+; ZFINX64-NEXT:    sd s6, 56(sp)
+; ZFINX64-NEXT:    sd s5, 48(sp)
+; ZFINX64-NEXT:    sd s4, 40(sp)
+; ZFINX64-NEXT:    sd s3, 32(sp)
+; ZFINX64-NEXT:    sd s2, 24(sp)
+; ZFINX64-NEXT:    sd s1, 16(sp)
+; ZFINX64-NEXT:    sd s0, 8(sp)
+; ZFINX64-NEXT:    sd t1, 0(sp)
+; ZFINX64-NEXT:    mv a0, t0
+; ZFINX64-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld a3, 160(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld a4, 152(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    call callee_half_32@plt
+; ZFINX64-NEXT:    lui a1, 1048560
+; ZFINX64-NEXT:    or a0, a0, a1
+; ZFINX64-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s2, 256(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s3, 248(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s5, 232(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s6, 224(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s7, 216(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s8, 208(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s9, 200(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s10, 192(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    addi sp, sp, 288
+; ZFINX64-NEXT:    ret
+;
+; ZDINX32-LABEL: caller_half_32:
+; ZDINX32:       # %bb.0:
+; ZDINX32-NEXT:    addi sp, sp, -144
+; ZDINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 0(a0)
+; ZDINX32-NEXT:    lw a1, 4(a0)
+; ZDINX32-NEXT:    sw a1, 88(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw a1, 8(a0)
+; ZDINX32-NEXT:    sw a1, 84(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw a1, 12(a0)
+; ZDINX32-NEXT:    sw a1, 80(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw a1, 16(a0)
+; ZDINX32-NEXT:    sw a1, 76(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw a5, 20(a0)
+; ZDINX32-NEXT:    lw a6, 24(a0)
+; ZDINX32-NEXT:    lw a7, 28(a0)
+; ZDINX32-NEXT:    lw t2, 32(a0)
+; ZDINX32-NEXT:    lw t3, 36(a0)
+; ZDINX32-NEXT:    lw t4, 40(a0)
+; ZDINX32-NEXT:    lw t5, 44(a0)
+; ZDINX32-NEXT:    lw t6, 48(a0)
+; ZDINX32-NEXT:    lw t1, 52(a0)
+; ZDINX32-NEXT:    lw s0, 56(a0)
+; ZDINX32-NEXT:    lw s1, 60(a0)
+; ZDINX32-NEXT:    lw s2, 64(a0)
+; ZDINX32-NEXT:    lw s3, 68(a0)
+; ZDINX32-NEXT:    lw s4, 72(a0)
+; ZDINX32-NEXT:    lw s5, 76(a0)
+; ZDINX32-NEXT:    lw s6, 80(a0)
+; ZDINX32-NEXT:    lw s7, 84(a0)
+; ZDINX32-NEXT:    lw s8, 88(a0)
+; ZDINX32-NEXT:    lw s9, 92(a0)
+; ZDINX32-NEXT:    lw s10, 96(a0)
+; ZDINX32-NEXT:    lw s11, 100(a0)
+; ZDINX32-NEXT:    lw ra, 104(a0)
+; ZDINX32-NEXT:    lw a4, 108(a0)
+; ZDINX32-NEXT:    lw a3, 112(a0)
+; ZDINX32-NEXT:    lw a2, 116(a0)
+; ZDINX32-NEXT:    lw a1, 120(a0)
+; ZDINX32-NEXT:    lw a0, 124(a0)
+; ZDINX32-NEXT:    sw a0, 72(sp)
+; ZDINX32-NEXT:    sw a1, 68(sp)
+; ZDINX32-NEXT:    sw a2, 64(sp)
+; ZDINX32-NEXT:    sw a3, 60(sp)
+; ZDINX32-NEXT:    sw a4, 56(sp)
+; ZDINX32-NEXT:    sw ra, 52(sp)
+; ZDINX32-NEXT:    sw s11, 48(sp)
+; ZDINX32-NEXT:    sw s10, 44(sp)
+; ZDINX32-NEXT:    sw s9, 40(sp)
+; ZDINX32-NEXT:    sw s8, 36(sp)
+; ZDINX32-NEXT:    sw s7, 32(sp)
+; ZDINX32-NEXT:    sw s6, 28(sp)
+; ZDINX32-NEXT:    sw s5, 24(sp)
+; ZDINX32-NEXT:    sw s4, 20(sp)
+; ZDINX32-NEXT:    sw s3, 16(sp)
+; ZDINX32-NEXT:    sw s2, 12(sp)
+; ZDINX32-NEXT:    sw s1, 8(sp)
+; ZDINX32-NEXT:    sw s0, 4(sp)
+; ZDINX32-NEXT:    sw t1, 0(sp)
+; ZDINX32-NEXT:    mv a0, t0
+; ZDINX32-NEXT:    lw a1, 88(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw a2, 84(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw a3, 80(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw a4, 76(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    call callee_half_32@plt
+; ZDINX32-NEXT:    lui a1, 1048560
+; ZDINX32-NEXT:    or a0, a0, a1
+; ZDINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    addi sp, sp, 144
+; ZDINX32-NEXT:    ret
+;
+; ZDINX64-LABEL: caller_half_32:
+; ZDINX64:       # %bb.0:
+; ZDINX64-NEXT:    addi sp, sp, -288
+; ZDINX64-NEXT:    sd ra, 280(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s0, 272(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s1, 264(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s2, 256(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s3, 248(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s4, 240(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s5, 232(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s6, 224(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s7, 216(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s8, 208(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s9, 200(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s10, 192(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s11, 184(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    ld t0, 0(a0)
+; ZDINX64-NEXT:    ld a1, 8(a0)
+; ZDINX64-NEXT:    sd a1, 176(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    ld a1, 16(a0)
+; ZDINX64-NEXT:    sd a1, 168(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    ld a1, 24(a0)
+; ZDINX64-NEXT:    sd a1, 160(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    ld a1, 32(a0)
+; ZDINX64-NEXT:    sd a1, 152(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    ld a5, 40(a0)
+; ZDINX64-NEXT:    ld a6, 48(a0)
+; ZDINX64-NEXT:    ld a7, 56(a0)
+; ZDINX64-NEXT:    ld t2, 64(a0)
+; ZDINX64-NEXT:    ld t3, 72(a0)
+; ZDINX64-NEXT:    ld t4, 80(a0)
+; ZDINX64-NEXT:    ld t5, 88(a0)
+; ZDINX64-NEXT:    ld t6, 96(a0)
+; ZDINX64-NEXT:    ld t1, 104(a0)
+; ZDINX64-NEXT:    ld s0, 112(a0)
+; ZDINX64-NEXT:    ld s1, 120(a0)
+; ZDINX64-NEXT:    ld s2, 128(a0)
+; ZDINX64-NEXT:    ld s3, 136(a0)
+; ZDINX64-NEXT:    ld s4, 144(a0)
+; ZDINX64-NEXT:    ld s5, 152(a0)
+; ZDINX64-NEXT:    ld s6, 160(a0)
+; ZDINX64-NEXT:    ld s7, 168(a0)
+; ZDINX64-NEXT:    ld s8, 176(a0)
+; ZDINX64-NEXT:    ld s9, 184(a0)
+; ZDINX64-NEXT:    ld s10, 192(a0)
+; ZDINX64-NEXT:    ld s11, 200(a0)
+; ZDINX64-NEXT:    ld ra, 208(a0)
+; ZDINX64-NEXT:    ld a4, 216(a0)
+; ZDINX64-NEXT:    ld a3, 224(a0)
+; ZDINX64-NEXT:    ld a2, 232(a0)
+; ZDINX64-NEXT:    ld a1, 240(a0)
+; ZDINX64-NEXT:    ld a0, 248(a0)
+; ZDINX64-NEXT:    sd a0, 144(sp)
+; ZDINX64-NEXT:    sd a1, 136(sp)
+; ZDINX64-NEXT:    sd a2, 128(sp)
+; ZDINX64-NEXT:    sd a3, 120(sp)
+; ZDINX64-NEXT:    sd a4, 112(sp)
+; ZDINX64-NEXT:    sd ra, 104(sp)
+; ZDINX64-NEXT:    sd s11, 96(sp)
+; ZDINX64-NEXT:    sd s10, 88(sp)
+; ZDINX64-NEXT:    sd s9, 80(sp)
+; ZDINX64-NEXT:    sd s8, 72(sp)
+; ZDINX64-NEXT:    sd s7, 64(sp)
+; ZDINX64-NEXT:    sd s6, 56(sp)
+; ZDINX64-NEXT:    sd s5, 48(sp)
+; ZDINX64-NEXT:    sd s4, 40(sp)
+; ZDINX64-NEXT:    sd s3, 32(sp)
+; ZDINX64-NEXT:    sd s2, 24(sp)
+; ZDINX64-NEXT:    sd s1, 16(sp)
+; ZDINX64-NEXT:    sd s0, 8(sp)
+; ZDINX64-NEXT:    sd t1, 0(sp)
+; ZDINX64-NEXT:    mv a0, t0
+; ZDINX64-NEXT:    ld a1, 176(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld a2, 168(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld a3, 160(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld a4, 152(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    call callee_half_32@plt
+; ZDINX64-NEXT:    lui a1, 1048560
+; ZDINX64-NEXT:    or a0, a0, a1
+; ZDINX64-NEXT:    ld ra, 280(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s0, 272(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s1, 264(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s2, 256(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s3, 248(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s4, 240(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s5, 232(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s6, 224(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s7, 216(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s8, 208(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s9, 200(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s10, 192(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s11, 184(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    addi sp, sp, 288
+; ZDINX64-NEXT:    ret
+	%C = call fastcc half @callee_half_32(<32 x half> %A)
+	ret half %C
+}
+
+define fastcc float @callee_float_32(<32 x float> %A) nounwind {
+; ZHINX32-LABEL: callee_float_32:
+; ZHINX32:       # %bb.0:
+; ZHINX32-NEXT:    ret
+;
+; ZHINX64-LABEL: callee_float_32:
+; ZHINX64:       # %bb.0:
+; ZHINX64-NEXT:    ret
+;
+; ZFINX32-LABEL: callee_float_32:
+; ZFINX32:       # %bb.0:
+; ZFINX32-NEXT:    ret
+;
+; ZFINX64-LABEL: callee_float_32:
+; ZFINX64:       # %bb.0:
+; ZFINX64-NEXT:    ret
+;
+; ZDINX32-LABEL: callee_float_32:
+; ZDINX32:       # %bb.0:
+; ZDINX32-NEXT:    ret
+;
+; ZDINX64-LABEL: callee_float_32:
+; ZDINX64:       # %bb.0:
+; ZDINX64-NEXT:    ret
+	%B = extractelement <32 x float> %A, i32 0
+	ret float %B
+}
+
+define float @caller_float_32(<32 x float> %A) nounwind {
+; ZHINX32-LABEL: caller_float_32:
+; ZHINX32:       # %bb.0:
+; ZHINX32-NEXT:    addi sp, sp, -144
+; ZHINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 144(sp)
+; ZHINX32-NEXT:    sw t0, 88(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 148(sp)
+; ZHINX32-NEXT:    sw t0, 84(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 152(sp)
+; ZHINX32-NEXT:    sw t0, 80(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t0, 156(sp)
+; ZHINX32-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
+; ZHINX32-NEXT:    lw t6, 160(sp)
+; ZHINX32-NEXT:    lw t5, 164(sp)
+; ZHINX32-NEXT:    lw t4, 168(sp)
+; ZHINX32-NEXT:    lw s0, 172(sp)
+; ZHINX32-NEXT:    lw s1, 176(sp)
+; ZHINX32-NEXT:    lw s2, 180(sp)
+; ZHINX32-NEXT:    lw s3, 184(sp)
+; ZHINX32-NEXT:    lw s4, 188(sp)
+; ZHINX32-NEXT:    lw s5, 192(sp)
+; ZHINX32-NEXT:    lw s6, 196(sp)
+; ZHINX32-NEXT:    lw s7, 200(sp)
+; ZHINX32-NEXT:    lw s8, 204(sp)
+; ZHINX32-NEXT:    lw s9, 208(sp)
+; ZHINX32-NEXT:    lw s10, 212(sp)
+; ZHINX32-NEXT:    lw s11, 216(sp)
+; ZHINX32-NEXT:    lw ra, 220(sp)
+; ZHINX32-NEXT:    lw t3, 224(sp)
+; ZHINX32-NEXT:    lw t2, 228(sp)
+; ZHINX32-NEXT:    lw t1, 232(sp)
+; ZHINX32-NEXT:    lw t0, 236(sp)
+; ZHINX32-NEXT:    sw t0, 72(sp)
+; ZHINX32-NEXT:    sw t1, 68(sp)
+; ZHINX32-NEXT:    sw t2, 64(sp)
+; ZHINX32-NEXT:    sw t3, 60(sp)
+; ZHINX32-NEXT:    sw ra, 56(sp)
+; ZHINX32-NEXT:    sw s11, 52(sp)
+; ZHINX32-NEXT:    sw s10, 48(sp)
+; ZHINX32-NEXT:    sw s9, 44(sp)
+; ZHINX32-NEXT:    sw s8, 40(sp)
+; ZHINX32-NEXT:    sw s7, 36(sp)
+; ZHINX32-NEXT:    sw s6, 32(sp)
+; ZHINX32-NEXT:    sw s5, 28(sp)
+; ZHINX32-NEXT:    sw s4, 24(sp)
+; ZHINX32-NEXT:    sw s3, 20(sp)
+; ZHINX32-NEXT:    sw s2, 16(sp)
+; ZHINX32-NEXT:    sw s1, 12(sp)
+; ZHINX32-NEXT:    sw s0, 8(sp)
+; ZHINX32-NEXT:    sw t4, 4(sp)
+; ZHINX32-NEXT:    sw t5, 0(sp)
+; ZHINX32-NEXT:    lw t2, 88(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t3, 84(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t4, 80(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw t5, 76(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    call callee_float_32@plt
+; ZHINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; ZHINX32-NEXT:    addi sp, sp, 144
+; ZHINX32-NEXT:    ret
+;
+; ZHINX64-LABEL: caller_float_32:
+; ZHINX64:       # %bb.0:
+; ZHINX64-NEXT:    addi sp, sp, -224
+; ZHINX64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    lw t0, 224(sp)
+; ZHINX64-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    lw t0, 232(sp)
+; ZHINX64-NEXT:    sd t0, 104(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    lw t0, 240(sp)
+; ZHINX64-NEXT:    sd t0, 96(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    lw t0, 248(sp)
+; ZHINX64-NEXT:    sd t0, 88(sp) # 8-byte Folded Spill
+; ZHINX64-NEXT:    lw t6, 256(sp)
+; ZHINX64-NEXT:    lw t5, 264(sp)
+; ZHINX64-NEXT:    lw t4, 272(sp)
+; ZHINX64-NEXT:    lw s0, 280(sp)
+; ZHINX64-NEXT:    lw s1, 288(sp)
+; ZHINX64-NEXT:    lw s2, 296(sp)
+; ZHINX64-NEXT:    lw s3, 304(sp)
+; ZHINX64-NEXT:    lw s4, 312(sp)
+; ZHINX64-NEXT:    lw s5, 320(sp)
+; ZHINX64-NEXT:    lw s6, 328(sp)
+; ZHINX64-NEXT:    lw s7, 336(sp)
+; ZHINX64-NEXT:    lw s8, 344(sp)
+; ZHINX64-NEXT:    lw s9, 352(sp)
+; ZHINX64-NEXT:    lw s10, 360(sp)
+; ZHINX64-NEXT:    lw s11, 368(sp)
+; ZHINX64-NEXT:    lw ra, 376(sp)
+; ZHINX64-NEXT:    lw t3, 384(sp)
+; ZHINX64-NEXT:    lw t2, 392(sp)
+; ZHINX64-NEXT:    lw t1, 400(sp)
+; ZHINX64-NEXT:    lw t0, 408(sp)
+; ZHINX64-NEXT:    sw t0, 72(sp)
+; ZHINX64-NEXT:    sw t1, 68(sp)
+; ZHINX64-NEXT:    sw t2, 64(sp)
+; ZHINX64-NEXT:    sw t3, 60(sp)
+; ZHINX64-NEXT:    sw ra, 56(sp)
+; ZHINX64-NEXT:    sw s11, 52(sp)
+; ZHINX64-NEXT:    sw s10, 48(sp)
+; ZHINX64-NEXT:    sw s9, 44(sp)
+; ZHINX64-NEXT:    sw s8, 40(sp)
+; ZHINX64-NEXT:    sw s7, 36(sp)
+; ZHINX64-NEXT:    sw s6, 32(sp)
+; ZHINX64-NEXT:    sw s5, 28(sp)
+; ZHINX64-NEXT:    sw s4, 24(sp)
+; ZHINX64-NEXT:    sw s3, 20(sp)
+; ZHINX64-NEXT:    sw s2, 16(sp)
+; ZHINX64-NEXT:    sw s1, 12(sp)
+; ZHINX64-NEXT:    sw s0, 8(sp)
+; ZHINX64-NEXT:    sw t4, 4(sp)
+; ZHINX64-NEXT:    sw t5, 0(sp)
+; ZHINX64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld t5, 88(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    call callee_float_32@plt
+; ZHINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
+; ZHINX64-NEXT:    addi sp, sp, 224
+; ZHINX64-NEXT:    ret
+;
+; ZFINX32-LABEL: caller_float_32:
+; ZFINX32:       # %bb.0:
+; ZFINX32-NEXT:    addi sp, sp, -144
+; ZFINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 144(sp)
+; ZFINX32-NEXT:    sw t0, 88(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 148(sp)
+; ZFINX32-NEXT:    sw t0, 84(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 152(sp)
+; ZFINX32-NEXT:    sw t0, 80(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t0, 156(sp)
+; ZFINX32-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
+; ZFINX32-NEXT:    lw t6, 160(sp)
+; ZFINX32-NEXT:    lw t5, 164(sp)
+; ZFINX32-NEXT:    lw t4, 168(sp)
+; ZFINX32-NEXT:    lw s0, 172(sp)
+; ZFINX32-NEXT:    lw s1, 176(sp)
+; ZFINX32-NEXT:    lw s2, 180(sp)
+; ZFINX32-NEXT:    lw s3, 184(sp)
+; ZFINX32-NEXT:    lw s4, 188(sp)
+; ZFINX32-NEXT:    lw s5, 192(sp)
+; ZFINX32-NEXT:    lw s6, 196(sp)
+; ZFINX32-NEXT:    lw s7, 200(sp)
+; ZFINX32-NEXT:    lw s8, 204(sp)
+; ZFINX32-NEXT:    lw s9, 208(sp)
+; ZFINX32-NEXT:    lw s10, 212(sp)
+; ZFINX32-NEXT:    lw s11, 216(sp)
+; ZFINX32-NEXT:    lw ra, 220(sp)
+; ZFINX32-NEXT:    lw t3, 224(sp)
+; ZFINX32-NEXT:    lw t2, 228(sp)
+; ZFINX32-NEXT:    lw t1, 232(sp)
+; ZFINX32-NEXT:    lw t0, 236(sp)
+; ZFINX32-NEXT:    sw t0, 72(sp)
+; ZFINX32-NEXT:    sw t1, 68(sp)
+; ZFINX32-NEXT:    sw t2, 64(sp)
+; ZFINX32-NEXT:    sw t3, 60(sp)
+; ZFINX32-NEXT:    sw ra, 56(sp)
+; ZFINX32-NEXT:    sw s11, 52(sp)
+; ZFINX32-NEXT:    sw s10, 48(sp)
+; ZFINX32-NEXT:    sw s9, 44(sp)
+; ZFINX32-NEXT:    sw s8, 40(sp)
+; ZFINX32-NEXT:    sw s7, 36(sp)
+; ZFINX32-NEXT:    sw s6, 32(sp)
+; ZFINX32-NEXT:    sw s5, 28(sp)
+; ZFINX32-NEXT:    sw s4, 24(sp)
+; ZFINX32-NEXT:    sw s3, 20(sp)
+; ZFINX32-NEXT:    sw s2, 16(sp)
+; ZFINX32-NEXT:    sw s1, 12(sp)
+; ZFINX32-NEXT:    sw s0, 8(sp)
+; ZFINX32-NEXT:    sw t4, 4(sp)
+; ZFINX32-NEXT:    sw t5, 0(sp)
+; ZFINX32-NEXT:    lw t2, 88(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw t3, 84(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw t4, 80(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw t5, 76(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    call callee_float_32@plt
+; ZFINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; ZFINX32-NEXT:    addi sp, sp, 144
+; ZFINX32-NEXT:    ret
+;
+; ZFINX64-LABEL: caller_float_32:
+; ZFINX64:       # %bb.0:
+; ZFINX64-NEXT:    addi sp, sp, -224
+; ZFINX64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    lw t0, 224(sp)
+; ZFINX64-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    lw t0, 232(sp)
+; ZFINX64-NEXT:    sd t0, 104(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    lw t0, 240(sp)
+; ZFINX64-NEXT:    sd t0, 96(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    lw t0, 248(sp)
+; ZFINX64-NEXT:    sd t0, 88(sp) # 8-byte Folded Spill
+; ZFINX64-NEXT:    lw t6, 256(sp)
+; ZFINX64-NEXT:    lw t5, 264(sp)
+; ZFINX64-NEXT:    lw t4, 272(sp)
+; ZFINX64-NEXT:    lw s0, 280(sp)
+; ZFINX64-NEXT:    lw s1, 288(sp)
+; ZFINX64-NEXT:    lw s2, 296(sp)
+; ZFINX64-NEXT:    lw s3, 304(sp)
+; ZFINX64-NEXT:    lw s4, 312(sp)
+; ZFINX64-NEXT:    lw s5, 320(sp)
+; ZFINX64-NEXT:    lw s6, 328(sp)
+; ZFINX64-NEXT:    lw s7, 336(sp)
+; ZFINX64-NEXT:    lw s8, 344(sp)
+; ZFINX64-NEXT:    lw s9, 352(sp)
+; ZFINX64-NEXT:    lw s10, 360(sp)
+; ZFINX64-NEXT:    lw s11, 368(sp)
+; ZFINX64-NEXT:    lw ra, 376(sp)
+; ZFINX64-NEXT:    lw t3, 384(sp)
+; ZFINX64-NEXT:    lw t2, 392(sp)
+; ZFINX64-NEXT:    lw t1, 400(sp)
+; ZFINX64-NEXT:    lw t0, 408(sp)
+; ZFINX64-NEXT:    sw t0, 72(sp)
+; ZFINX64-NEXT:    sw t1, 68(sp)
+; ZFINX64-NEXT:    sw t2, 64(sp)
+; ZFINX64-NEXT:    sw t3, 60(sp)
+; ZFINX64-NEXT:    sw ra, 56(sp)
+; ZFINX64-NEXT:    sw s11, 52(sp)
+; ZFINX64-NEXT:    sw s10, 48(sp)
+; ZFINX64-NEXT:    sw s9, 44(sp)
+; ZFINX64-NEXT:    sw s8, 40(sp)
+; ZFINX64-NEXT:    sw s7, 36(sp)
+; ZFINX64-NEXT:    sw s6, 32(sp)
+; ZFINX64-NEXT:    sw s5, 28(sp)
+; ZFINX64-NEXT:    sw s4, 24(sp)
+; ZFINX64-NEXT:    sw s3, 20(sp)
+; ZFINX64-NEXT:    sw s2, 16(sp)
+; ZFINX64-NEXT:    sw s1, 12(sp)
+; ZFINX64-NEXT:    sw s0, 8(sp)
+; ZFINX64-NEXT:    sw t4, 4(sp)
+; ZFINX64-NEXT:    sw t5, 0(sp)
+; ZFINX64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld t5, 88(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    call callee_float_32@plt
+; ZFINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
+; ZFINX64-NEXT:    addi sp, sp, 224
+; ZFINX64-NEXT:    ret
+;
+; ZDINX32-LABEL: caller_float_32:
+; ZDINX32:       # %bb.0:
+; ZDINX32-NEXT:    addi sp, sp, -144
+; ZDINX32-NEXT:    sw ra, 140(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s0, 136(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s1, 132(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s2, 128(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s3, 124(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s4, 120(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s5, 116(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s6, 112(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s7, 108(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s8, 104(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s9, 100(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s10, 96(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    sw s11, 92(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 144(sp)
+; ZDINX32-NEXT:    sw t0, 88(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 148(sp)
+; ZDINX32-NEXT:    sw t0, 84(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 152(sp)
+; ZDINX32-NEXT:    sw t0, 80(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t0, 156(sp)
+; ZDINX32-NEXT:    sw t0, 76(sp) # 4-byte Folded Spill
+; ZDINX32-NEXT:    lw t6, 160(sp)
+; ZDINX32-NEXT:    lw t5, 164(sp)
+; ZDINX32-NEXT:    lw t4, 168(sp)
+; ZDINX32-NEXT:    lw s0, 172(sp)
+; ZDINX32-NEXT:    lw s1, 176(sp)
+; ZDINX32-NEXT:    lw s2, 180(sp)
+; ZDINX32-NEXT:    lw s3, 184(sp)
+; ZDINX32-NEXT:    lw s4, 188(sp)
+; ZDINX32-NEXT:    lw s5, 192(sp)
+; ZDINX32-NEXT:    lw s6, 196(sp)
+; ZDINX32-NEXT:    lw s7, 200(sp)
+; ZDINX32-NEXT:    lw s8, 204(sp)
+; ZDINX32-NEXT:    lw s9, 208(sp)
+; ZDINX32-NEXT:    lw s10, 212(sp)
+; ZDINX32-NEXT:    lw s11, 216(sp)
+; ZDINX32-NEXT:    lw ra, 220(sp)
+; ZDINX32-NEXT:    lw t3, 224(sp)
+; ZDINX32-NEXT:    lw t2, 228(sp)
+; ZDINX32-NEXT:    lw t1, 232(sp)
+; ZDINX32-NEXT:    lw t0, 236(sp)
+; ZDINX32-NEXT:    sw t0, 72(sp)
+; ZDINX32-NEXT:    sw t1, 68(sp)
+; ZDINX32-NEXT:    sw t2, 64(sp)
+; ZDINX32-NEXT:    sw t3, 60(sp)
+; ZDINX32-NEXT:    sw ra, 56(sp)
+; ZDINX32-NEXT:    sw s11, 52(sp)
+; ZDINX32-NEXT:    sw s10, 48(sp)
+; ZDINX32-NEXT:    sw s9, 44(sp)
+; ZDINX32-NEXT:    sw s8, 40(sp)
+; ZDINX32-NEXT:    sw s7, 36(sp)
+; ZDINX32-NEXT:    sw s6, 32(sp)
+; ZDINX32-NEXT:    sw s5, 28(sp)
+; ZDINX32-NEXT:    sw s4, 24(sp)
+; ZDINX32-NEXT:    sw s3, 20(sp)
+; ZDINX32-NEXT:    sw s2, 16(sp)
+; ZDINX32-NEXT:    sw s1, 12(sp)
+; ZDINX32-NEXT:    sw s0, 8(sp)
+; ZDINX32-NEXT:    sw t4, 4(sp)
+; ZDINX32-NEXT:    sw t5, 0(sp)
+; ZDINX32-NEXT:    lw t2, 88(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw t3, 84(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw t4, 80(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw t5, 76(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    call callee_float_32@plt
+; ZDINX32-NEXT:    lw ra, 140(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s0, 136(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s1, 132(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s2, 128(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s3, 124(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s4, 120(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s5, 116(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s6, 112(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s7, 108(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s8, 104(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s9, 100(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s10, 96(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    lw s11, 92(sp) # 4-byte Folded Reload
+; ZDINX32-NEXT:    addi sp, sp, 144
+; ZDINX32-NEXT:    ret
+;
+; ZDINX64-LABEL: caller_float_32:
+; ZDINX64:       # %bb.0:
+; ZDINX64-NEXT:    addi sp, sp, -224
+; ZDINX64-NEXT:    sd ra, 216(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s0, 208(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s1, 200(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s2, 192(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s3, 184(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s4, 176(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s5, 168(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s6, 160(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s7, 152(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s8, 144(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s9, 136(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s10, 128(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    sd s11, 120(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    lw t0, 224(sp)
+; ZDINX64-NEXT:    sd t0, 112(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    lw t0, 232(sp)
+; ZDINX64-NEXT:    sd t0, 104(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    lw t0, 240(sp)
+; ZDINX64-NEXT:    sd t0, 96(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    lw t0, 248(sp)
+; ZDINX64-NEXT:    sd t0, 88(sp) # 8-byte Folded Spill
+; ZDINX64-NEXT:    lw t6, 256(sp)
+; ZDINX64-NEXT:    lw t5, 264(sp)
+; ZDINX64-NEXT:    lw t4, 272(sp)
+; ZDINX64-NEXT:    lw s0, 280(sp)
+; ZDINX64-NEXT:    lw s1, 288(sp)
+; ZDINX64-NEXT:    lw s2, 296(sp)
+; ZDINX64-NEXT:    lw s3, 304(sp)
+; ZDINX64-NEXT:    lw s4, 312(sp)
+; ZDINX64-NEXT:    lw s5, 320(sp)
+; ZDINX64-NEXT:    lw s6, 328(sp)
+; ZDINX64-NEXT:    lw s7, 336(sp)
+; ZDINX64-NEXT:    lw s8, 344(sp)
+; ZDINX64-NEXT:    lw s9, 352(sp)
+; ZDINX64-NEXT:    lw s10, 360(sp)
+; ZDINX64-NEXT:    lw s11, 368(sp)
+; ZDINX64-NEXT:    lw ra, 376(sp)
+; ZDINX64-NEXT:    lw t3, 384(sp)
+; ZDINX64-NEXT:    lw t2, 392(sp)
+; ZDINX64-NEXT:    lw t1, 400(sp)
+; ZDINX64-NEXT:    lw t0, 408(sp)
+; ZDINX64-NEXT:    sw t0, 72(sp)
+; ZDINX64-NEXT:    sw t1, 68(sp)
+; ZDINX64-NEXT:    sw t2, 64(sp)
+; ZDINX64-NEXT:    sw t3, 60(sp)
+; ZDINX64-NEXT:    sw ra, 56(sp)
+; ZDINX64-NEXT:    sw s11, 52(sp)
+; ZDINX64-NEXT:    sw s10, 48(sp)
+; ZDINX64-NEXT:    sw s9, 44(sp)
+; ZDINX64-NEXT:    sw s8, 40(sp)
+; ZDINX64-NEXT:    sw s7, 36(sp)
+; ZDINX64-NEXT:    sw s6, 32(sp)
+; ZDINX64-NEXT:    sw s5, 28(sp)
+; ZDINX64-NEXT:    sw s4, 24(sp)
+; ZDINX64-NEXT:    sw s3, 20(sp)
+; ZDINX64-NEXT:    sw s2, 16(sp)
+; ZDINX64-NEXT:    sw s1, 12(sp)
+; ZDINX64-NEXT:    sw s0, 8(sp)
+; ZDINX64-NEXT:    sw t4, 4(sp)
+; ZDINX64-NEXT:    sw t5, 0(sp)
+; ZDINX64-NEXT:    ld t2, 112(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld t3, 104(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld t4, 96(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld t5, 88(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    call callee_float_32@plt
+; ZDINX64-NEXT:    ld ra, 216(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s0, 208(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s1, 200(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s2, 192(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s3, 184(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s4, 176(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s5, 168(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s6, 160(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s7, 152(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s8, 144(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s9, 136(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s10, 128(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    ld s11, 120(sp) # 8-byte Folded Reload
+; ZDINX64-NEXT:    addi sp, sp, 224
+; ZDINX64-NEXT:    ret
+	%C = call fastcc float @callee_float_32(<32 x float> %A)
+	ret float %C
+}
+
+define fastcc double @callee_double_32(<32 x double> %A) nounwind {
+	%B = extractelement <32 x double> %A, i32 0
+	ret double %B
+}




More information about the llvm-commits mailing list